# List of Indian Candidates for Lok Sabha Election: Data Analysis and Visualization

![](https://www.jagranjosh.com/imported/images/E/Others/parliament1.webp)

## Import Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

plt.style.use('fivethirtyeight')

## Load Dataset

In [None]:
%time candidates_2004 = pd.read_csv('../input/lok-sabha-election-candidate-list-2004-to-2019/LokSabha2004.csv')
%time candidates_2009 = pd.read_csv('../input/lok-sabha-election-candidate-list-2004-to-2019/LokSabha2009.csv')
%time candidates_2014 = pd.read_csv('../input/lok-sabha-election-candidate-list-2004-to-2019/LokSabha2014.csv')
%time candidates_2019 = pd.read_csv('../input/lok-sabha-election-candidate-list-2004-to-2019/LokSabha2019.csv')

We have one data file per election year, so let's merge them into a single data frame. Before doing that, I am going to add a `Year` column to each one.

In [None]:
# Tag every per-year frame with its election year so the rows remain
# distinguishable once the frames are combined.
for election_year, year_frame in (
    (2004, candidates_2004),
    (2009, candidates_2009),
    (2014, candidates_2014),
    (2019, candidates_2019),
):
    year_frame['Year'] = election_year

Now we have a `Year` column in each data frame. Let's merge them into one data frame using the `pd.concat` method.

In [None]:
# Stack the four per-year frames into one. ignore_index=True gives every row a
# unique 0..n-1 label; without it each file's row labels are repeated in the
# combined frame, which makes label-based lookups ambiguous and can break
# Styler rendering (Styler does not support a non-unique index).
candidates = pd.concat(
    [candidates_2004, candidates_2009, candidates_2014, candidates_2019],
    ignore_index=True,
)

## Statistics of Data

Let's have a look at example samples.

In [None]:
# Preview the first ten rows of the combined frame.
candidates.head(10)

In [None]:
# Print the column names, dtypes and non-null counts of the combined frame.
candidates.info()

In [None]:
# Summary statistics (count, mean, quartiles, ...) of the combined frame.
candidates.describe()

In [None]:
# Number of missing values in each column of the combined frame.
candidates.isna().sum()

As we can see there is no null value in the data. 

In [None]:
# (rows, columns) of the combined frame.
candidates.shape

We have details for about 28 thousand candidates.

# Year wise Data Analysis and Visualization

## 1. Youngest Candidate who participated in Lok Sabha Election?

Some entries have an age of 18 or below, so I am ignoring those rows because such values are invalid.

In [None]:
# Keep only the rows whose Age is strictly greater than 18; lower values are
# treated as invalid entries.
candidate_age_2004 = candidates_2004.loc[candidates_2004['Age'].gt(18)]
candidate_age_2009 = candidates_2009.loc[candidates_2009['Age'].gt(18)]
candidate_age_2014 = candidates_2014.loc[candidates_2014['Age'].gt(18)]
candidate_age_2019 = candidates_2019.loc[candidates_2019['Age'].gt(18)]

In [None]:
# Rows whose age equals the smallest remaining age among the 2004 candidates.
candidate_age_2004.loc[
    candidate_age_2004['Age'].eq(candidate_age_2004['Age'].min())
]

In the 2004 Lok Sabha election, the youngest candidate was 25 years old.

In [None]:
# Rows whose age equals the smallest remaining age among the 2009 candidates.
candidate_age_2009.loc[
    candidate_age_2009['Age'].eq(candidate_age_2009['Age'].min())
]

In [None]:
# Rows whose age equals the smallest remaining age among the 2014 candidates.
candidate_age_2014.loc[
    candidate_age_2014['Age'].eq(candidate_age_2014['Age'].min())
]

In [None]:
# Rows whose age equals the smallest remaining age among the 2019 candidates.
candidate_age_2019.loc[
    candidate_age_2019['Age'].eq(candidate_age_2019['Age'].min())
]

## Ravikant Yadav is the youngest candidate: he was 21 years old when he participated in the 2014 Lok Sabha election.

In [None]:
# Bar chart of how many 2004 candidates share each age.
plt.figure(figsize=(20, 12))
# The ages are passed through the x= keyword: newer seaborn releases treat the
# first positional argument as `data`, which would not produce per-age counts.
sns.countplot(
    x=candidate_age_2004['Age'].sort_values(),
    palette="Dark2",
    edgecolor=(0, 0, 0),
)
plt.title("2004 Election's Candidate Age Count", fontsize=20)
plt.xlabel('Age')
plt.ylabel('Count')
plt.xticks(fontsize=12, rotation=90)
plt.show()

In [None]:
# Bar chart of how many 2009 candidates share each age.
plt.figure(figsize=(20, 12))
# The ages are passed through the x= keyword: newer seaborn releases treat the
# first positional argument as `data`, which would not produce per-age counts.
sns.countplot(
    x=candidate_age_2009['Age'].sort_values(),
    palette="Dark2",
    edgecolor=(0, 0, 0),
)
# The year is part of the title so this figure can be told apart from the
# otherwise identical plots for the other elections.
plt.title("2009 Election's Candidate Age Count", fontsize=20)
plt.xlabel('Age')
plt.ylabel('Count')
plt.xticks(fontsize=12, rotation=90)
plt.show()

In [None]:
# Bar chart of how many 2014 candidates share each age.
plt.figure(figsize=(20, 12))
# The ages are passed through the x= keyword: newer seaborn releases treat the
# first positional argument as `data`, which would not produce per-age counts.
sns.countplot(
    x=candidate_age_2014['Age'].sort_values(),
    palette="Dark2",
    edgecolor=(0, 0, 0),
)
# The year is part of the title so this figure can be told apart from the
# otherwise identical plots for the other elections.
plt.title("2014 Election's Candidate Age Count", fontsize=20)
plt.xlabel('Age')
plt.ylabel('Count')
plt.xticks(fontsize=12, rotation=90)
plt.show()

In [None]:
# Bar chart of how many 2019 candidates share each age.
plt.figure(figsize=(20, 12))
# The ages are passed through the x= keyword: newer seaborn releases treat the
# first positional argument as `data`, which would not produce per-age counts.
sns.countplot(
    x=candidate_age_2019['Age'].sort_values(),
    palette="Dark2",
    edgecolor=(0, 0, 0),
)
# The year is part of the title so this figure can be told apart from the
# otherwise identical plots for the other elections.
plt.title("2019 Election's Candidate Age Count", fontsize=20)
plt.xlabel('Age')
plt.ylabel('Count')
plt.xticks(fontsize=12, rotation=90)
plt.show()

In [None]:
# How many candidates (across all four elections) have each age value.
# Note: this uses the unfiltered combined frame, so ages of 18 or below are
# still included here.
candidates['Age'].value_counts()

## There are 904 candidates who are 38 years old. 

## 2. Candidate with Highest number of Criminal Cases.

In [None]:
# 2004 candidate(s) with the largest number of criminal cases.
candidates_2004.loc[
    candidates_2004['Criminal Cases'].eq(candidates_2004['Criminal Cases'].max())
]

In [None]:
# 2009 candidate(s) with the largest number of criminal cases.
candidates_2009.loc[
    candidates_2009['Criminal Cases'].eq(candidates_2009['Criminal Cases'].max())
]

In [None]:
# 2014 candidate(s) with the largest number of criminal cases.
candidates_2014.loc[
    candidates_2014['Criminal Cases'].eq(candidates_2014['Criminal Cases'].max())
]

In [None]:
# 2019 candidate(s) with the largest number of criminal cases.
candidates_2019.loc[
    candidates_2019['Criminal Cases'].eq(candidates_2019['Criminal Cases'].max())
]

In [None]:
# Five candidates with the most criminal cases over all four elections,
# shaded by Age and Criminal Cases.
(
    candidates
    .sort_values(['Criminal Cases'], ascending=False)
    .head()
    # Row labels of the combined frame can repeat across years, and Styler
    # refuses to style a frame with a non-unique index, so renumber the
    # five rows before styling.
    .reset_index(drop=True)
    .style
    .background_gradient(subset=['Age', 'Criminal Cases'], cmap='YlGn')
)

## Candidate Udayakumar S.p. has the highest number of criminal cases: 382.

## Total number of criminal cases per party.

In [None]:
# Total criminal cases per party for each election year. Each result is a
# one-column frame ('Criminal Cases') indexed by Party.
# The column is selected explicitly and .sum() is called without arguments:
# the first positional parameter of GroupBy.sum is `numeric_only`, not a
# column name.
criminal_cases_2004 = candidates_2004.groupby('Party')[['Criminal Cases']].sum()
criminal_cases_2009 = candidates_2009.groupby('Party')[['Criminal Cases']].sum()
criminal_cases_2014 = candidates_2014.groupby('Party')[['Criminal Cases']].sum()
criminal_cases_2019 = candidates_2019.groupby('Party')[['Criminal Cases']].sum()

In [None]:
# Ten parties with the most criminal cases in 2004, shaded by case count.
(
    criminal_cases_2004
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Ten parties with the most criminal cases in 2009, shaded by case count.
(
    criminal_cases_2009
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Ten parties with the most criminal cases in 2014, shaded by case count.
(
    criminal_cases_2014
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Ten parties with the most criminal cases in 2019, shaded by case count.
(
    criminal_cases_2019
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

## Independent candidates have the highest number of criminal cases in every year except 2014.

## Number of candidates who participated from each party since 2004.

In [None]:
# Candidate count per party over all four elections, largest first.
# The name `loc` is kept because the following cell reads it.
loc = candidates["Party"].value_counts()
sns.set(style="whitegrid")
# Vertical bars for the ten parties that fielded the most candidates.
sns.barplot(y=loc[:10], x=loc[:10].index, palette="Set2")
plt.xticks(rotation=90)
plt.xlabel('Party')
plt.ylabel('Candidate Count')
plt.title("Different Political Party and Candidate Count", fontweight="bold")
# plt.show() renders the figure and keeps the Text(...) object returned by
# plt.title out of the cell output, matching the other plotting cells.
plt.show()

In [None]:
# Horizontal version of the top-ten party chart with the count written on
# each bar.
top_parties = loc[:10]
plt.figure(figsize=(9, 10))
ax = sns.barplot(x=top_parties, y=top_parties.index, palette="tab20c", linewidth=1)
# Write each count near the left edge of its bar (x = 0.5 in data units).
for bar_row, candidate_count in enumerate(top_parties):
    ax.text(.5, bar_row, candidate_count, weight="bold", color='black', fontsize=13)
plt.title("Candidate count of each party since 2004")
ax.set_xlabel('Candidate Count', fontsize=10)
ax.set_ylabel('Party', fontsize=10)
plt.show()

## Education of each candidate.

In [None]:
# Bar chart of how many candidates (all years) fall into each education level.
plt.figure(figsize=(20, 12))
# The values are passed through the x= keyword: newer seaborn releases treat
# the first positional argument as `data`, which would not produce
# per-category counts.
sns.countplot(
    x=candidates['Education'].sort_values(),
    palette="Dark2",
    edgecolor=(0, 0, 0),
)
plt.title("Candidate Education Count", fontsize=20)
plt.xlabel('Education')
plt.ylabel('Count')
plt.xticks(fontsize=12, rotation=90)
plt.show()

## The most common education level is Graduation.

In [None]:
# Number of candidates per education level over all four elections,
# most frequent level first (value_counts sorts in descending order).
candidate_education = candidates['Education'].value_counts()

In [None]:
# Display the per-level candidate counts.
candidate_education

In [None]:
# Pie chart of the share of candidates per education level.
plt.figure(figsize=(20, 12))
wedge_dict = {
    'edgecolor': 'black',
    'linewidth': 2,
}

# Pull the 2nd, 4th, 6th and 8th wedges out of the pie. The sequence is built
# from the number of education levels because plt.pie raises a ValueError when
# `explode` and the data differ in length; for twelve levels this gives the
# same offsets as before.
explode = [
    0.1 if position in (1, 3, 5, 7) else 0
    for position in range(len(candidate_education))
]

plt.pie(
    candidate_education,
    explode=explode,
    autopct='%1.2f%%',
    wedgeprops=wedge_dict,
    labels=candidate_education.index,
)
plt.show()

## Constituencies with the highest number of criminal cases

In [None]:
# Total criminal cases per constituency for each election year. Each result
# is a one-column frame ('Criminal Cases') indexed by Constituency.
# The column is selected explicitly and .sum() is called without arguments:
# the first positional parameter of GroupBy.sum is `numeric_only`, not a
# column name.
city_criminal_cases_2004 = candidates_2004.groupby('Constituency')[['Criminal Cases']].sum()
city_criminal_cases_2009 = candidates_2009.groupby('Constituency')[['Criminal Cases']].sum()
city_criminal_cases_2014 = candidates_2014.groupby('Constituency')[['Criminal Cases']].sum()
city_criminal_cases_2019 = candidates_2019.groupby('Constituency')[['Criminal Cases']].sum()

In [None]:
# Ten constituencies with the most criminal cases in 2004, shaded by count.
(
    city_criminal_cases_2004
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Criminal-case totals of the ten constituencies with the most cases in 2004.
cases_2004 = (
    city_criminal_cases_2004
    .sort_values(['Criminal Cases'], ascending=False)['Criminal Cases']
    .head(10)
)

sns.barplot(y=cases_2004.values, x=cases_2004.index, palette="Set2")
plt.xticks(rotation=90)
plt.xlabel('Constituency')
plt.ylabel('Criminal Cases')
plt.title("Different Constituency and Criminal cases count in 2004 Elections", fontweight="bold")
# plt.show() renders the figure and keeps the Text(...) object returned by
# plt.title out of the cell output, matching the other plotting cells.
plt.show()

In [None]:
# Ten constituencies with the most criminal cases in 2009, shaded by count.
(
    city_criminal_cases_2009
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Criminal-case totals of the ten constituencies with the most cases in 2009.
cases_2009 = (
    city_criminal_cases_2009
    .sort_values(['Criminal Cases'], ascending=False)['Criminal Cases']
    .head(10)
)
sns.barplot(y=cases_2009.values, x=cases_2009.index, palette="Set2")
plt.xticks(rotation=90)
plt.xlabel('Constituency')
plt.ylabel('Criminal Cases')
plt.title("Different Constituency and Criminal cases count in 2009 Elections", fontweight="bold")
# plt.show() renders the figure and keeps the Text(...) object returned by
# plt.title out of the cell output, matching the other plotting cells.
plt.show()

In [None]:
# Ten constituencies with the most criminal cases in 2014, shaded by count.
(
    city_criminal_cases_2014
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Criminal-case totals of the ten constituencies with the most cases in 2014.
cases_2014 = (
    city_criminal_cases_2014
    .sort_values(['Criminal Cases'], ascending=False)['Criminal Cases']
    .head(10)
)
sns.barplot(y=cases_2014.values, x=cases_2014.index, palette="Set2")
plt.xticks(rotation=90)
plt.xlabel('Constituency')
plt.ylabel('Criminal Cases')
plt.title("Different Constituency and Criminal cases count in 2014 Elections", fontweight="bold")
# plt.show() renders the figure and keeps the Text(...) object returned by
# plt.title out of the cell output, matching the other plotting cells.
plt.show()

In [None]:
# Ten constituencies with the most criminal cases in 2019, shaded by count.
(
    city_criminal_cases_2019
    .sort_values(by='Criminal Cases', ascending=False)
    .head(10)
    .style
    .background_gradient(subset=['Criminal Cases'], cmap='PuBu')
)

In [None]:
# Criminal-case totals of the ten constituencies with the most cases in 2019.
cases_2019 = (
    city_criminal_cases_2019
    .sort_values(['Criminal Cases'], ascending=False)['Criminal Cases']
    .head(10)
)
sns.barplot(y=cases_2019.values, x=cases_2019.index, palette="Set2")
plt.xticks(rotation=90)
plt.xlabel('Constituency')
plt.ylabel('Criminal Cases')
plt.title("Different Constituency and Criminal cases count in 2019 Elections", fontweight="bold")
# plt.show() renders the figure and keeps the Text(...) object returned by
# plt.title out of the cell output, matching the other plotting cells.
plt.show()

## Work in Progress... ⏳