# The Great Indian Parliament - Lok Sabha


This notebook <b>introduces you to all 540 sitting members of parliament of the seventeenth Lok Sabha, India</b> (as on 16-Jun-2021) and includes their demographics (age, gender, caste category) and constituencies (and states). <br>
(data source: http://loksabhaph.nic.in)

This code could be recycled for basic data cleaning; it also highlights some basic details about the members (e.g. average age>56 years, ~15% women, 24% SC/ST).

This notebook is divided into two sections - <br>
[1. Reading and cleaning data](#read-data) <br>
[2. Basic Exploratory Data Analysis (EDA)](#eda)

<b>Future work:</b> The plan behind collecting and sharing this data is to conduct a thorough analysis of the development of the Indian states vis-à-vis the demographics of their representatives in the Lok Sabha.

### 1. Reading and cleaning data ###
<div id="read-data"></div>

In [None]:
#import packages

import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# Print the full path of every file available under the Kaggle input directory
input_root = '/kaggle/input'
for folder, _, files in os.walk(input_root):
    for file_name in files:
        print(os.path.join(folder, file_name))

In [None]:
# Load the state-wise list of Lok Sabha members, report its dimensions
# and preview the first rows
csv_path = '/kaggle/input/indiastatewiseloksabhamembers/loksabha_seats_statewise.csv'
mp_list = pd.read_csv(csv_path)

print("row, column = ", mp_list.shape)
mp_list.head()

In [None]:
#check variables and data types (column names, non-null counts and dtypes)
mp_list.info()

In [None]:
#convert to appropriate data type
#DOB is parsed with pandas' default date inference and written back to the same column
#NOTE(review): if the CSV stores dates day-first (e.g. 16-06-1965), dayfirst=True may be needed - confirm against the raw data
mp_list['DOB'] = pd.to_datetime(mp_list['DOB'])
#re-run info() to confirm the dtype of DOB after the conversion
mp_list.info()

In [None]:
#view the last rows of the frame after the DOB conversion
mp_list.tail()

### 2. Basic Exploratory Data Analysis (EDA) ###
<div id="eda"></div>

<b>More than a third of the members are aged 50-60 years</b>, and therefore the mean, median and mode are all in this range.

In [None]:
age = mp_list['Age']

# Central-tendency values are computed once and reused for the marker lines and their legend labels
age_median = age.median()
age_mean = age.mean()
age_mode = age.mode().values[0]

# Histogram with KDE curve and rug marks of member ages
sns.set_style(style='white')
sns.displot(age, kde=True, rug=True, height=5, aspect=2, color="#4b4b4b")

# One dashed vertical line per statistic, labelled with its value truncated to whole years
for stat_value, line_colour, stat_name in (
    (age_median, '#fd5c02', "median"),
    (age_mean, 'b', "mean"),
    (age_mode, 'black', "mode"),
):
    plt.axvline(stat_value, color=line_colour, linestyle='--',
                label=stat_name + " = " + str(int(stat_value)) + " years")

plt.legend(fontsize=14, facecolor='silver')
plt.title("Age distribution of sitting members of Loksabha, 2021", fontsize=20)

# Footnote: total members and how many have no age recorded (count() skips missing values)
n_members = mp_list.shape[0]
n_missing_age = n_members - age.count()
footnote = (
    "*Total number of sitting members = " + str(n_members)
    + "\n Date of birth (and therefore age) is missing for "
    + str(n_missing_age) + " members"
)

plt.figtext(0.5, - 0.05, footnote, ha="center", fontsize=14, fontstyle="italic",
            bbox={"facecolor":"w", "alpha":0.5, "pad":5})

plt.show()

In [None]:
# Summary statistics of member age; mean, skew and kurtosis are rounded to 2 decimals
age_stats = {
    "median = ": age.median(),
    "mean = ": round(age.mean(), 2),
    "mode = ": age.mode().values[0],
    "skew = ": round(age.skew(), 2),
    "kurtosis = ": round(age.kurt(), 2),
}
for caption, value in age_stats.items():
    print(caption, value)

# Number of members with a recorded age (count() skips missing values)
age.count()

The states differ widely in size and population, so the number of representatives per state (and union territory) also varies widely. <b>The 5 largest states account for more than 46% of the members.</b> 

In [None]:
# The UT column flags union territories (1) versus states (0); count the distinct names in each group
state_rows = mp_list['UT'] == 0
ut_rows = mp_list['UT'] == 1

print("number of states = ", mp_list.loc[state_rows, 'State'].nunique())
print("number of union territories = ", mp_list.loc[ut_rows, 'State'].nunique())

In [None]:
# Seats per state, sorted ascending so the largest state is drawn at the top of the
# horizontal bar chart. The Series is named explicitly and its index name cleared
# because value_counts() names its result after the column ('State') before pandas 2.0
# but 'count' (with the index named 'State') from 2.0 onwards; relying on the default
# name makes the later pct['State'] / pct1['index'] lookups fail on newer pandas.
pct = (
    mp_list['State'].value_counts()
    .sort_values(ascending=True)
    .rename('State')
    .rename_axis(None)
    .to_frame()
)
# Share of all members held by each state, rounded to a whole-number percentage
pct['pct'] = round(pct['State']*100/pct['State'].sum()).astype(int)
pct['label'] = pct['pct'].astype(str)+"%"

# Data for the inset: 'pct' holds whole numbers, so this keeps every state whose
# share rounds to at least 1%. Each state is labelled "<state> (<pct>%)" for the legend,
# largest share first.
pct1 = pct[pct['pct'] >= 1].reset_index()
pct1['label'] = pct1['index']+" ("+pct1['pct'].astype(str)+"%)"
pct1 = pct1.set_index('label').sort_values(by=['pct'], ascending=False)


def rescale(values):
    """Min-max scale values to the 0-1 range used to pick colours from a colormap."""
    return (values - np.min(values)) / (np.max(values) - np.min(values))


# Main chart: number of members per state, with the count printed at the end of each bar
fig, ax = plt.subplots(figsize=(6,8))
pct['State'].plot.barh(ax=ax, color=["#fd5c02"])
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.bar_label(ax.containers[0], label_type='edge', padding=3)
ax.margins(x=0.5)
ax.set_xticks([])

# Inset: a single stacked bar showing each state's percentage share,
# shaded from light to dark according to the size of the share
axin = ax.inset_axes([0.2, 0.35, 1.25, 0.25])
for side in ('top', 'right', 'bottom', 'left'):
    axin.spines[side].set_visible(False)

my_cmap = plt.get_cmap("binary")
y = pct1['pct']
pct1[['pct']].T.plot.barh(ax=axin, stacked=True, color=my_cmap(rescale(y)))
axin.margins(x=0.5)
axin.legend(bbox_to_anchor=(0.65, 2.5), loc='upper left')
axin.set_yticks([])
axin.set_xticks([])

plt.title('Statewise number of sitting members of Loksabha, 2021', fontsize=20, pad=15)
plt.show()

<b>~15% of the loksabha members are women.</b>

In [None]:
# Number of members per value of the Woman flag; value_counts() puts the larger group first
pct = mp_list['Woman'].value_counts()

plt.style.use('ggplot')


def gender_wedge_label(p):
    """Format a wedge label as its percentage plus the member count that percentage represents."""
    return '{:.1f}% \n({:.0f} members)'.format(p, (p/100)*pct.sum())


# The second wedge is pulled out of the pie (explode) and drawn in orange
gender_pie_options = dict(
    subplots=True,
    figsize=(20,8),
    autopct=gender_wedge_label,
    fontsize=12,
    colors=['#4b4b4b', '#fd5c02'],
    labels=["Men","Women"],
    wedgeprops={'linewidth': 1.0, 'edgecolor': 'white'},
    textprops={'fontsize': '30', 'weight': 'bold'},
    startangle=45,
    explode=(0,0.15),
    labeldistance=1.1,
)
pct.plot.pie(**gender_pie_options)

plt.axis("off")

title = "Gender distribution of sitting members of Loksabha, 2021"
plt.title(title, fontsize=20)

footnote = "*Total number of sitting members = " + str(mp_list.shape[0])
plt.figtext(0.5, 0.15, footnote, ha="center", fontsize=14, fontstyle="italic",
            bbox={"facecolor":"w", "alpha":0.5, "pad":5})
plt.show()

<b>Only 7 states and 1 union territory have above average (>14.6%) women representatives.</b>

With 11 women representatives in the Lok Sabha, West Bengal has the largest number of women members.

In [None]:
sns.set_style(style='white')

# Total members per state. The Series is named explicitly and its index name cleared
# because value_counts() names its result after the column ('State') before pandas 2.0
# but 'count' (with the index named 'State') from 2.0 onwards, which would turn a
# rename(columns={'State': ...}) into a silent no-op.
pcts = (
    mp_list['State'].value_counts()
    .sort_values(ascending=True)
    .rename('All')
    .rename_axis(None)
    .to_frame()
)

# Women members per state, joined to the state totals (only states with at least one woman remain)
is_woman = mp_list['Woman'] == 1
pct = (
    mp_list.loc[is_woman, 'State'].value_counts()
    .sort_values(ascending=True)
    .rename('Women')
    .rename_axis(None)
    .to_frame()
    .merge(pcts, left_index=True, right_index=True)
)
women_share = pct['Women']*100/pct['All']
pct['pct'] = women_share.round().astype(int)
pct['label'] = pct['Women'].astype(str)+" ... out of "+pct['All'].astype(str)+" MPs"+" ... ("+pct['pct'].astype(str)+"%)" 

# Keep only states whose share of women exceeds the overall share of women among all members.
# The overall share is derived from the data (women / all members) instead of a hard-coded
# ratio, and is compared against the unrounded state share so rounding cannot move a state
# across the threshold.
# NOTE(review): any narrative count of "states above average" should be re-checked after re-running.
avg_women_pct = is_woman.mean()*100
pct = pct[women_share > avg_women_pct]

# Men are stored as negative counts so their bars extend to the left of zero,
# opposite the women bars; the bar label shows the positive count (blank when zero).
pct1 = pct.copy()
pct1['Men'] = pct1['Women']-pct1['All']
pct1['label'] = -pct1['Men']
pct1 = pct1.sort_values(by=['Women','label'], ascending=[True, True])
pct1['label'] = pct1['label'].astype(str)
pct1.loc[(pct1['label'] == '0'), 'label'] = ''

fig, ax = plt.subplots(figsize=(6,8))
pct1[['Men','Women']].plot.barh(ax=ax, stacked=True, color=['#4b4b4b','#fd5c02'])
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.bar_label(ax.containers[0], pct1['label'], label_type='edge', padding=5)
ax.bar_label(ax.containers[1], label_type='edge', padding=6)
ax.margins(x=0.5)
ax.set_xticks([])
ax.legend(bbox_to_anchor=(1.05, .85), loc='upper left')


plt.title('Statewise and Genderwise number of sitting members of Loksabha, 2021*', fontsize=20, pad=15)
# The threshold in the footnote is the same value used for the filter above
footnote = "*States with above average percent of woman members (>" + "{:.1f}".format(avg_women_pct) + "%)"
plt.figtext(0.5, 0.05, footnote, ha="center", fontsize=14, fontstyle="italic", bbox={"facecolor":"w", "alpha":0.5, "pad":5})

plt.show()


<b>24% of the members are from the SC/ST community.</b>

In [None]:
# Flag members by caste category: 0 for 'General', 1 for every other Category value
# (rows with a missing Category are also flagged 1)
mp_list['SC/ST'] = (mp_list['Category'] != 'General').astype('int64')

# Members per flag value; value_counts() puts the larger group first
pct = mp_list['SC/ST'].value_counts()
plt.style.use('ggplot')

# Wedge label: percentage plus the member count that percentage represents
caste_wedge_label = lambda p: '{:.1f}% \n({:.0f} members)'.format(p, (p/100)*pct.sum())

caste_pie_options = dict(
    subplots=True,
    figsize=(20,8),
    autopct=caste_wedge_label,
    fontsize=12,
    colors=['#4b4b4b', '#fd5c02'],
    labels=["General","SC/ST"],
    wedgeprops={'linewidth': 1.0, 'edgecolor': 'white'},
    textprops={'fontsize': '30', 'weight': 'bold'},
    startangle=45,
    explode=(0,0.15),
    labeldistance=1.1,
)
pct.plot.pie(**caste_pie_options)

plt.axis("off")
plt.title("Sitting members of parliament by caste, 2021", fontsize=20)

plt.show()