In [3]:
# Importing Dependencies 
import pandas as pd

In [4]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one,
# so that expressions in the middle of a cell are rendered as output too.
InteractiveShell.ast_node_interactivity = "all"
# Alternative setting (show only the last expression of a cell), kept for reference:
# InteractiveShell.ast_node_interactivity = "last_expr"

In [5]:
# Load the raw election results workbook into a pandas DataFrame
election_data_unclean = pd.read_excel(io='../Data/ResultatElection2019.xlsx')

#### Let's familiarize ourselves with the data <br> [source: CENI](http://www.ceni.mr/node/91)

In [6]:
# Preview the first five rows of the raw data
election_data_unclean.head(n=5)

Unnamed: 0,codeWilaya,libWilaya_ar,libMoughataaAr,libCommune_ar,libelleCenter_ar,libWilaya,libMoughataaFr,libCommune,libelleCenter,N Centre,N Bureau,nbInscrits,nbVotant,nbVoteNull,nbVoteNeutre,nbSuffrage,nbVoix,Candidat,CandidatAr
0,10,كيدي ماغا,غابو,بادجيم,مدرسة كليل,Guidimagha,Ghabou,Baydjam,Ecole Keleyle,7,1,164,120,5,0,115,96,Mohamed Cheïkh Mohamed Ahmed ElGHAZOUANI (Ghaz...,محمد الشيخ محمد أحمد الشيخ الغزواني
1,10,كيدي ماغا,غابو,بادجيم,مدرسة كليل,Guidimagha,Ghabou,Baydjam,Ecole Keleyle,7,1,164,120,5,0,115,13,Sidi Mohamed Boubacar BOUSSALEF (Boubacar),سيدي محمد بوبكر بوسالف
2,10,كيدي ماغا,غابو,بادجيم,مدرسة كليل,Guidimagha,Ghabou,Baydjam,Ecole Keleyle,7,1,164,120,5,0,115,6,Biram Dah Dah ABEID (Biram),برام الداه الداه اعبيد
3,10,كيدي ماغا,غابو,بادجيم,مدرسة كليل,Guidimagha,Ghabou,Baydjam,Ecole Keleyle,7,1,164,120,5,0,115,0,Mohamed Sidi MAOULOUD (Maouloud),محمد سيدي مولود
4,10,كيدي ماغا,غابو,بادجيم,مدرسة كليل,Guidimagha,Ghabou,Baydjam,Ecole Keleyle,7,1,164,120,5,0,115,0,Mohamed Lemine El Mourteji El WAVI (Wavi),محمد الامين المرتجي الوافي


In [7]:
# Derive the office count from the data instead of hard-coding the row and candidate
# counts. Assumes the frame holds exactly one row per (voting office, candidate)
# pair, so rows / distinct candidates = offices — TODO confirm against the source.
n_candidates = election_data_unclean['Candidat'].nunique()
n_offices = len(election_data_unclean) // n_candidates
print(f'the number of unique voting offices: {n_offices}')

the number of unique voting offices: 3861


In [8]:
# Summarize the dataframe: number of entries, column names, non-null counts,
# dtypes and memory usage
election_data_unclean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23166 entries, 0 to 23165
Data columns (total 19 columns):
codeWilaya          23166 non-null int64
libWilaya_ar        23166 non-null object
libMoughataaAr      23166 non-null object
libCommune_ar       23166 non-null object
libelleCenter_ar    23166 non-null object
libWilaya           23166 non-null object
libMoughataaFr      23166 non-null object
libCommune          23166 non-null object
libelleCenter       23166 non-null object
N Centre            23166 non-null int64
N Bureau            23166 non-null int64
nbInscrits          23166 non-null int64
nbVotant            23166 non-null int64
nbVoteNull          23166 non-null int64
nbVoteNeutre        23166 non-null int64
nbSuffrage          23166 non-null int64
nbVoix              23166 non-null int64
Candidat            23166 non-null object
CandidatAr          23166 non-null object
dtypes: int64(9), object(10)
memory usage: 3.4+ MB


In [9]:
# Flag, per column, whether it contains at least one missing value
election_data_unclean.isna().any()

codeWilaya          False
libWilaya_ar        False
libMoughataaAr      False
libCommune_ar       False
libelleCenter_ar    False
libWilaya           False
libMoughataaFr      False
libCommune          False
libelleCenter       False
N Centre            False
N Bureau            False
nbInscrits          False
nbVotant            False
nbVoteNull          False
nbVoteNeutre        False
nbSuffrage          False
nbVoix              False
Candidat            False
CandidatAr          False
dtype: bool

In [10]:
# Collect the distinct candidate names and display them
candidates_list = election_data_unclean.Candidat.unique()
candidates_list

array(['Mohamed Cheïkh Mohamed Ahmed ElGHAZOUANI (Ghazouani)',
       'Sidi Mohamed Boubacar BOUSSALEF (Boubacar)',
       'Biram Dah Dah ABEID (Biram)', 'Mohamed Sidi MAOULOUD (Maouloud)',
       'Mohamed Lemine El Mourteji El WAVI (Wavi)',
       'KANE Hamidou Baba (Kane)'], dtype=object)

In [11]:
# Collect the distinct states (wilayas), including the overseas 'Etranger' entry,
# and display them
states_list = election_data_unclean.libWilaya.unique()
states_list

array(['Guidimagha', 'Tagant', 'Brakna', 'Hodh El Gharbi',
       'Dakhlet Nouadhibou', 'Tiris Zemmour', 'Trarza', 'Hodh Chargui',
       'Assaba', 'Gorgol', 'Nouakchott Ouest', 'Nouakchott Sud',
       'Inchiri', 'Adrar', 'Nouakchott Nord', 'Etranger'], dtype=object)

In [12]:
# Show the column names as a NumPy array
election_data_unclean.columns.to_numpy()

array(['codeWilaya', 'libWilaya_ar', 'libMoughataaAr', 'libCommune_ar',
       'libelleCenter_ar', 'libWilaya', 'libMoughataaFr', 'libCommune',
       'libelleCenter', 'N Centre', 'N Bureau', 'nbInscrits', 'nbVotant',
       'nbVoteNull', 'nbVoteNeutre', 'nbSuffrage', 'nbVoix', 'Candidat',
       'CandidatAr'], dtype=object)

#### Data manipulation

In [13]:
# Remove the Arabic-language label columns; their French counterparts are kept
election_data_unclean = election_data_unclean.drop(
    columns=[
        'libWilaya_ar',
        'libMoughataaAr',
        'libCommune_ar',
        'libelleCenter_ar',
        'CandidatAr',
    ]
)

In [14]:
# English replacements for the remaining column headers, in column order.
# The trailing comment on each entry names the original header it replaces.
english_headers_list = [
    'state_code',       # codeWilaya
    'state',            # libWilaya
    'county',           # libMoughataaFr
    'town',             # libCommune
    'center',           # libelleCenter
    'center_id',        # N Centre
    'office_id',        # N Bureau
    'registered',       # nbInscrits
    'voted',            # nbVotant
    'void',             # nbVoteNull
    'neutral',          # nbVoteNeutre
    'suffrage',         # nbSuffrage
    'candidate_votes',  # nbVoix
    'candidate',        # Candidat
]

In [15]:
# Rename the dataframe's headers to English.
# The assignment is positional: english_headers_list must hold exactly one name per
# remaining column, listed in the dataframe's current column order.
election_data_unclean.columns = english_headers_list

In [16]:
# Preview the last five rows to check the renamed headers
election_data_unclean.tail(n=5)

Unnamed: 0,state_code,state,county,town,center,center_id,office_id,registered,voted,void,neutral,suffrage,candidate_votes,candidate
23161,4,Gorgol,Kaédi,Tokomadji,Tokomadji,7,1,321,231,2,4,225,17,Sidi Mohamed Boubacar BOUSSALEF (Boubacar)
23162,4,Gorgol,Kaédi,Tokomadji,Tokomadji,7,1,321,231,2,4,225,69,Biram Dah Dah ABEID (Biram)
23163,4,Gorgol,Kaédi,Tokomadji,Tokomadji,7,1,321,231,2,4,225,3,Mohamed Sidi MAOULOUD (Maouloud)
23164,4,Gorgol,Kaédi,Tokomadji,Tokomadji,7,1,321,231,2,4,225,0,Mohamed Lemine El Mourteji El WAVI (Wavi)
23165,4,Gorgol,Kaédi,Tokomadji,Tokomadji,7,1,321,231,2,4,225,77,KANE Hamidou Baba (Kane)


In [17]:
# Target dtype for every count column. 'int64' is spelled out instead of the builtin
# `int`, whose width can differ by platform and NumPy version.
convert_dtypes = {
    'registered': 'int64',
    'voted': 'int64',
    'void': 'int64',
    'neutral': 'int64',
    'suffrage': 'int64',
    'candidate_votes': 'int64',
}

# Cast the count columns and display the resulting dtypes
election_data_unclean = election_data_unclean.astype(convert_dtypes)
election_data_unclean.dtypes

# Expose the cleaned frame under its own name. A copy is taken so `election_data` is
# an independent object rather than a second reference to `election_data_unclean`;
# with a plain assignment, any in-place change to one would silently alter the other.
election_data = election_data_unclean.copy()

state_code          int64
state              object
county             object
town               object
center             object
center_id           int64
office_id           int64
registered          int64
voted               int64
void                int64
neutral             int64
suffrage            int64
candidate_votes     int64
candidate          object
dtype: object

In [18]:
election_data_unclean[election_data_unclean['state']=='Etranger'].head()

Unnamed: 0,state_code,state,county,town,center,center_id,office_id,registered,voted,void,neutral,suffrage,candidate_votes,candidate
7542,0,Etranger,Asie,Emirats Arabes Unis,Interieur 1,2,1,465,255,3,10,242,165,Mohamed Cheïkh Mohamed Ahmed ElGHAZOUANI (Ghaz...
7543,0,Etranger,Asie,Emirats Arabes Unis,Interieur 1,2,1,465,255,3,10,242,47,Sidi Mohamed Boubacar BOUSSALEF (Boubacar)
7544,0,Etranger,Asie,Emirats Arabes Unis,Interieur 1,2,1,465,255,3,10,242,7,Biram Dah Dah ABEID (Biram)
7545,0,Etranger,Asie,Emirats Arabes Unis,Interieur 1,2,1,465,255,3,10,242,22,Mohamed Sidi MAOULOUD (Maouloud)
7546,0,Etranger,Asie,Emirats Arabes Unis,Interieur 1,2,1,465,255,3,10,242,0,Mohamed Lemine El Mourteji El WAVI (Wavi)


In [19]:
# Optional export: uncomment the next line to save the cleaned data as CSV
# election_data.to_csv(path_or_buf='../Data/election_data.csv')

In [20]:
mauri_states = ['Guidimagha', 'Tagant', 'Brakna', 'Hodh El Gharbi',
       'Dakhlet Nouadhibou', 'Tiris Zemmour', 'Trarza', 'Hodh Chargui',
       'Assaba', 'Gorgol', 'Nouakchott Ouest', 'Nouakchott Sud',
       'Inchiri', 'Adrar', 'Nouakchott Nord',]

In [21]:
# df with on mauritanian states, not including etranger 
mauri_states_df = election_data[election_data.state.isin(mauri_states)]
mauri_states_df.shape

(22896, 14)

In [22]:
# gaz = election_data[(election_data.candidate == candidates_list[0]) & (election_data.voted > 200)]
gaz = election_data[(election_data.candidate == candidates_list[0])]

In [23]:
gaz.candidate_votes.sum() / gaz.suffrage.sum() 

0.5200761855570262

In [26]:
# Rows for the first candidate in candidates_list within the first state of mauri_states
guid_gaz_df = election_data.loc[
    election_data['state'].eq(mauri_states[0])
    & election_data['candidate'].eq(candidates_list[0])
]