In [1]:
# Import libraries
import pandas as pd
import pyreadstat as pyr

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


In [2]:
def wrangle(filename):
    """
    Load an SPSS ``.sav`` survey file and return a labelled subset of it.

    Parameters
    ----------
    filename : str or path-like
        Path of the ``.sav`` file; passed unchanged to ``pyreadstat.read_sav``.

    Returns
    -------
    df : DataFrame
        Indexed by ``id`` with the columns ``country``, ``community``,
        ``state``, ``ctry_direction``, ``ctry_present_econ``,
        ``rel_econ_past12m``, ``fear_pol_intimidation``, ``trust_police`` and
        ``pay_bribe_police``. Coded answers are replaced by the value labels
        stored in the file's metadata.

    Raises
    ------
    KeyError
        If a selected column is missing from the file, or has no entry in
        ``meta.variable_value_labels``.
    """
    # Source variable -> analysis column name (insertion order = output order).
    column_names = {
        'RESPNO': 'id',
        'COUNTRY': 'country',
        'URBRUR': 'community',
        'REGION': 'state',
        'Q3': 'ctry_direction',
        'Q4A': 'ctry_present_econ',
        'Q6': 'rel_econ_past12m',
        'Q40': 'fear_pol_intimidation',
        'Q43G': 'trust_police',
        'Q49T': 'pay_bribe_police',
    }

    # meta carries the variable/value labels that describe the raw frame
    df, meta = pyr.read_sav(filename)

    # Work on an explicit copy of the selection so the assignments below
    # modify this frame rather than a temporary view of the full survey.
    df = df[list(column_names)].copy()

    # Decode every column except the respondent id. Assigning the result back
    # (instead of a chained in-place replace) keeps working under pandas
    # Copy-on-Write, where the chained form leaves df unchanged.
    for source_col in column_names:
        if source_col == 'RESPNO':
            continue
        df[source_col] = df[source_col].replace(meta.variable_value_labels[source_col])

    return df.rename(columns=column_names).set_index('id')

In [3]:
# Load the survey file and keep the labelled, renamed subset returned by wrangle()
df = wrangle('afrobarometer.sav')

In [4]:
# Saves the wrangled data as a CSV file
# df.to_csv('Nigeria.csv', index=False)

In [5]:
# metadata
# print(meta.column_names_to_labels)
# meta.variable_value_labels

In [7]:
# Number of respondents per (ctry_direction answer, state) pair, as a flat table
df_piv = (
    df['state']
    .groupby(df['ctry_direction'])
    .value_counts()
    .rename('frequency')
    .reset_index()
)

df_piv

Unnamed: 0,ctry_direction,state,frequency
0,Don't know,Nampula,103
1,Don't know,São Tomé,70
2,Don't know,Greater Accra,61
3,Don't know,Northern Province,46
4,Don't know,Plaine Wilhems,37
...,...,...,...
1080,Refused,South East,1
1081,Refused,São Vicente,1
1082,Refused,Thaba-Tseka,1
1083,Refused,Tonkpi,1


In [None]:
# Rows of the frequency table for the answer "Going in the wrong direction"
df_piv[df_piv['ctry_direction'] == "Going in the wrong direction"]

In [None]:
# Nigerian respondents only; the Nigeria-specific cells below all read df_nigeria
df_nigeria = df[df['country'] == 'Nigeria']

# Share of Nigerian respondents per answer on the country's overall direction
df_nigeria['ctry_direction'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Perception of Nigeria's overall direction by citizens");

In [None]:
# Share of Nigerian respondents per answer on the present economic situation
(
    df_nigeria['ctry_present_econ']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of Nigeria's economic situation by citizens")
);

In [None]:
# Share of Nigerian respondents per rel_econ_past12m answer, smallest share first
df_nigeria['rel_econ_past12m'].value_counts(normalize=True).sort_values()

In [None]:
# Same shares as a horizontal bar chart
(
    df_nigeria['rel_econ_past12m']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of the change in Nigeria's economic situation by citizens")
);

In [None]:
# Respondent counts per fear_pol_intimidation answer (Nigeria only)
df_nigeria['fear_pol_intimidation'].value_counts()

In [None]:
# Respondent counts per trust_police answer (Nigeria only)
df_nigeria['trust_police'].value_counts()

In [None]:
# Respondent counts per pay_bribe_police answer (Nigeria only)
df_nigeria['pay_bribe_police'].value_counts()

In [None]:
# Number of respondents per country in the full wrangled survey
df['country'].value_counts()

In [None]:
# Share of Nigerian respondents per ctry_direction answer
df_nigeria['ctry_direction'].value_counts(normalize=True)

In [None]:
# Respondent count for every (state, ctry_direction) combination in Nigeria
pivotal = df_nigeria.value_counts(subset=['state', 'ctry_direction']).to_frame()

In [None]:
# Move the (state, ctry_direction) index levels back into ordinary columns
pivotal = pivotal.reset_index()

In [None]:
# Wide table: one row per state, one column per ctry_direction answer.
# No `values` is given, so the answer columns sit under the remaining count column.
pivotal_pivot = pivotal.pivot(index='state', columns='ctry_direction')

In [None]:
# Display the state x answer table
pivotal_pivot

In [None]:
# Per-state difference: "right direction" count minus "wrong direction" count.
# The outer column level is dropped rather than selected by name, because
# value_counts() names its count column differently across pandas versions.
answer_counts = pivotal_pivot.droplevel(0, axis=1)
dir_diff_by_state = (
    answer_counts['Going in the right direction'] - answer_counts['Going in the wrong direction']
).to_frame()

In [None]:
# Give the unnamed difference column (labelled 0) a descriptive name
dir_diff_by_state = dir_diff_by_state.rename(columns={0: 'direction_difference'})

In [None]:
# Turn the state index into a regular column so it can be used as a plot axis
dir_diff_by_state = dir_diff_by_state.reset_index()

In [None]:
# Horizontal bars of the right-minus-wrong difference per state, smallest first.
# States with a missing difference are left out.
plt.rcParams["figure.figsize"] = (14,10)
ax = (
    dir_diff_by_state
    .dropna()
    .sort_values(by='direction_difference')
    .plot.barh(x='state', y='direction_difference', color='green', legend=False)
)
ax.set_ylabel('State')
ax.set_title('Difference between Nigerians that think Nigeria is going in the right direction and those that think it is going in the wrong direction by state');


In [None]:
# Same differences as a table, most negative first
dir_diff_by_state.sort_values(by='direction_difference')

In [None]:
# Country labels present in the survey, ordered by respondent count
df['country'].value_counts().index

# Nigerian Police

In [None]:
# Reload the raw .sav file. This rebinds df (until now the wrangled frame)
# to the undecoded survey and exposes its metadata as meta.
df, meta = pyr.read_sav('afrobarometer.sav')

In [None]:
# Police data wrangling
# Select the crime/police variables chosen from the metadata and replace
# their numeric codes with the value labels stored in the metadata.
columns = ['COUNTRY','REGION','Q10A','Q10B','Q11A','Q11B','Q43G', 'Q44E', 'Q49P','Q49Q','Q49R','Q49T']
df_police = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_police[col]
# leaves df_police unchanged under pandas Copy-on-Write.
for col in columns:
    df_police[col] = df_police[col].replace(meta.variable_value_labels[col])

In [None]:
# Preview the decoded police subset
df_police.head()

In [None]:
# Replace the survey variable codes with descriptive column names
# (positional: same order as the selection list used to build df_police)
df_police = df_police.set_axis(
    [
        'country',
        'state',
        'neigborhood_safety',
        'fear_of_crime',
        'stolen_from',
        'attacked',
        'trust_police',
        'corrupt_police',
        'ease_of_police_assist',
        'time_to_police_assist',
        'paybribe_police_assist',
        'paybribe_avoid_police',
    ],
    axis=1,
)

In [None]:
# Keep only the Nigerian respondents of the police subset
mask = df_police['country'].eq('Nigeria')
df_nigpol = df_police.loc[mask]

df_nigpol.head()

In [None]:
# Share of Nigerian respondents per stolen_from answer
(
    df_nigpol['stolen_from']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Have you been stolen from?", color='green')
);

In [None]:
# Share of Nigerian respondents per attacked answer
(
    df_nigpol['attacked']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Have you been attacked?", color='green')
);

In [None]:
# Share of Nigerian respondents per trust_police answer
(
    df_nigpol['trust_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you trust the police?", color='green')
);

In [None]:
# Share of Nigerian respondents per corrupt_police answer
(
    df_nigpol['corrupt_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of police corruption", color='green')
);

In [None]:
# Answer shares among respondents whose answer is not 'No contact'
(
    df_nigpol.loc[df_nigpol['ease_of_police_assist'] != 'No contact', 'ease_of_police_assist']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="How easy is it to get police assistance?", color='green')
);

In [None]:
# Answer shares among respondents whose answer is not 'No contact'
(
    df_nigpol.loc[df_nigpol['paybribe_police_assist'] != 'No contact', 'paybribe_police_assist']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you pay bribe to receive assistance from the police?", color='green')
);


In [None]:
# Answer shares among respondents whose answer is not 'No contact'
(
    df_nigpol.loc[df_nigpol['paybribe_avoid_police'] != 'No contact', 'paybribe_avoid_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you pay bribe to avoid the police?", color='green')
);

# Afrobarometer Nigeria 2021 data

In [2]:
# Load the Nigeria 2021 survey file; rebinds df (raw coded answers) and meta (its metadata)
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')

In [3]:
# Variable name -> question label lookup, used to pick the columns for each analysis
print(meta.column_names_to_labels)

{'RESPNO': 'Respondent number', 'URBRUR': 'Urban or Rural Primary Sampling Unit', 'REGION': 'Province or region', 'EA_SVC_A': 'EA-SVC-A. Electricity grid in the PSU/EA', 'EA_SVC_B': 'EA-SVC-B. Piped water system in the PSU/EA', 'EA_SVC_C': 'EA-SVC-C. Sewage system in the PSU/EA', 'EA_SVC_D': 'EA-SVC-D. Mobile phone service in the PSU/EA', 'EA_SVC_E': 'EA-SVC-E.  Borehole or tubewell in PSU/EA', 'EA_FAC_A': 'EA-FAC-A. Post office in the PSU/EA', 'EA_FAC_B': 'EA-FAC-B. School in the PSU/EA', 'EA_FAC_C': 'EA-FAC-C. Police station in the PSU/EA', 'EA_FAC_D': 'EA-FAC-D. Health Clinic in the PSU/EA', 'EA_FAC_E': 'EA-FAC-E. Market stalls in the PSU/EA', 'EA_FAC_F': 'EA-FAC-F. Bank or money services in the PSU/EA', 'EA_FAC_G': 'EA-FAC-G. Paid transport in the PSU/EA', 'EA_SEC_A': 'EA-SEC-A. Police in the PSU/EA', 'EA_SEC_B': 'EA-SEC-B. Soldiers/army in the PSU/EA', 'EA_SEC_C': 'EA-SEC-C. Roadblocks by police/army in the PSU/EA', 'EA_SEC_D': 'EA-SEC-D. Customs checkpoints in the PSU/EA', 'EA_SE

In [6]:
# Variable name -> {numeric code: answer label} mapping used to decode the answers
meta.variable_value_labels

{'URBRUR': {1.0: 'Urban', 2.0: 'Rural'},
 'REGION': {620.0: 'ABIA',
  621.0: 'ADAMAWA',
  622.0: 'AKWA IBOM',
  623.0: 'ANAMBRA',
  624.0: 'BAUCHI',
  625.0: 'BAYELSA',
  626.0: 'BENUE',
  627.0: 'BORNO',
  628.0: 'CROSS RIVER',
  629.0: 'DELTA',
  630.0: 'EBONYI',
  631.0: 'EDO',
  632.0: 'EKITI',
  633.0: 'ENUGU',
  634.0: 'FCT ABUJA',
  635.0: 'GOMBE',
  636.0: 'IMO',
  637.0: 'JIGAWA',
  638.0: 'KADUNA',
  639.0: 'KANO',
  640.0: 'KATSINA',
  641.0: 'KEBBI',
  642.0: 'KOGI',
  643.0: 'KWARA',
  644.0: 'LAGOS',
  645.0: 'NASARAWA',
  646.0: 'NIGER',
  647.0: 'OGUN',
  648.0: 'ONDO',
  649.0: 'OSUN',
  650.0: 'OYO',
  651.0: 'PLATEAU',
  652.0: 'RIVERS',
  653.0: 'SOKOTO',
  654.0: 'TARABA',
  655.0: 'YOBE',
  656.0: 'ZAMFARA'},
 'EA_SVC_A': {-1.0: 'Missing', 0.0: 'No', 1.0: 'Yes', 9.0: "Can't determine"},
 'EA_SVC_B': {-1.0: 'Missing', 0.0: 'No', 1.0: 'Yes', 9.0: "Can't determine"},
 'EA_SVC_C': {-1.0: 'Missing', 0.0: 'No', 1.0: 'Yes', 9.0: "Can't determine"},
 'EA_SVC_D': {-1.0: 'M

In [None]:
# (rows, columns) of the raw Nigeria frame
df.shape

## Analysis 1

In [None]:
# 1st analysis
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q1', 'Q2', 'Q4B']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

df_new.head()

In [None]:
# Descriptive names for REGION, Q1, Q2 and Q4B (positional, same order as the selection)
df_new = df_new.set_axis(
    ['State', 'Age', 'Language spoken at home', 'Present living conditions'],
    axis=1,
)

In [None]:
# Drop respondents whose age answer is the non-numeric label 'Refused'.
# The column is called 'Age' at this point ('Q1' no longer exists after the
# rename), and filtering on the label does not depend on a fixed row number.
df_new = df_new[df_new['Age'] != 'Refused']

In [None]:
# Preview the cleaned frame
df_new.head()

In [None]:
# One home-language label per respondent, sorted by label
df_lang = df_new['Language spoken at home'].sort_values()

In [None]:
def build_new_bar(data=None):
    """
    Build a bar chart of how many respondents speak each language at home.

    Parameters
    ----------
    data : Series, optional
        One language label per respondent. Defaults to the notebook-level
        ``df_lang``.

    Returns
    -------
    fig : plotly Figure
        One bar per language; bar height is the number of respondents.
    """
    if data is None:
        data = df_lang
    # Count respondents per language so the bars match the
    # "Frequency [count]" axis label.
    counts = data.value_counts()
    fig = px.bar(
        x=counts.index,
        y=counts.values,
        title="Languages spoken at home"
    )
    # Set axis labels
    fig.update_layout(xaxis_title="Language",
                     yaxis_title="Frequency [count]",
                    )
    return fig

# Build the language chart and render it
cb_fig = build_new_bar()
cb_fig.show()

In [None]:
# Number of respondents per state
df_new['State'].value_counts()

## Analysis 2

In [None]:
# 2nd analysis: going without food, water and medical care, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q7A', 'Q7B', 'Q7C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Without food', 'Without water', 'Without medical care']

df_new.head()

## Analysis 3

In [None]:
# 3rd analysis: living condition and unfair treatment by government, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q4B', 'Q5']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Living condition', 'Treated unfairly by govt based on econ status']

df_new.head()

## Analysis 4

In [None]:
# 4th analysis: political discussion and political freedoms, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q9', 'Q10A', 'Q10B', 'Q10C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Discuss politics', 'Freedom to say what you think', 'Freedom to join political organization', 'Freedom to chose who to vote for']

df_new.head()

In [None]:
# Variable name -> question label lookup, to choose the variables for the next analyses
print(meta.column_names_to_labels)

## Analysis 5

In [None]:
# 5th analysis: community meetings, raising issues and demonstrations, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q11A', 'Q11B', 'Q11C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Attend community meeting', 'Join others to raise an issue', 'Attend a demonstration']

df_new.head()

## Analysis 6

In [None]:
# 6th analysis: contact with councillors, MPs, party officials and traditional leaders, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q12A', 'Q12B', 'Q12C', 'Q12D']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Contact local government councillor', 'Contact MP', 'Contact political party official', 'Contact traditional leader']

df_new.head()

## Analysis 7

In [None]:
# 7th analysis: voting in, and fairness of, the last election, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q13', 'Q14']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Voted in last election', 'Freeness and fairness of last election']

df_new.head()

## Analysis 8

In [None]:
# 8th analysis: campaign rallies, party work and contact by party representatives, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q15A', 'Q15B', 'Q15C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Attended campaign rally', 'Worked for candidate or party', 'Contacted by party rep']

df_new.head()

## Analysis 9

In [7]:
# 9th analysis: the eight Q15D response slots, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q15D_1', 'Q15D_2', 'Q15D_3', 'Q15D_4', "Q15D_5", 'Q15D_6', 'Q15D_7', 'Q15D_8']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Response 1', 'Response 2', 'Response 3', 'Response 4', 'Response 5', 'Response 6', 'Response 7', 'Response 8']

df_new.head()

Unnamed: 0,State,Response 1,Response 2,Response 3,Response 4,Response 5,Response 6,Response 7,Response 8
0,IMO,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable
1,FCT ABUJA,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable
2,FCT ABUJA,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable
3,FCT ABUJA,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable
4,FCT ABUJA,People’s Democratic Party (PDP),No further reply,No further reply,No further reply,No further reply,No further reply,No further reply,No further reply


## 10th Analysis

In [2]:
# 10th analysis: fairness of media coverage and vote-bribe offers, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q16A', 'Q16B']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Was media coverage fair?', 'Offered bribe for vote?']

df_new.head()

Unnamed: 0,State,Was media coverage fair?,Offered bribe for vote?
0,IMO,Often,Never
1,FCT ABUJA,Sometimes,Never
2,FCT ABUJA,Often,Once or twice
3,FCT ABUJA,Sometimes,Once or twice
4,FCT ABUJA,Never,Once or twice


## 11th Analysis

In [3]:
# 11th analysis: vote counting, multiple voting, ballot secrecy and fear of intimidation, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q17A', 'Q17B', 'Q17C', 'Q17D']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Inaccurately counted votes?', 'People voted more than once?', 'Can the powerful find out your vote?', 'Did you fear political intimidation or violence?']

df_new.head()

Unnamed: 0,State,Inaccurately counted votes?,People voted more than once?,Can the powerful find out your vote?,Did you fear political intimidation or violence?
0,IMO,Never,Don’t know,Not at all likely,A lot
1,FCT ABUJA,Often,A few times,Very likely,A lot
2,FCT ABUJA,A few times,Never,Very likely,Not at all
3,FCT ABUJA,A few times,Never,Not very likely,A little bit
4,FCT ABUJA,A few times,A few times,Very likely,A lot


## 12th Analysis

In [2]:
# 12th analysis: police at the ballot, voter intimidation and accuracy of announced results, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q18A', 'Q18B', 'Q18C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Police assist people to cast ballot?', 'Voter intimidation?', 'Annouced results reflect counted results?']

df_new.head()

Unnamed: 0,State,Police assist people to cast ballot?,Voter intimidation?,Annouced results reflect counted results?
0,IMO,No,Yes,"Mostly accurate, but with some minor discrepan..."
1,FCT ABUJA,No,Yes,Not accurate at all
2,FCT ABUJA,No,No,Completely accurate
3,FCT ABUJA,No,No,"Mostly accurate, but with some minor discrepan..."
4,FCT ABUJA,No,No,Not accurate at all


## 13th Analysis


In [3]:
# 13th analysis: freedom of association and media freedom statements, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q19A', 'Q19B']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Government bans organizations vs join any', 'Media free to publish vs government control']

df_new.head()

Unnamed: 0,State,Government bans organizations vs join any,Media free to publish vs government control
0,IMO,Agree very strongly with 1,Agree very strongly with 2
1,FCT ABUJA,Agree very strongly with 2,Agree very strongly with 1
2,FCT ABUJA,Agree very strongly with 1,Agree very strongly with 2
3,FCT ABUJA,Agree with 1,Agree with 2
4,FCT ABUJA,Agree with 2,Agree very strongly with 1


## 14th Analysis

In [2]:
# 14th analysis: views on one-party, military and one-man rule, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q20A', 'Q20B', 'Q20C']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Reject one-party rule', 'Reject military rule', 'Reject one-man rule']

df_new.head()

Unnamed: 0,State,Reject one-party rule,Reject military rule,Reject one-man rule
0,IMO,Disapprove,Disapprove,Don’t know
1,FCT ABUJA,Strongly disapprove,Strongly disapprove,Strongly disapprove
2,FCT ABUJA,Strongly approve,Approve,Strongly disapprove
3,FCT ABUJA,Neither approve nor disapprove,Neither approve nor disapprove,Neither approve nor disapprove
4,FCT ABUJA,Strongly disapprove,Strongly disapprove,Strongly disapprove


## 15th Analysis

In [3]:
# 15th analysis: support for democracy, by state
# Load the raw survey and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select the variables chosen from the metadata and replace their numeric
# codes with the value labels stored in the metadata.
columns = ['REGION','Q21']
df_new = df[columns].copy()
# Assign each decoded column back: a chained in-place replace on df_new[col]
# leaves df_new unchanged under pandas Copy-on-Write.
for col in columns:
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Support Democracy']

df_new.head()

Unnamed: 0,State,Support Democracy
0,IMO,STATEMENT 3: Doesn't matter
1,FCT ABUJA,STATEMENT 1: Democracy preferable
2,FCT ABUJA,STATEMENT 1: Democracy preferable
3,FCT ABUJA,STATEMENT 2: Sometimes non-democratic preferable
4,FCT ABUJA,STATEMENT 1: Democracy preferable
