In [1]:
# Import libraries
import pandas as pd
import pyreadstat as pyr

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


In [3]:
def wrangle(filename):
    """
    Loads and wrangles a .sav file

    Reads the file with pyreadstat, keeps a fixed set of survey columns,
    swaps the answer codes for the value labels stored in the file's
    metadata, gives the columns readable names and indexes the rows by
    respondent id.

    Parameters
    ----------
    filename : .sav file
        file that is loaded and wrangled

    Returns
    -------
    df : DataFrame
        Result of wrangling, indexed by ``id``

    Raises
    ------
    KeyError
        If a selected column is missing from the file, or has no entry in
        the metadata's value labels.
    """
    # Source column -> readable name (names chosen from the metadata labels)
    column_names = {
        'RESPNO': 'id',
        'COUNTRY': 'country',
        'URBRUR': 'community',
        'REGION': 'state',
        'Q3': 'ctry_direction',
        'Q4A': 'ctry_present_econ',
        'Q6': 'rel_econ_past12m',
        'Q40': 'fear_pol_intimidation',
        'Q43G': 'trust_police',
        'Q49T': 'pay_bribe_police',
    }

    # Load file as DataFrame; meta is the metadata describing that DataFrame
    raw, meta = pyr.read_sav(filename)

    # Explicit copy so the assignments below never target a slice of the full frame
    df = raw[list(column_names)].copy()

    # Replace the code numbers with their labels in every column except the respondent id.
    # The result is assigned back because replace(..., inplace=True) on df[column]
    # modifies a temporary Series once pandas copy-on-write is active.
    for column in df.columns.drop('RESPNO'):
        df[column] = df[column].replace(meta.variable_value_labels[column])

    # Rename by explicit mapping, then use the respondent id as the index
    return df.rename(columns=column_names).set_index('id')

In [10]:
# Use the wrangle function
df = wrangle('afrobarometer.sav')

# Nigerian respondents only: the analysis cells below read `df_nigeria`.
# NOTE(review): 'Nigeria' is the country label the police section filters on - confirm it matches the COUNTRY value labels.
df_nigeria = df[df['country'] == 'Nigeria']

In [12]:
# Saves the wrangled data as a csv file
# df.to_csv('Nigeria.csv', index=False)

In [1]:
# metadata
# print(meta.column_names_to_labels)
# meta.variable_value_labels

In [None]:
# Respondent count per state within each answer about the country's direction
df_piv = (
    df_nigeria
    .groupby('ctry_direction')['state']
    .value_counts()
    .rename('frequency')
    .reset_index()
)

df_piv

In [None]:
# Per-state counts for the "wrong direction" answer only
df_piv.loc[df_piv['ctry_direction'].eq("Going in the wrong direction")]

In [None]:
df_nigeria['ctry_direction'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Perception of Nigeria's overall direction by citizens");

In [None]:
df_nigeria['ctry_present_econ'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Perception of Nigeria's economic situation by citizens");

In [None]:
df_nigeria['rel_econ_past12m'].value_counts(normalize=True).sort_values()

In [None]:
df_nigeria['rel_econ_past12m'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Perception of the change in Nigeria's economic situation by citizens");

In [None]:
df_nigeria['fear_pol_intimidation'].value_counts()

In [None]:
df_nigeria['trust_police'].value_counts()

In [None]:
df_nigeria['pay_bribe_police'].value_counts()

In [None]:
df.country.value_counts()

In [None]:
df_nigeria['ctry_direction'].value_counts(normalize=True)

In [None]:
pivotal = df_nigeria[['state', 'ctry_direction']].value_counts().to_frame()

In [None]:
pivotal.reset_index(inplace=True)

In [None]:
pivotal_pivot = pivotal.pivot(index='state', columns='ctry_direction')

In [None]:
pivotal_pivot

In [None]:
dir_diff_by_state =(pivotal_pivot[0]['Going in the right direction'] - pivotal_pivot[0]['Going in the wrong direction']).to_frame()

In [None]:
dir_diff_by_state.rename(columns={0:'direction_difference'}, inplace=True)

In [None]:
dir_diff_by_state.reset_index(inplace=True)

In [None]:
# Figure size is set globally, so it also applies to every later matplotlib figure
plt.rcParams["figure.figsize"] = (14,10)

# States ordered by the difference; states with a missing difference are left out
ax = (
    dir_diff_by_state
    .dropna()
    .sort_values(by='direction_difference')
    .plot.barh(x='state', y='direction_difference', color='green', legend=False)
)
ax.set_ylabel('State')
ax.set_title('Difference between Nigerians that think Nigeria is going in the right direction and those that think it is going in the wrong direction by state');


In [None]:
dir_diff_by_state.sort_values(by='direction_difference')

In [None]:
df.country.value_counts().index

In [None]:
pip install plotly

# Nigerian Police

In [None]:
# Load .sav file
# Rebinds `df` (until now the frame returned by wrangle()) to the raw survey data,
# and binds `meta` to the file's metadata, which the next cell uses for value labels.
df, meta = pyr.read_sav('afrobarometer.sav')

In [None]:
# Police Data wrangling
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['COUNTRY','REGION','Q10A','Q10B','Q11A','Q11B','Q43G', 'Q44E', 'Q49P','Q49Q','Q49R','Q49T']
# Select first, then copy: only the needed columns are duplicated
df_police = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_police[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_police[col] = df_police[col].replace(meta.variable_value_labels[col])

In [None]:
df_police.head()

In [None]:
# Rename columns in the police data
df_police.columns = ['country', 'state', 'neigborhood_safety', 'fear_of_crime',
              'stolen_from', 'attacked', 'trust_police', 'corrupt_police',
               'ease_of_police_assist','time_to_police_assist', 'paybribe_police_assist', 'paybribe_avoid_police']

In [None]:
mask = df_police['country'] == 'Nigeria'
df_nigpol = df_police[mask]

df_nigpol.head()

In [None]:
df_nigpol['stolen_from'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Have you been stolen from?", color='green');

In [None]:
df_nigpol['attacked'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Have you been attacked?", color='green');

In [None]:
df_nigpol['trust_police'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Do you trust the police?", color='green');

In [None]:
df_nigpol['corrupt_police'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Perception of police corruption", color='green');

In [None]:
(df_nigpol[df_nigpol['ease_of_police_assist'] != 'No contact'])['ease_of_police_assist'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "How easy is it to get police assistance?", color='green');

In [None]:
(df_nigpol[df_nigpol['paybribe_police_assist'] != 'No contact'])['paybribe_police_assist'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Do you pay bribe to receive assistance from the police?", color='green');


In [None]:
(df_nigpol[df_nigpol['paybribe_avoid_police'] != 'No contact'])['paybribe_avoid_police'].value_counts(normalize=True).sort_values().plot(kind='barh',
                                                                            title= "Do you pay bribe to avoid the police?", color='green');

# Afrobarometer Nigeria 2021 data

In [15]:
# Load the Nigeria 2021 file; rebinds `df` and `meta` to this file's data and metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')

In [4]:
# Print the mapping from column names to their descriptive labels
print(meta.column_names_to_labels)

{'RESPNO': 'Respondent number', 'URBRUR': 'Urban or Rural Primary Sampling Unit', 'REGION': 'Province or region', 'EA_SVC_A': 'EA-SVC-A. Electricity grid in the PSU/EA', 'EA_SVC_B': 'EA-SVC-B. Piped water system in the PSU/EA', 'EA_SVC_C': 'EA-SVC-C. Sewage system in the PSU/EA', 'EA_SVC_D': 'EA-SVC-D. Mobile phone service in the PSU/EA', 'EA_SVC_E': 'EA-SVC-E.  Borehole or tubewell in PSU/EA', 'EA_FAC_A': 'EA-FAC-A. Post office in the PSU/EA', 'EA_FAC_B': 'EA-FAC-B. School in the PSU/EA', 'EA_FAC_C': 'EA-FAC-C. Police station in the PSU/EA', 'EA_FAC_D': 'EA-FAC-D. Health Clinic in the PSU/EA', 'EA_FAC_E': 'EA-FAC-E. Market stalls in the PSU/EA', 'EA_FAC_F': 'EA-FAC-F. Bank or money services in the PSU/EA', 'EA_FAC_G': 'EA-FAC-G. Paid transport in the PSU/EA', 'EA_SEC_A': 'EA-SEC-A. Police in the PSU/EA', 'EA_SEC_B': 'EA-SEC-B. Soldiers/army in the PSU/EA', 'EA_SEC_C': 'EA-SEC-C. Roadblocks by police/army in the PSU/EA', 'EA_SEC_D': 'EA-SEC-D. Customs checkpoints in the PSU/EA', 'EA_SE

In [8]:
#meta.variable_value_labels

In [7]:
# (number of rows, number of columns) of the loaded data
df.shape

(1599, 389)

## Analysis 1

In [32]:
# 1st analysis
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q1', 'Q2', 'Q4B']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

df_new.head()

Unnamed: 0,REGION,Q1,Q2,Q4B
0,IMO,26.0,Igbo,Fairly Good
1,FCT ABUJA,25.0,Other,Fairly Bad
2,FCT ABUJA,35.0,Hausa,Very good
3,FCT ABUJA,79.0,Other,Neither good nor bad
4,FCT ABUJA,19.0,English,Fairly Good


In [33]:
# Rename by explicit mapping so each readable name stays tied to its source column,
# whatever the column order, and re-running the cell is harmless
df_new = df_new.rename(columns={
    'REGION': 'State',
    'Q1': 'Age',
    'Q2': 'Language spoken at home',
    'Q4B': 'Present living conditions',
})

In [27]:
# Find rows with non numeric data: the age is a text label (e.g. 'Refused') rather than a number.
# The column is called 'Age' at this point, not 'Q1', because the rename cell above has already run.
age_as_number = pd.to_numeric(df_new['Age'], errors='coerce')
non_numeric_age = age_as_number.isna() & df_new['Age'].notna()
# Drop those rows by condition rather than by a hard-coded index label, so the cell can be re-run
df_new = df_new[~non_numeric_age]

Unnamed: 0,REGION,Q1,Q2


In [34]:
# Preview the first rows of the renamed frame
df_new.head()

Unnamed: 0,State,Age,Language spoken at home,Present living conditions
0,IMO,26.0,Igbo,Fairly Good
1,FCT ABUJA,25.0,Other,Fairly Bad
2,FCT ABUJA,35.0,Hausa,Very good
3,FCT ABUJA,79.0,Other,Neither good nor bad
4,FCT ABUJA,19.0,English,Fairly Good


In [42]:
# Home-language answers in sorted order; read by build_new_bar()
df_lang = df_new.loc[:, 'Language spoken at home'].sort_values()

In [44]:
def build_new_bar():
    """
    Build a bar chart of the languages spoken at home.

    Reads the module-level ``df_lang`` Series and passes it to plotly express unchanged.

    Returns
    -------
    fig : plotly figure
        Bar chart titled "Languages spoken at home" with both axes labelled.
    """
    axis_titles = {
        "xaxis_title": "Language",
        "yaxis_title": "Frequency [count]",
    }
    chart = px.bar(data_frame=df_lang, title="Languages spoken at home")
    chart.update_layout(**axis_titles)
    return chart


cb_fig = build_new_bar()
cb_fig.show()

In [46]:
# Number of respondents per state
df_new['State'].value_counts()

KANO           112
LAGOS          104
KATSINA         72
BAUCHI          64
KADUNA          64
OYO             64
JIGAWA          56
RIVERS          56
NIGER           48
ZAMFARA         48
BENUE           48
BORNO           48
SOKOTO          48
OGUN            48
ANAMBRA         40
KEBBI           40
ONDO            40
DELTA           40
AKWA IBOM       40
IMO             40
OSUN            32
KOGI            32
ABIA            32
PLATEAU         32
GOMBE           32
EDO             32
CROSS RIVER     32
ADAMAWA         32
ENUGU           32
YOBE            24
BAYELSA         24
TARABA          24
EBONYI          24
FCT ABUJA       24
EKITI           24
KWARA           24
NASARAWA        23
Name: State, dtype: int64

## Analysis 2

In [6]:
# 2nd analysis
#load df
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q7A', 'Q7B', 'Q7C']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Without food', 'Without water', 'Without medical care']

df_new.head()

Unnamed: 0,State,Without food,Without water,Without medical care
0,IMO,Just once or twice,Several times,Many times
1,FCT ABUJA,Several times,Several times,Several times
2,FCT ABUJA,Several times,Never,Just once or twice
3,FCT ABUJA,Never,Never,Never
4,FCT ABUJA,Never,Never,Never


## Analysis 3

In [3]:
# 3rd analysis
#load df
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q4B', 'Q5']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Living condition', 'Treated unfairly by govt based on econ status']

df_new.head()

Unnamed: 0,State,Living condition,Treated unfairly by govt based on econ status
0,IMO,Fairly Good,Never
1,FCT ABUJA,Fairly Bad,Always
2,FCT ABUJA,Very good,Often
3,FCT ABUJA,Neither good nor bad,Often
4,FCT ABUJA,Fairly Good,Often


## Analysis 4

In [2]:
# 4th analysis
#load df
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q9', 'Q10A', 'Q10B', 'Q10C']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Discuss politics', 'Freedom to say what you think', 'Freedom to join political organization', 'Freedom to chose who to vote for']

df_new.head()

Unnamed: 0,State,Discuss politics,Freedom to say what you think,Freedom to join political organization,Freedom to chose who to vote for
0,IMO,Occasionally,Not at all free,Not at all free,Somewhat free
1,FCT ABUJA,Frequently,Not very free,Completely free,Not very free
2,FCT ABUJA,Occasionally,Not at all free,Not at all free,Not at all free
3,FCT ABUJA,Never,Not at all free,Not at all free,Not at all free
4,FCT ABUJA,Occasionally,Somewhat free,Completely free,Completely free


In [3]:
# Print the column-name -> label mapping again, to look up the next question codes
print(meta.column_names_to_labels)

{'RESPNO': 'Respondent number', 'URBRUR': 'Urban or Rural Primary Sampling Unit', 'REGION': 'Province or region', 'EA_SVC_A': 'EA-SVC-A. Electricity grid in the PSU/EA', 'EA_SVC_B': 'EA-SVC-B. Piped water system in the PSU/EA', 'EA_SVC_C': 'EA-SVC-C. Sewage system in the PSU/EA', 'EA_SVC_D': 'EA-SVC-D. Mobile phone service in the PSU/EA', 'EA_SVC_E': 'EA-SVC-E.  Borehole or tubewell in PSU/EA', 'EA_FAC_A': 'EA-FAC-A. Post office in the PSU/EA', 'EA_FAC_B': 'EA-FAC-B. School in the PSU/EA', 'EA_FAC_C': 'EA-FAC-C. Police station in the PSU/EA', 'EA_FAC_D': 'EA-FAC-D. Health Clinic in the PSU/EA', 'EA_FAC_E': 'EA-FAC-E. Market stalls in the PSU/EA', 'EA_FAC_F': 'EA-FAC-F. Bank or money services in the PSU/EA', 'EA_FAC_G': 'EA-FAC-G. Paid transport in the PSU/EA', 'EA_SEC_A': 'EA-SEC-A. Police in the PSU/EA', 'EA_SEC_B': 'EA-SEC-B. Soldiers/army in the PSU/EA', 'EA_SEC_C': 'EA-SEC-C. Roadblocks by police/army in the PSU/EA', 'EA_SEC_D': 'EA-SEC-D. Customs checkpoints in the PSU/EA', 'EA_SE

## Analysis 5

In [4]:
# 5th analysis
#load df
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q11A', 'Q11B', 'Q11C']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Attend community meeting', 'Join others to raise an issue', 'Attend a demonstration']

df_new.head()

Unnamed: 0,State,Attend community meeting,Join others to raise an issue,Attend a demonstration
0,IMO,"No, would never do this","Yes, several times","No, would never do this"
1,FCT ABUJA,"Yes, often","Yes, often","No, would never do this"
2,FCT ABUJA,"Yes, several times","No, would never do this","No, would never do this"
3,FCT ABUJA,"Yes, several times","Yes, several times","Yes, often"
4,FCT ABUJA,"No, would do it if I had the chance","Yes, once or twice","No, would do it if I had the chance"


## Analysis 6

In [2]:
# 6th analysis
#load df
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Select specific columns from the Dataframe based on the information provided in the metadata
# and replace the code numbers with the actual values in each column using their labels in the metadata
columns = ['REGION','Q12A', 'Q12B', 'Q12C', 'Q12D']
# Select first, then copy: only the needed columns are duplicated
df_new = df[columns].copy()
for col in columns:
    # Assign back instead of replace(..., inplace=True): the in-place form on df_new[col]
    # works on a temporary Series once pandas copy-on-write is active
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Contact local government councillor', 'Contact MP', 'Contact political party official', 'Contact traditional leader']

df_new.head()

Unnamed: 0,State,Contact local government councillor,Contact MP,Contact political party official,Contact traditional leader
0,IMO,Never,Never,Never,Never
1,FCT ABUJA,Never,Never,A few times,A few times
2,FCT ABUJA,A few times,Never,Don’t know,Never
3,FCT ABUJA,Never,Never,Never,Never
4,FCT ABUJA,Never,Never,Never,Never
