In [1]:
# Import libraries
import pandas as pd
import pyreadstat as pyr

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


In [2]:
def wrangle(filename):
    """
    Load an SPSS ``.sav`` survey file and return a labelled, renamed subset.

    The file is read with ``pyr.read_sav``. Ten columns are kept, the numeric
    codes in every kept column except ``RESPNO`` are replaced with the value
    labels found in the file's metadata, the columns are renamed, and the
    respondent number becomes the index.

    Parameters
    ----------
    filename : .sav file
        File that is loaded and wrangled.

    Returns
    -------
    df : DataFrame
        Wrangled data indexed by ``id`` with the columns ``country``,
        ``community``, ``state``, ``ctry_direction``, ``ctry_present_econ``,
        ``rel_econ_past12m``, ``fear_pol_intimidation``, ``trust_police`` and
        ``pay_bribe_police``.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the file, or has no
        entry in ``meta.variable_value_labels``.
    """
    # Source variable -> analysis name. Insertion order also fixes the column
    # order, and renaming by key (not by position) keeps the pairs in sync.
    column_names = {
        'RESPNO': 'id',
        'COUNTRY': 'country',
        'URBRUR': 'community',
        'REGION': 'state',
        'Q3': 'ctry_direction',
        'Q4A': 'ctry_present_econ',
        'Q6': 'rel_econ_past12m',
        'Q40': 'fear_pol_intimidation',
        'Q43G': 'trust_police',
        'Q49T': 'pay_bribe_police',
    }
    # RESPNO is an identifier, so it is the only kept column that is not decoded.
    coded_columns = [name for name in column_names if name != 'RESPNO']

    # meta is the metadata object returned alongside the data.
    raw, meta = pyr.read_sav(filename)

    # Copy the selection so the assignments below act on an independent frame.
    df = raw[list(column_names)].copy()

    for name in coded_columns:
        # Assign the result back instead of `df[name].replace(..., inplace=True)`:
        # the in-place form acts on a temporary Series and is not guaranteed to
        # update `df` (it never does under pandas Copy-on-Write).
        df[name] = df[name].replace(meta.variable_value_labels[name])

    return df.rename(columns=column_names).set_index('id')

In [3]:
# Build the wrangled multi-country frame from the survey file.
df = wrangle(filename='afrobarometer.sav')

In [4]:
# Save the wrangled data as a CSV file (left disabled; note that index=False would omit the 'id' index)
# df.to_csv('Nigeria.csv', index=False)

In [5]:
# metadata
# print(meta.column_names_to_labels)
# meta.variable_value_labels

In [7]:
# Number of respondents for every (ctry_direction, state) pair, as a flat
# three-column table: ctry_direction, state, frequency.
df_piv = (
    df.groupby('ctry_direction')['state']
    .value_counts()
    .rename('frequency')
    .reset_index()
)

df_piv

Unnamed: 0,ctry_direction,state,frequency
0,Don't know,Nampula,103
1,Don't know,São Tomé,70
2,Don't know,Greater Accra,61
3,Don't know,Northern Province,46
4,Don't know,Plaine Wilhems,37
...,...,...,...
1080,Refused,South East,1
1081,Refused,São Vicente,1
1082,Refused,Thaba-Tseka,1
1083,Refused,Tonkpi,1


In [None]:
# Per-state frequencies for the "Going in the wrong direction" answer only.
df_piv.loc[df_piv['ctry_direction'].eq("Going in the wrong direction")]

In [None]:
# `df_nigeria` is used by every cell in this section but was never defined,
# so a fresh kernel raised NameError here. Derive it from the wrangled frame.
df_nigeria = df[df['country'] == 'Nigeria']

# Share of Nigerian respondents per answer on the country's direction, sorted ascending.
(
    df_nigeria['ctry_direction']
    .value_counts(normalize=True)
    .sort_values()
    .plot(kind='barh', title="Perception of Nigeria's overall direction by citizens")
);

In [None]:
# Share of respondents per answer in `ctry_present_econ` (source variable Q4A), sorted ascending.
(
    df_nigeria['ctry_present_econ']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of Nigeria's economic situation by citizens")
);

In [None]:
# Share of respondents per answer in `rel_econ_past12m` (source variable Q6), smallest share first.
df_nigeria.loc[:, 'rel_econ_past12m'].value_counts(normalize=True).sort_values(ascending=True)

In [None]:
# Same shares as a horizontal bar chart.
(
    df_nigeria['rel_econ_past12m']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of the change in Nigeria's economic situation by citizens")
);

In [None]:
# Raw answer counts for `fear_pol_intimidation` (source variable Q40).
df_nigeria.loc[:, 'fear_pol_intimidation'].value_counts()

In [None]:
# Raw answer counts for `trust_police` (source variable Q43G).
df_nigeria.loc[:, 'trust_police'].value_counts()

In [None]:
# Raw answer counts for `pay_bribe_police` (source variable Q49T).
df_nigeria.loc[:, 'pay_bribe_police'].value_counts()

In [None]:
# Number of respondents per country in the wrangled frame.
df['country'].value_counts()

In [None]:
# Share of respondents per answer in `ctry_direction` (source variable Q3).
df_nigeria.loc[:, 'ctry_direction'].value_counts(normalize=True)

In [None]:
# Count respondents for each (state, ctry_direction) combination; the result
# is a one-column frame indexed by those two keys.
pivotal = (
    df_nigeria.loc[:, ['state', 'ctry_direction']]
    .value_counts()
    .to_frame()
)

In [None]:
# Move the (state, ctry_direction) index levels back into ordinary columns.
pivotal = pivotal.reset_index()

In [None]:
# One row per state, one column per answer. No `values` is given, so the
# remaining count column becomes an outer column level.
pivotal_pivot = pivotal.pivot(
    columns='ctry_direction',
    index='state',
)

In [None]:
# Display the state-by-answer table built in the previous cell.
pivotal_pivot

In [None]:
# Per-state difference between the number of respondents answering
# "Going in the right direction" and "Going in the wrong direction".
# The pivot carries two column levels (count column, answer). The outer label
# is 0 on older pandas but 'count' from pandas 2.0 on, so drop that level
# rather than hard-coding `pivotal_pivot[0]`, which raises KeyError there.
direction_counts = pivotal_pivot.droplevel(0, axis=1)
# The two operands have different names, so the result is unnamed and
# to_frame() yields a single column labelled 0, as the rename below expects.
dir_diff_by_state = (
    direction_counts['Going in the right direction']
    - direction_counts['Going in the wrong direction']
).to_frame()

In [None]:
# Give the single difference column (labelled 0) a descriptive name.
dir_diff_by_state = dir_diff_by_state.rename(columns={0: 'direction_difference'})

In [None]:
# Turn the `state` index into a regular column so it can be used as a plot axis.
dir_diff_by_state = dir_diff_by_state.reset_index()

In [None]:
# Horizontal bars of the per-state difference in ascending order; rows that
# contain missing values are dropped first. The figure size is set globally,
# so it also applies to later matplotlib figures.
plt.rcParams["figure.figsize"] = (14, 10)
ax = (
    dir_diff_by_state
    .dropna()
    .sort_values(by='direction_difference')
    .plot.barh(x='state', y='direction_difference', color='green', legend=False)
)
ax.set_ylabel('State')
ax.set_title('Difference between Nigerians that think Nigeria is going in the right direction and those that think it is going in the wrong direction by state');


In [None]:
# Same table, ordered from the most negative to the most positive difference.
dir_diff_by_state.sort_values('direction_difference', ascending=True)

In [None]:
# Country labels present in the data, ordered by respondent count.
df['country'].value_counts().index

# Nigerian Police

In [None]:
# Load the raw .sav file again; read_sav returns the data and its metadata.
# NOTE(review): this rebinds `df`, which until now held the wrangled frame
# returned by wrangle(); cells above this one must not be re-run afterwards
# without re-running the wrangle cell first.
df, meta = pyr.read_sav('afrobarometer.sav')

In [None]:
# Police data wrangling
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['COUNTRY','REGION','Q10A','Q10B','Q11A','Q11B','Q43G', 'Q44E', 'Q49P','Q49Q','Q49R','Q49T']
# Select first, then copy: only the needed columns are duplicated and the
# assignments below act on an independent frame rather than on `df`.
df_police = df[columns].copy()
for col in columns:
    # Assign the result back: `df_police[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_police`.
    df_police[col] = df_police[col].replace(meta.variable_value_labels[col])

In [None]:
# Peek at the first five rows of the decoded police frame.
df_police.head(n=5)

In [None]:
# Rename the police columns positionally; each name is paired with the source
# variable that sits at the same position in the selection list.
# NOTE(review): 'neigborhood_safety' is misspelled; it is kept unchanged here
# because it is a column name that other code may reference.
df_police.columns = [
    'country',                 # COUNTRY
    'state',                   # REGION
    'neigborhood_safety',      # Q10A
    'fear_of_crime',           # Q10B
    'stolen_from',             # Q11A
    'attacked',                # Q11B
    'trust_police',            # Q43G
    'corrupt_police',          # Q44E
    'ease_of_police_assist',   # Q49P
    'time_to_police_assist',   # Q49Q
    'paybribe_police_assist',  # Q49R
    'paybribe_avoid_police',   # Q49T
]

In [None]:
# Restrict the police frame to respondents whose country label is 'Nigeria'.
mask = df_police['country'].eq('Nigeria')
df_nigpol = df_police.loc[mask]

df_nigpol.head(n=5)

In [None]:
# Share of Nigerian respondents per answer in `stolen_from`, sorted ascending.
(
    df_nigpol['stolen_from']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Have you been stolen from?", color='green')
);

In [None]:
# Share of Nigerian respondents per answer in `attacked`, sorted ascending.
(
    df_nigpol['attacked']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Have you been attacked?", color='green')
);

In [None]:
# Share of Nigerian respondents per answer in `trust_police`, sorted ascending.
(
    df_nigpol['trust_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you trust the police?", color='green')
);

In [None]:
# Share of Nigerian respondents per answer in `corrupt_police`, sorted ascending.
(
    df_nigpol['corrupt_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Perception of police corruption", color='green')
);

In [None]:
# Answer shares for `ease_of_police_assist`, leaving out rows answered
# 'No contact'; shares are computed over the remaining rows only.
(
    df_nigpol
    .loc[df_nigpol['ease_of_police_assist'].ne('No contact'), 'ease_of_police_assist']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="How easy is it to get police assistance?", color='green')
);

In [None]:
# Answer shares for `paybribe_police_assist`, leaving out rows answered
# 'No contact'; shares are computed over the remaining rows only.
(
    df_nigpol
    .loc[df_nigpol['paybribe_police_assist'].ne('No contact'), 'paybribe_police_assist']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you pay bribe to receive assistance from the police?", color='green')
);


In [None]:
# Answer shares for `paybribe_avoid_police`, leaving out rows answered
# 'No contact'; shares are computed over the remaining rows only.
(
    df_nigpol
    .loc[df_nigpol['paybribe_avoid_police'].ne('No contact'), 'paybribe_avoid_police']
    .value_counts(normalize=True)
    .sort_values()
    .plot.barh(title="Do you pay bribe to avoid the police?", color='green')
);

# Afrobarometer Nigeria 2021 data

In [None]:
# Load the Nigeria-only .sav file; read_sav returns the data and its metadata.
# NOTE(review): `df` and `meta` are rebound here, replacing the objects loaded
# from 'afrobarometer.sav' in the police section.
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')

In [None]:
# Print the metadata mapping used to look up what each variable name means.
print(meta.column_names_to_labels)

In [None]:
#meta.variable_value_labels

In [None]:
# (rows, columns) of the raw frame loaded above.
df.shape

## Analysis 1

In [None]:
# 1st analysis
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q1', 'Q2', 'Q4B']
# Select first, then copy: only the needed columns are duplicated and the
# assignments below act on an independent frame rather than on `df`.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

df_new.head()

In [None]:
# Rename positionally; each name is paired with the source variable that sits
# at the same position in the selection list.
df_new.columns = [
    'State',                      # REGION
    'Age',                        # Q1
    'Language spoken at home',    # Q2
    'Present living conditions',  # Q4B
]

In [None]:
# Drop respondents whose age is the non-numeric label 'Refused'.
# The previous cell renamed 'Q1' to 'Age', so filtering on 'Q1' raised KeyError.
# Dropping by mask instead of the hard-coded index label 978 also keeps this
# cell safe to re-run.
# NOTE(review): presumably row 978 was the only 'Refused' row; confirm.
refused_age = df_new['Age'] == 'Refused'
df_new = df_new[~refused_age]

In [None]:
# Peek at the first five rows after the clean-up.
df_new.head(n=5)

In [None]:
# Home-language answers, one per respondent, in sorted order.
df_lang = df_new.loc[:, 'Language spoken at home'].sort_values(ascending=True)

In [None]:
def build_new_bar(languages=None):
    """
    Build a bar chart of how many respondents speak each language at home.

    Parameters
    ----------
    languages : Series, optional
        One language label per respondent. Defaults to the notebook-level
        ``df_lang`` so existing ``build_new_bar()`` calls keep working.

    Returns
    -------
    fig : plotly figure
        Bar chart with one bar per language and the respondent count as height.
    """
    if languages is None:
        languages = df_lang

    # Aggregate first: the y-axis is labelled as a frequency, so the chart needs
    # one count per language rather than one raw answer per respondent.
    language_counts = (
        languages
        .value_counts()
        .rename_axis('language')
        .reset_index(name='frequency')
    )

    fig = px.bar(
        data_frame=language_counts,
        x='language',
        y='frequency',
        title="Languages spoken at home"
    )
    # Set axis labels
    fig.update_layout(xaxis_title="Language",
                     yaxis_title="Frequency [count]",
                    )
    return fig

cb_fig = build_new_bar()
cb_fig.show()

In [None]:
# Number of respondents per state.
df_new['State'].value_counts()

## Analysis 2

In [None]:
# 2nd analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q7A', 'Q7B', 'Q7C']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Without food', 'Without water', 'Without medical care']

df_new.head()

## Analysis 3

In [None]:
# 3rd analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q4B', 'Q5']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Living condition', 'Treated unfairly by govt based on econ status']

df_new.head()

## Analysis 4

In [None]:
# 4th analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q9', 'Q10A', 'Q10B', 'Q10C']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Discuss politics', 'Freedom to say what you think', 'Freedom to join political organization', 'Freedom to chose who to vote for']

df_new.head()

In [None]:
# Print the metadata mapping again to pick the variables for the next analyses.
print(meta.column_names_to_labels)

## Analysis 5

In [None]:
# 5th analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q11A', 'Q11B', 'Q11C']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Attend community meeting', 'Join others to raise an issue', 'Attend a demonstration']

df_new.head()

## Analysis 6

In [None]:
# 6th analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q12A', 'Q12B', 'Q12C', 'Q12D']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Contact local government councillor', 'Contact MP', 'Contact political party official', 'Contact traditional leader']

df_new.head()

## Analysis 7

In [None]:
# 7th analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q13', 'Q14']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Voted in last election', 'Freeness and fairness of last election']

df_new.head()

## Analysis 8

In [None]:
# 8th analysis
# Load the data and its metadata
df, meta = pyr.read_sav('afrobarometer_nigeria_rd8_2021.sav')
# Keep only the columns needed (names taken from the metadata) and replace the
# numeric codes in each with the value labels stored in the metadata.
columns = ['REGION','Q15A', 'Q15B', 'Q15C']
# Select first, then copy, so the assignments below act on an independent frame.
df_new = df[columns].copy()
for col in columns:
    # Assign the result back: `df_new[col].replace(..., inplace=True)` acts on a
    # temporary Series and is not guaranteed to update `df_new`.
    df_new[col] = df_new[col].replace(meta.variable_value_labels[col])

# Rename columns
df_new.columns = ['State', 'Attended campaign rally', 'Worked for candidate or party', 'Contacted by party rep']

df_new.head()