# Creating the Political Averages Data

This code averages the data over 5-year periods and then lags the averaged data (by one and by two periods) to create average effect sizes.

In [1]:
# Loading Packages
import pandas as pd
import numpy as np

In [2]:
# Read the cleaned, combined political dataset from disk and preview its first rows
Political_Data = pd.read_csv(filepath_or_buffer='Data/Clean/Combined/Political-Data.csv')
Political_Data.head(n=5)

Unnamed: 0,Country,Year,Gov,BC,PI,RPE,PR,CL,Urban,Fertility
0,Algeria,1972,-9,0.0,0,2.742,6.0,6.0,39.831,7.546
1,Algeria,1973,-9,0.0,0,2.642,6.0,6.0,39.997,7.491
2,Algeria,1974,-9,0.0,0,2.222,6.0,6.0,40.163,7.44
3,Algeria,1975,-9,6.0,250,2.711,7.0,7.0,40.33,7.373
4,Algeria,1976,-9,6.0,0,2.431,6.0,6.0,40.497,7.306


In [3]:
# read_csv already returns a DataFrame, and re-wrapping it in pd.DataFrame()
# does not copy the data. Take an explicit copy instead, so the edits made to
# df_01 further down cannot leak back into the raw Political_Data frame.
df_01 = Political_Data.copy()

# Check the data types of every column
df_01.dtypes

Country       object
Year           int64
Gov            int64
BC           float64
PI            object
RPE          float64
PR           float64
CL           float64
Urban        float64
Fertility    float64
dtype: object

In [4]:
# PI was read in with object dtype, so convert it to a numeric column.
# errors='coerce' replaces any value that cannot be parsed with NaN instead of raising.
df_01['PI'] = pd.to_numeric(df_01['PI'], errors='coerce')

# Re-check the data types to confirm PI is now numeric
df_01.dtypes

Country       object
Year           int64
Gov            int64
BC           float64
PI           float64
RPE          float64
PR           float64
CL           float64
Urban        float64
Fertility    float64
dtype: object

In [1]:
# Keep only the observations from 1975 onwards.
# NOTE: this cell needs df_01 from the cells above, so run the notebook top to
# bottom (Restart & Run All); on a fresh kernel it raises NameError.
# .copy() detaches the result from the frame it was sliced from, so adding a
# column to it later does not trigger pandas' SettingWithCopyWarning.
df_01 = df_01[df_01['Year'] >= 1975].copy()

NameError: name 'df_01' is not defined

In [6]:
# Preview the first rows of the year-filtered data
df_01.head(n=5)

Unnamed: 0,Country,Year,Gov,BC,PI,RPE,PR,CL,Urban,Fertility
3,Algeria,1975,-9,6.0,250.0,2.711,7.0,7.0,40.33,7.373
4,Algeria,1976,-9,6.0,0.0,2.431,6.0,6.0,40.497,7.306
5,Algeria,1977,-9,6.0,0.0,2.643,6.0,6.0,40.928,7.23
6,Algeria,1978,-9,6.0,0.0,2.587,6.0,6.0,41.794,7.14
7,Algeria,1979,-9,6.0,0.0,2.487,6.0,6.0,42.665,7.059


In [7]:
# First year of the first interval, and the width (in years) of every interval
start_year = 1975
interval_length = 5

# Label each row with the first year of the interval it falls in
# (e.g. 1975-1979 -> 1975, 1980-1984 -> 1980), then average every numeric
# column per 'Country' and 'Interval_Group'.
# .assign() builds a new frame, so no column is written onto a filtered slice.
df_01 = (
    df_01
    .assign(
        Interval_Group=((df_01['Year'] - start_year) // interval_length) * interval_length + start_year
    )
    .groupby(['Country', 'Interval_Group'])
    .mean(numeric_only=True)
)

In [8]:
# Preview the first rows of the per-Country, per-interval averages
df_01.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Gov,BC,PI,RPE,PR,CL,Urban,Fertility
Country,Interval_Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Algeria,1975,1977.0,-9.0,6.0,50.0,2.5718,6.2,6.2,41.2428,7.2216
Algeria,1980,1982.25,-9.0,3.0,0.0,2.275,6.0,6.0,45.52825,6.5605
Algeria,1985,1987.0,-7.6,3.4,175.0,2.5132,5.8,5.8,49.6718,5.3624
Algeria,1990,1992.0,-5.0,2.0,2062.2,1.4426,5.8,5.8,53.6542,4.2208
Algeria,1995,1997.0,-3.0,0.8,3449.8,1.2556,6.0,6.0,57.5492,2.965


In [9]:
# Turn the index back into ordinary columns and restore a default integer index
df_01 = df_01.reset_index(drop=False)

In [10]:
# Remove the 'Year' column from the frame
df_01 = df_01.drop(columns='Year')

In [11]:
# 'Interval_Group' takes over the name 'Year'
df_01 = df_01.rename({'Interval_Group': 'Year'}, axis='columns')

# Preview the renamed data
df_01.head(n=5)

Unnamed: 0,Country,Year,Gov,BC,PI,RPE,PR,CL,Urban,Fertility
0,Algeria,1975,-9.0,6.0,50.0,2.5718,6.2,6.2,41.2428,7.2216
1,Algeria,1980,-9.0,3.0,0.0,2.275,6.0,6.0,45.52825,6.5605
2,Algeria,1985,-7.6,3.4,175.0,2.5132,5.8,5.8,49.6718,5.3624
3,Algeria,1990,-5.0,2.0,2062.2,1.4426,5.8,5.8,53.6542,4.2208
4,Algeria,1995,-3.0,0.8,3449.8,1.2556,6.0,6.0,57.5492,2.965


# Lag 1 Time Frame

In [12]:
def lag_data(df, lag, key_columns=('Year', 'Country', 'PI')):
    """Lag every non-key column by ``lag`` rows within each Country.

    The key columns are returned unchanged; every other column is shifted by
    ``lag`` rows inside its 'Country' group, so the first ``lag`` rows of each
    country become NaN in the lagged columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is not modified.
    lag : int
        Number of rows to shift by within each country.
    key_columns : sequence of str, default ('Year', 'Country', 'PI')
        Columns that must NOT be lagged. 'Country' has to be one of them
        because it defines the groups.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, the key columns first (in the
        order given) followed by the lagged columns, rows in the same order as
        in ``df``.

    Raises
    ------
    KeyError
        If a key column is missing from ``df`` or 'Country' is not a key.

    Notes
    -----
    The lag counts rows, not calendar time: if a country has a gap in its
    series, the value pulled in comes from further back than ``lag`` periods.
    """
    # Normalise to a list: pandas treats a tuple as ONE column label, and a
    # tuple default avoids sharing a mutable default between calls.
    key_columns = list(key_columns)

    missing = [col for col in key_columns if col not in df.columns]
    if missing:
        raise KeyError(f"key_columns not found in DataFrame: {missing}")
    if 'Country' not in key_columns:
        raise KeyError("'Country' must be one of key_columns; it defines the groups that are lagged")

    # Identify columns to lag (all columns except the key columns)
    columns_to_lag = [col for col in df.columns if col not in key_columns]

    # New frame with a unique positional index: leaves `df` untouched and lets
    # the shifted values be aligned back onto the original row order below.
    result = df.reset_index(drop=True)

    if columns_to_lag:
        # shift() is positional, so put each country's rows in chronological
        # order first; a stable sort keeps ties in their original order.
        if 'Year' in key_columns:
            ordered = result.sort_values('Year', kind='stable')
        else:
            ordered = result
        shifted = ordered.groupby('Country', sort=False)[columns_to_lag].shift(lag)
        # Align back to the original row order before writing the columns
        result[columns_to_lag] = shifted.reindex(result.index)

    return result[key_columns + columns_to_lag]

In [13]:
# Lag the averaged data by one row (one 5-year interval) within each country
lag_amount = 1
data = df = pd.DataFrame(df_01)
df_Avg_01 = lag_data(df, lag=lag_amount)
df_Avg_01.head(n=5)

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
0,1975,Algeria,50.0,,,,,,,
1,1980,Algeria,0.0,-9.0,6.0,2.5718,6.2,6.2,41.2428,7.2216
2,1985,Algeria,175.0,-9.0,3.0,2.275,6.0,6.0,45.52825,6.5605
3,1990,Algeria,2062.2,-7.6,3.4,2.5132,5.8,5.8,49.6718,5.3624
4,1995,Algeria,3449.8,-5.0,2.0,1.4426,5.8,5.8,53.6542,4.2208


In [14]:
# Keep only the rows whose 'Year' lies in 2000-2020 (inclusive),
# selecting on the 'Year' column rather than on the index
df_Avg_01 = df_Avg_01.loc[(2000 <= df_Avg_01['Year']) & (df_Avg_01['Year'] <= 2020)]

# Preview the filtered data
df_Avg_01.head(n=5)

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
5,2000,Algeria,1975.0,-3.0,0.8,1.2556,6.0,6.0,57.5492,2.965
6,2005,Algeria,1000.0,-2.0,0.0,1.163,6.0,6.0,61.4954,2.5044
7,2010,Algeria,523.0,2.0,0.0,1.3142,6.0,6.0,65.3388,2.6804
13,2000,Angola,1750.0,-38.8,6.6,1.0574,6.0,6.0,46.5294,6.7946
14,2005,Angola,0.0,-3.6,5.4,1.2586,6.0,6.0,52.4588,6.5678


In [15]:
# Export the frame lagged by 1 period (one 5-year interval) to CSV, without the row index
df_Avg_01.to_csv(path_or_buf='Data/Clean/Final/Political-Avg-01.csv', index=False)

In [16]:
# Lag the averaged data by two rows (two 5-year intervals) within each country
lag_amount = 2
data = df = pd.DataFrame(df_01)
df_Avg_02 = lag_data(df, lag=lag_amount)
df_Avg_02.head(n=5)

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
0,1975,Algeria,50.0,,,,,,,
1,1980,Algeria,0.0,,,,,,,
2,1985,Algeria,175.0,-9.0,6.0,2.5718,6.2,6.2,41.2428,7.2216
3,1990,Algeria,2062.2,-9.0,3.0,2.275,6.0,6.0,45.52825,6.5605
4,1995,Algeria,3449.8,-7.6,3.4,2.5132,5.8,5.8,49.6718,5.3624


In [17]:
# Keep only the rows whose 'Year' lies in 2000-2020 (inclusive),
# selecting on the 'Year' column rather than on the index
df_Avg_02 = df_Avg_02.loc[(2000 <= df_Avg_02['Year']) & (df_Avg_02['Year'] <= 2020)]
df_Avg_02.head(n=5)

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
5,2000,Algeria,1975.0,-5.0,2.0,1.4426,5.8,5.8,53.6542,4.2208
6,2005,Algeria,1000.0,-3.0,0.8,1.2556,6.0,6.0,57.5492,2.965
7,2010,Algeria,523.0,-2.0,0.0,1.163,6.0,6.0,61.4954,2.5044
13,2000,Angola,1750.0,-69.6,1.4,1.1852,6.6,6.6,40.0548,7.1346
14,2005,Angola,0.0,-38.8,6.6,1.0574,6.0,6.0,46.5294,6.7946


In [18]:
# Export the frame lagged by 2 periods (two 5-year intervals) to CSV, without the row index
df_Avg_02.to_csv(path_or_buf='Data/Clean/Final/Political-Avg-02.csv', index=False)