# Final Cleaning - Lagging the Political Data

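This notebook creates three lagged versions of the political data, with lags of 1, 5, and 10 years. The `Year`, `Country`, and `PI` columns are left unlagged; every other column is lagged within each country.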

In [6]:
import pandas as pd
import numpy as np

In [7]:
# Read the combined political dataset from disk and preview its first rows.
Political_Data = pd.read_csv(
    'Data/Clean/Combined/Political-Data.csv'
)
Political_Data.head()

Unnamed: 0,Country,Year,Gov,BC,PI,RPE,PR,CL,Urban,Fertility
0,Algeria,1972,-9,0.0,0,2.742,6.0,6.0,39.831,7.546
1,Algeria,1973,-9,0.0,0,2.642,6.0,6.0,39.997,7.491
2,Algeria,1974,-9,0.0,0,2.222,6.0,6.0,40.163,7.44
3,Algeria,1975,-9,6.0,250,2.711,7.0,7.0,40.33,7.373
4,Algeria,1976,-9,6.0,0,2.431,6.0,6.0,40.497,7.306


# Lag 1 Year Dataset

In [8]:
def lag_data(df, lag, key_columns=('Year', 'Country', 'PI'),
             group_col='Country', time_col='Year'):
    """Lag every non-key column by ``lag`` rows within each group.

    Key columns are returned unchanged; every other column is shifted by
    ``lag`` positions inside its ``group_col`` group, so each row carries the
    values observed ``lag`` rows earlier for the same group.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    lag : int
        Number of rows to shift by. The first ``lag`` rows of every group
        have no earlier observation and therefore become NaN.
    key_columns : sequence of str or str
        Columns that identify a row and must NOT be lagged.
    group_col : str
        Column that defines the groups; must be one of ``key_columns``.
    time_col : str or None
        Column used to order rows inside each group before shifting. Pass
        ``None`` (or a name absent from ``df``) to shift in the given order.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, in the same row order as
        ``df``, with the key columns first followed by the lagged columns.

    Raises
    ------
    KeyError
        If ``group_col`` is not in ``key_columns`` or a key column is
        missing from ``df``.

    Notes
    -----
    The shift counts rows, not calendar time: a lag of k equals k years only
    when each group holds one row per consecutive year.
    NOTE(review): presumably true for this panel -- confirm there are no
    missing country-years.
    """
    # A bare string would otherwise be iterated character by character.
    keys = [key_columns] if isinstance(key_columns, str) else list(key_columns)
    if group_col not in keys:
        raise KeyError(
            f"group_col {group_col!r} must be one of key_columns {keys}"
        )

    # Work on a re-indexed copy: the caller's frame stays untouched and the
    # unique positional index lets lagged values be aligned back afterwards.
    work = df.reset_index(drop=True)

    value_cols = [col for col in work.columns if col not in keys]
    unlagged = work[keys]
    if not value_cols:
        # Nothing to lag; mirror the structure of the normal return value.
        return unlagged.copy()

    # Shift is positional, so rows must be in time order inside each group;
    # a stable sort keeps the given order for ties.
    if time_col is not None and time_col in work.columns:
        ordered = work.sort_values(time_col, kind='stable')
    else:
        ordered = work

    # One grouped shift for all value columns instead of a per-column loop.
    lagged = ordered.groupby(group_col, sort=False)[value_cols].shift(lag)

    # join aligns on the index, which restores the caller's row order.
    return unlagged.join(lagged)

In [9]:
# Build the 1-year-lag panel from the loaded political data.
# `data` and `df` both refer to the same re-wrapped DataFrame object.
lag_amount = 1
data = pd.DataFrame(Political_Data)
df = data
df_01 = lag_data(df, lag=lag_amount)
df_01.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
0,1972,Algeria,0,,,,,,,
1,1973,Algeria,0,-9.0,0.0,2.742,6.0,6.0,39.831,7.546
2,1974,Algeria,0,-9.0,0.0,2.642,6.0,6.0,39.997,7.491
3,1975,Algeria,250,-9.0,0.0,2.222,6.0,6.0,40.163,7.44
4,1976,Algeria,0,-9.0,6.0,2.711,7.0,7.0,40.33,7.373


In [10]:
# Keep only rows whose Year falls in 2000-2020 (both endpoints included),
# selecting on the Year column rather than on the index.
df_01 = df_01.loc[df_01['Year'].between(2000, 2020)]
df_01.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
27,2000,Algeria,1875,-3.0,0.0,1.169,6.0,6.0,59.118,2.624
28,2001,Algeria,2750,-3.0,0.0,1.329,6.0,6.0,59.919,2.566
29,2002,Algeria,3375,-3.0,0.0,1.24,6.0,6.0,60.712,2.507
30,2003,Algeria,0,-3.0,0.0,1.241,6.0,6.0,61.501,2.456
31,2004,Algeria,1875,-3.0,0.0,1.033,6.0,6.0,62.284,2.481


In [11]:
# Write the 1-year-lag panel to CSV; index=False leaves the row index out.
df_01.to_csv(
    'Data/Clean/Final/Political-01.csv',
    index=False,
)

In [12]:
# Build the 5-year-lag panel from the loaded political data.
# `data` and `df` both refer to the same re-wrapped DataFrame object.
lag_amount = 5
data = pd.DataFrame(Political_Data)
df = data
df_05 = lag_data(df, lag=lag_amount)
df_05.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
0,1972,Algeria,0,,,,,,,
1,1973,Algeria,0,,,,,,,
2,1974,Algeria,0,,,,,,,
3,1975,Algeria,250,,,,,,,
4,1976,Algeria,0,,,,,,,


In [13]:
# Keep only rows whose Year falls in 2000-2020 (both endpoints included),
# selecting on the Year column rather than on the index.
df_05 = df_05.loc[df_05['Year'].between(2000, 2020)]
df_05.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
27,2000,Algeria,1875,-3.0,2.0,1.301,6.0,6.0,55.997,3.456
28,2001,Algeria,2750,-3.0,1.0,1.304,6.0,6.0,56.774,3.136
29,2002,Algeria,3375,-3.0,1.0,1.319,6.0,6.0,57.544,2.897
30,2003,Algeria,0,-3.0,0.0,1.185,6.0,6.0,58.313,2.712
31,2004,Algeria,1875,-3.0,0.0,1.169,6.0,6.0,59.118,2.624


In [14]:
# Write the 5-year-lag panel to CSV; index=False leaves the row index out.
df_05.to_csv(
    'Data/Clean/Final/Political-05.csv',
    index=False,
)

In [15]:
# Build the 10-year-lag panel from the loaded political data.
# `data` and `df` both refer to the same re-wrapped DataFrame object.
lag_amount = 10
data = pd.DataFrame(Political_Data)
df = data
df_10 = lag_data(df, lag=lag_amount)
df_10.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
0,1972,Algeria,0,,,,,,,
1,1973,Algeria,0,,,,,,,
2,1974,Algeria,0,,,,,,,
3,1975,Algeria,250,,,,,,,
4,1976,Algeria,0,,,,,,,


In [16]:
# Keep only rows whose Year falls in 2000-2020 (both endpoints included),
# selecting on the Year column rather than on the index.
df_10 = df_10.loc[df_10['Year'].between(2000, 2020)]
df_10.head()

Unnamed: 0,Year,Country,PI,Gov,BC,RPE,PR,CL,Urban,Fertility
27,2000,Algeria,1875,-2.0,2.0,1.798,4.0,4.0,52.085,4.556
28,2001,Algeria,2750,-2.0,2.0,1.42,4.0,4.0,52.871,4.386
29,2002,Algeria,3375,-7.0,2.0,1.324,7.0,7.0,53.657,4.257
30,2003,Algeria,0,-7.0,2.0,1.364,7.0,7.0,54.439,4.059
31,2004,Algeria,1875,-7.0,2.0,1.307,7.0,7.0,55.219,3.846


In [17]:
# Write the 10-year-lag panel to CSV; index=False leaves the row index out.
df_10.to_csv(
    'Data/Clean/Final/Political-10.csv',
    index=False,
)