In [41]:
import pandas as pd
import matplotlib.pyplot as plt

In [42]:
# Load the nine per-indicator prediction tables from ./lstm_datasets.
# Every file is named "<indicator>_prediction.csv", so one helper builds the
# path instead of repeating the read_csv call for each indicator.
PREDICTIONS_DIR = './lstm_datasets'


def load_prediction(indicator):
    """Read one indicator's prediction table.

    Parameters
    ----------
    indicator : str
        File-name stem, e.g. ``'co2'`` for ``co2_prediction.csv``.

    Returns
    -------
    pandas.DataFrame
        Contents of ``<PREDICTIONS_DIR>/<indicator>_prediction.csv``.

    Raises
    ------
    FileNotFoundError
        If the CSV file does not exist.
    """
    return pd.read_csv(f'{PREDICTIONS_DIR}/{indicator}_prediction.csv')


df_co2 = load_prediction('co2')
df_energy = load_prediction('energy')
df_literacy = load_prediction('literacy')
df_expected_schooling = load_prediction('expected_schooling')
df_co2_2 = load_prediction('co2#2')
df_gdp = load_prediction('gdp')
df_internet = load_prediction('internet')
df_gini = load_prediction('gini')
df_mean_schooling = load_prediction('mean_schooling')

# A cell renders only its final expression, so a single preview is kept here;
# intermediate .head() calls would be evaluated and thrown away.
df_mean_schooling.head()


Unnamed: 0,Country,2024 Mean Schooling,2025 Mean Schooling,2026 Mean Schooling,2027 Mean Schooling,2028 Mean Schooling
0,Afghanistan,2.99122,3.075291,3.151216,3.218589,3.276677
1,Angola,5.709934,5.811828,5.931969,6.051953,6.165771
2,Albania,11.611897,11.720772,11.818372,11.904657,12.009837
3,Andorra,10.526522,10.524909,10.52348,10.522296,10.521358
4,United Arab Emirates,13.743556,14.019395,14.254596,14.528773,14.895


In [43]:
# Restrict every indicator table to the countries that appear in all of them.
frames_to_filter = (
    df_co2,
    df_co2_2,
    df_energy,
    df_gdp,
    df_gini,
    df_internet,
    df_literacy,
    df_expected_schooling,
    df_mean_schooling,
)

# One set of country names per table, in the order listed above.
countries_sets = [set(frame['Country']) for frame in frames_to_filter]

# Countries present in every table.
common_countries = set.intersection(*countries_sets)


def _keep_common(frame):
    """Return the rows of ``frame`` whose Country is in ``common_countries``.

    The result is a new DataFrame whose index is renumbered from 0.
    """
    in_common = frame['Country'].isin(common_countries)
    return frame[in_common].reset_index(drop=True)


# Rebind each name to its filtered table.
df_co2 = _keep_common(df_co2)
df_co2_2 = _keep_common(df_co2_2)
df_energy = _keep_common(df_energy)
df_gdp = _keep_common(df_gdp)
df_gini = _keep_common(df_gini)
df_internet = _keep_common(df_internet)
df_literacy = _keep_common(df_literacy)
df_expected_schooling = _keep_common(df_expected_schooling)
df_mean_schooling = _keep_common(df_mean_schooling)


In [44]:
# Put every table into the same alphabetical country order. The sort mutates
# each DataFrame in place, so the df_* names keep pointing at the reordered
# tables without being rebound.
dfs = [
    df_co2,
    df_co2_2,
    df_energy,
    df_gdp,
    df_gini,
    df_internet,
    df_literacy,
    df_expected_schooling,
    df_mean_schooling,
]
for df in dfs:
    # ignore_index=True relabels the sorted rows 0..n-1.
    df.sort_values(by='Country', inplace=True, ignore_index=True)

In [45]:
# Multiplier for each indicator, keyed by the indicator label.
# NOTE(review): the notebook does not show where these coefficients come
# from -- TODO record their source.
weighting = dict([
    ('CO2', -0.0339),
    ('CO2#2', 0.1111),
    ('Energy', -0.1315),
    ('GDP', 0.0203),
    ('Gini', -0.3958),
    ('Internet', 0.6896),
    ('Literacy', 0.9832),
    ('Expected Schooling', 0.0757),
    ('Mean Schooling', 0.0202),
])

In [46]:
# Build the CDI prediction table: for each year, CDI is the weighted sum of
# the nine indicator predictions for the same country.

# Indicator label -> table holding that indicator's "<year> <label>" columns.
# The order matches the order in which the terms are added together.
indicator_frames = {
    'CO2': df_co2,
    'CO2#2': df_co2_2,
    'Energy': df_energy,
    'GDP': df_gdp,
    'Gini': df_gini,
    'Internet': df_internet,
    'Literacy': df_literacy,
    'Expected Schooling': df_expected_schooling,
    'Mean Schooling': df_mean_schooling,
}

# The weighted sum combines rows by index label, not by country name. If any
# table lists countries differently (for example a country duplicated in one
# file), values would be attributed to the wrong country without any error.
# Fail loudly instead of writing misaligned results.
for indicator, frame in indicator_frames.items():
    if not frame['Country'].equals(df_co2['Country']):
        raise ValueError(
            f"Country rows of the '{indicator}' table do not line up with the "
            "'CO2' table; check the input files for duplicate or missing countries."
        )

cdi_predictions = pd.DataFrame()
cdi_predictions['Country'] = df_co2['Country']  # Use the common countries list

# Calculate CDI for each year
for year in range(2024, 2029):
    # Weighted contribution of every indicator for the current year, summed
    # in the order of indicator_frames.
    cdi_year = sum(
        frame[f'{year} {indicator}'] * weighting[indicator]
        for indicator, frame in indicator_frames.items()
    )
    cdi_predictions[f'{year} CDI'] = cdi_year

cdi_predictions.head()

Unnamed: 0,Country,2024 CDI,2025 CDI,2026 CDI,2027 CDI,2028 CDI
0,Albania,257.476449,260.731036,264.340391,266.342997,267.581667
1,Algeria,228.285626,230.531269,232.701057,234.420705,235.771508
2,Angola,169.993494,182.784921,201.651239,226.800635,251.407112
3,Armenia,256.280275,260.872501,265.449443,269.433925,272.66997
4,Australia,1386.216945,1391.84825,1400.542381,1405.609881,1405.254213


In [47]:
# Write the CDI table to disk; index=False leaves the row labels out of the file.
cdi_output_path = 'lstm_datasets/cdi_predictions.csv'
cdi_predictions.to_csv(cdi_output_path, index=False)