In [52]:
import pandas as pd

In [53]:
def _read_prediction(indicator):
    """Read one indicator's forecast table.

    Parameters
    ----------
    indicator : str
        File-name stem, e.g. ``'co2'`` or ``'mean_schooling'``; the file read
        is ``./lstm_datasets/<indicator>_prediction.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as ``pd.read_csv`` returns it.
    """
    return pd.read_csv(f'./lstm_datasets/{indicator}_prediction.csv')


# Per-indicator forecast tables. A notebook cell only renders its final
# expression, so per-frame `.head()` previews in the middle of the cell would
# be computed and thrown away; only the last frame is previewed below.
df_co2 = _read_prediction('co2')
df_energy = _read_prediction('energy')
df_literacy = _read_prediction('literacy')
df_expected_schooling = _read_prediction('expected_schooling')
df_co2_2 = _read_prediction('co2#2')
df_gdp = _read_prediction('gdp')
df_internet = _read_prediction('internet')
df_gini = _read_prediction('gini')
df_mean_schooling = _read_prediction('mean_schooling')

# The infrastructure table is read from a different directory and does not
# follow the '<indicator>_prediction.csv' naming, so it is loaded directly.
df_infrastructure = pd.read_csv('./datasets/infrastructure.csv')
df_infrastructure.head()


Unnamed: 0,Country,Average Power Line,Average Schools,Average Supermarkets,Average Public Roads,Average Hospitals
0,Ukraine,2.0,0.4,1.2,31.4,1.0
1,UAE,1.8,5.4,4.8,44.8,0.6
2,Argentina,2.0,10.0,9.0,8.6,3.6
3,Austria,3.4,15.6,10.2,45.0,1.0
4,Australia,8.0,16.0,10.8,69.8,1.8


In [54]:
# One set of country names per frame, in the same frame order as before.
countries_sets = [
    set(frame['Country'])
    for frame in (
        df_co2, df_co2_2, df_energy, df_gdp, df_gini, df_internet,
        df_literacy, df_expected_schooling, df_mean_schooling, df_infrastructure,
    )
]

# Countries that appear in every one of the frames.
common_countries = countries_sets[0].intersection(*countries_sets[1:])


def _restrict_to_common(frame):
    """Return the rows of `frame` whose 'Country' is in `common_countries`, reindexed from 0."""
    keep = frame['Country'].isin(common_countries)
    return frame.loc[keep].reset_index(drop=True)


# Rebind every frame to its common-country subset.
df_co2 = _restrict_to_common(df_co2)
df_co2_2 = _restrict_to_common(df_co2_2)
df_energy = _restrict_to_common(df_energy)
df_gdp = _restrict_to_common(df_gdp)
df_gini = _restrict_to_common(df_gini)
df_internet = _restrict_to_common(df_internet)
df_literacy = _restrict_to_common(df_literacy)
df_expected_schooling = _restrict_to_common(df_expected_schooling)
df_mean_schooling = _restrict_to_common(df_mean_schooling)
df_infrastructure = _restrict_to_common(df_infrastructure)


In [55]:
def _sort_by_country_in_place(frame):
    """Sort `frame` by 'Country' and renumber its index from 0, mutating it in place."""
    frame.sort_values('Country', inplace=True)
    frame.reset_index(drop=True, inplace=True)


# The frames are mutated in place (not rebound) so that the df_* names used by
# other cells see the sorted order.
dfs = [
    df_co2,
    df_co2_2,
    df_energy,
    df_gdp,
    df_gini,
    df_internet,
    df_literacy,
    df_expected_schooling,
    df_mean_schooling,
    df_infrastructure,
]
for df in dfs:
    _sort_by_country_in_place(df)

In [56]:
# Coefficients for the columns named '<year> <indicator>' in the forecast frames.
_indicator_weights = {
    'CO2': 0.1893,
    'CO2#2': 0.0714,
    'Energy': -0.2107,
    'GDP': 0.2568,
    'Gini': -1.2037,
    'Internet': 0.1522,
    'Literacy': 0.7657,
    'Expected Schooling': -0.0757,
    'Mean Schooling': -0.4659,
}

# Coefficients for the 'Average ...' columns of the infrastructure frame.
_infrastructure_weights = {
    'Average Power Line': 0.0882,
    'Average Schools': 0.1267,
    'Average Supermarkets': 0.4176,
    'Average Public Roads': 0.2485,
    'Average Hospitals': -0.4348,
}

# Single lookup table used by the CDI computation; indicator keys come first,
# then infrastructure keys, preserving the original key order.
weighting = {**_indicator_weights, **_infrastructure_weights}


In [57]:
# Build the CDI forecast for 2024-2028: for each year, every indicator column
# is multiplied by its coefficient from `weighting` and the terms are summed
# per row. The result is written to CSV and the first rows are printed.

# Forecast frames keyed by indicator name. The name doubles as the key into
# `weighting` and as the suffix of the '<year> <name>' column that is read.
# Insertion order fixes the order in which terms are added.
yearly_frames = {
    'CO2': df_co2,
    'CO2#2': df_co2_2,
    'Energy': df_energy,
    'GDP': df_gdp,
    'Gini': df_gini,
    'Internet': df_internet,
    'Literacy': df_literacy,
    'Expected Schooling': df_expected_schooling,
    'Mean Schooling': df_mean_schooling,
}

# Infrastructure columns carry no year prefix; the same column is used for
# every forecast year.
infrastructure_columns = [
    'Average Power Line',
    'Average Schools',
    'Average Supermarkets',
    'Average Public Roads',
    'Average Hospitals',
]

# The terms are pandas Series taken from different frames, and Series
# arithmetic pairs values by index label. The sum is therefore only meaningful
# when every frame has the same country at the same index label. Verify that
# up front instead of silently mixing countries (e.g. when one frame carries a
# duplicate country row, or the frames were not sorted by 'Country').
mismatched_frames = [
    label
    for label, frame in [*yearly_frames.items(), ('Infrastructure', df_infrastructure)]
    if not frame['Country'].equals(df_co2['Country'])
]
if mismatched_frames:
    raise ValueError(
        "'Country' column does not match df_co2 row for row in: "
        + ", ".join(mismatched_frames)
        + ". Filter every frame to the common countries and sort by 'Country' first."
    )

cdi_predictions = pd.DataFrame()
cdi_predictions['Country'] = df_co2['Country']  # same order in every frame (checked above)

for year in range(2024, 2029):
    # Weighted yearly indicator terms first ...
    values = {
        name: frame[f'{year} {name}'] * weighting[name]
        for name, frame in yearly_frames.items()
    }
    # ... followed by the weighted infrastructure terms.
    values.update({
        column: df_infrastructure[column] * weighting[column]
        for column in infrastructure_columns
    })

    cdi_year = sum(values.values())
    cdi_predictions[f'{year} CDI'] = cdi_year

# Export to CSV
cdi_predictions.to_csv('lstm_datasets/cdi_predictions_w_infrastructure.csv', index=False)
print("\nCDI predictions exported to 'lstm_datasets/cdi_predictions_w_infrastructure.csv'")

# Display first few rows
print("\nFirst few rows of CDI predictions:")
print(cdi_predictions.head())



CDI predictions exported to 'lstm_datasets/cdi_predictions_w_infrastructure.csv'

First few rows of CDI predictions:
     Country      2024 CDI      2025 CDI      2026 CDI      2027 CDI  \
0  Australia  15588.500726  15665.320807  15780.059212  15848.181751   
1    Austria  11886.301142  11860.037477  11961.224329  11993.485730   
2    Belarus   1738.278419   1743.583795   1745.869371   1749.381608   
3     Brazil   2432.494313   2435.309960   2395.299090   2364.459432   
4     Canada  11437.761325  11414.485343  11483.140705  11481.408402   

       2028 CDI  
0  15847.066032  
1  11952.260669  
2   1748.163597  
3   2345.923374  
4  11448.820604  
