In [1]:
# Check if running in Google Colab
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive')

    # Define the base path for your data directory for Colab
    basePath = '/content/drive/MyDrive'

else:
    # Running locally or in a different environment
    basePath = '..'

In [2]:
import numpy as np
import pandas as pd
from IPython.display import display

In [3]:
file_path = f'data/processed/cleaned_data1.csv'
df = pd.read_csv(f'{basePath}/{file_path}')

In [4]:
# Define columns to group by
group_cols = ['COUNTRY', 'DATE']

# Identify numeric columns to aggregate (excluding the grouping columns)
numeric_cols_for_agg = df.select_dtypes(include=np.number).columns.tolist()
numeric_cols_for_agg = [col for col in numeric_cols_for_agg if col not in group_cols]

# Group by COUNTRY and DATE and calculate the mean of numeric columns
df_grouped = df.groupby(group_cols)[numeric_cols_for_agg].mean().reset_index()

print("Grouped data by Country and Year (re-calculated):")
display(df_grouped.head())

Grouped data by Country and Year (re-calculated):


Unnamed: 0,COUNTRY,DATE,LATITUDE,LONGITUDE,ELEVATION,CDSD,CLDD,DP01,DP10,DT00,...,FZF0,FZF5,FZF6,HDSD,HDSD_ATTRIBUTES,HTDD,PRCP,TAVG,TMAX,TMIN
0,Albania,1981,41.415,19.721,38.4,,,,,,...,,,,,,,,,20.8,
1,Albania,2016,41.415,19.721,38.4,,,,,,...,,,,,,,,,23.1,
2,Algeria,1892,30.5667,2.8667,397.0,,,15.0,8.0,0.0,...,-0.5,-1.0,,,,,65.7,,,13.9
3,Algeria,1893,30.5667,2.8667,397.0,,,7.0,4.0,0.0,...,0.0,0.0,-4.0,,,,25.4,,,13.4
4,Algeria,1895,30.5667,2.8667,397.0,,,6.0,0.0,0.0,...,,0.0,-2.6,,,,6.6,,,13.7


In [None]:
# Define the time periods (using the same periods as before)
start_year_period1 = 2000
end_year_period1 = 2012
start_year_period2 = 2013
end_year_period2 = 2025 

# Filter df_grouped for the two time periods
df_period1 = df_grouped[(df_grouped['DATE'] >= start_year_period1) & (df_grouped['DATE'] <= end_year_period1)].copy()

df_period2 = df_grouped[(df_grouped['DATE'] >= start_year_period2) & (df_grouped['DATE'] <= end_year_period2)].copy()

In [None]:
group_cols = ['COUNTRY', 'DATE']

# We need to be careful about which columns to include. Let's exclude the ones we know are identifiers or attributes.
exclude_cols = group_cols + ['STATION', 'STATION_NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION']

attribute_cols = [col for col in df_grouped.columns if '_ATTRIBUTES' in col]
exclude_cols.extend(attribute_cols)

numeric_cols_to_analyze = [col for col in df_grouped.select_dtypes(include=np.number).columns if col not in exclude_cols]

In [None]:
# Dictionary to store the change Dataframes (clearing previous results if any)
change_dataframes = {}

for col in numeric_cols_to_analyze:
    print(f"\nCalculating percentage change for: {col}")

    avg_period1 = df_period1.groupby('COUNTRY')[col].mean().reset_index()
    avg_period1.rename(columns={col: f'Avg_{col}_Period1'}, inplace=True)

    avg_period2 = df_period2.groupby('COUNTRY')[col].mean().reset_index()
    avg_period2.rename(columns={col: f'Avg_{col}_Period2'}, inplace=True)

    # Merge the two dataframes on the 'COUNTRY' column
    # Use outer merge to keep all countries from both periods
    change_df = pd.merge(avg_period1, avg_period2, on='COUNTRY', how='outer')

    epsilon = 1e-9
    change_df[f'{col}_Change_Percent'] = ((change_df[f'Avg_{col}_Period2'] - change_df[f'Avg_{col}_Period1']) / (change_df[f'Avg_{col}_Period1'] + epsilon)) * 100

    change_dataframes[f'{col}_change'] = change_df

    print(f"Top 5 countries by percentage change in {col}:")
    display(change_df.sort_values(by=f'{col}_Change_Percent', ascending=False).head())


Calculating percentage change for: CDSD
Top 5 countries by percentage change in CDSD:


Unnamed: 0,COUNTRY,Avg_CDSD_Period1,Avg_CDSD_Period2,CDSD_Change_Percent
112,Svalbard [Norway],0.0,0.025,2500000000.0
114,Switzerland,0.0,0.01,1000000000.0
56,Ireland,1.2,2.454545,104.5455
90,Norway,3.932436,7.322222,86.20068
107,Slovakia,53.569231,78.0,45.60597



Calculating percentage change for: CLDD
Top 5 countries by percentage change in CLDD:


Unnamed: 0,COUNTRY,Avg_CLDD_Period1,Avg_CLDD_Period2,CLDD_Change_Percent
112,Svalbard [Norway],0.0,0.025,2500000000.0
114,Switzerland,0.0,0.01,1000000000.0
56,Ireland,1.2,2.454545,104.5455
90,Norway,3.932436,7.322222,86.20068
107,Slovakia,53.569231,78.0,45.60597



Calculating percentage change for: DP01
Top 5 countries by percentage change in DP01:


Unnamed: 0,COUNTRY,Avg_DP01_Period1,Avg_DP01_Period2,DP01_Change_Percent
92,Pakistan,10.75,34.071429,216.943522
112,Svalbard [Norway],74.0,117.0,58.108108
117,Tunisia,68.75,92.166667,34.060606
83,Morocco,37.0,47.875,29.391892
125,Uruguay,79.0,96.055556,21.589311



Calculating percentage change for: DP10
Top 5 countries by percentage change in DP10:


Unnamed: 0,COUNTRY,Avg_DP10_Period1,Avg_DP10_Period2,DP10_Change_Percent
92,Pakistan,6.6875,20.785714,210.814419
112,Svalbard [Norway],19.0,30.0,57.894737
83,Morocco,13.0,17.125,31.730769
57,Israel,1.538462,2.0,30.0
117,Tunisia,29.5,37.583333,27.40113



Calculating percentage change for: DT00
Top 5 countries by percentage change in DT00:


Unnamed: 0,COUNTRY,Avg_DT00_Period1,Avg_DT00_Period2,DT00_Change_Percent
3,Antarctica,71.68,148.416667,107.054501
126,Uzbekistan,3.259259,6.111111,87.5
113,Sweden,5.423077,9.430556,73.896769
48,Greenland [Denmark],77.708333,96.966667,24.782842
20,Canada,90.712228,92.483548,1.952681



Calculating percentage change for: DT32
Top 5 countries by percentage change in DT32:


Unnamed: 0,COUNTRY,Avg_DT32_Period1,Avg_DT32_Period2,DT32_Change_Percent
53,India,0.0,3.827778,382777800000.0
15,Bolivia,0.0,0.8125,81250000000.0
89,Niger,0.0,0.333333,33333330000.0
34,Egypt,0.2,0.5,150.0
22,Chile,8.125,12.75,56.92308



Calculating percentage change for: DX32
Top 5 countries by percentage change in DX32:


Unnamed: 0,COUNTRY,Avg_DX32_Period1,Avg_DX32_Period2,DX32_Change_Percent
3,Antarctica,227.211111,303.357143,33.51334
113,Sweden,42.871795,55.458333,29.358553
48,Greenland [Denmark],164.034722,200.7,22.352144
20,Canada,147.68685,149.084507,0.946365
66,"Korea, South",10.055556,10.111111,0.552486



Calculating percentage change for: DX70
Top 5 countries by percentage change in DX70:


Unnamed: 0,COUNTRY,Avg_DX70_Period1,Avg_DX70_Period2,DX70_Change_Percent
112,Svalbard [Norway],0.0,0.166667,16666670000.0
114,Switzerland,0.076923,0.318182,313.6364
101,Romania,53.35,102.375,91.89316
90,Norway,6.854304,10.168056,48.34556
111,Sri Lanka,189.861111,272.25,43.39429



Calculating percentage change for: DX90
Top 5 countries by percentage change in DX90:


Unnamed: 0,COUNTRY,Avg_DX90_Period1,Avg_DX90_Period2,DX90_Change_Percent
123,United Kingdom,0.0,0.075,7500000000.0
113,Sweden,0.0,0.055556,5555556000.0
85,Nepal,2.0,13.0,550.0
101,Romania,1.533333,7.5,389.1304
107,Slovakia,0.692308,2.375,243.0556



Calculating percentage change for: EMNT
Top 5 countries by percentage change in EMNT:


Unnamed: 0,COUNTRY,Avg_EMNT_Period1,Avg_EMNT_Period2,EMNT_Change_Percent
104,Saudi Arabia,3.4125,18.4,439.194139
22,Chile,0.275,1.175,327.272726
53,India,5.516667,9.854074,78.623699
69,Libya,3.075,5.0,62.601626
80,Mexico,8.85,11.9,34.463277



Calculating percentage change for: EMXP
Top 5 countries by percentage change in EMXP:


Unnamed: 0,COUNTRY,Avg_EMXP_Period1,Avg_EMXP_Period2,EMXP_Change_Percent
92,Pakistan,48.95,107.728571,120.078798
16,Bosnia and Herzegovina,73.25,108.866667,48.623436
106,Seychelles,136.683333,189.3,38.495305
64,Kiribati,91.4,123.8,35.448578
128,Vietnam,208.044444,279.4,34.298227



Calculating percentage change for: EMXT
Top 5 countries by percentage change in EMXT:


Unnamed: 0,COUNTRY,Avg_EMXT_Period1,Avg_EMXT_Period2,EMXT_Change_Percent
101,Romania,24.461667,30.3625,24.122777
111,Sri Lanka,28.977778,31.3875,8.315759
112,Svalbard [Norway],14.961538,16.158333,7.999143
67,Kyrgyzstan,34.453846,36.995,7.37553
85,Nepal,32.4,34.6,6.790123



Calculating percentage change for: FZF0
Top 5 countries by percentage change in FZF0:


Unnamed: 0,COUNTRY,Avg_FZF0_Period1,Avg_FZF0_Period2,FZF0_Change_Percent
22,Chile,-0.15,-0.45,200.000001
3,Antarctica,-2.92,-5.091667,74.372146
86,Netherlands,-0.930769,-1.383333,48.62259
56,Ireland,-0.7,-0.933333,33.333333
107,Slovakia,-0.792308,-0.9875,24.635922



Calculating percentage change for: FZF5
Top 5 countries by percentage change in FZF5:


Unnamed: 0,COUNTRY,Avg_FZF5_Period1,Avg_FZF5_Period2,FZF5_Change_Percent
3,Antarctica,-2.466667,-6.733333,172.972973
107,Slovakia,-0.830769,-1.3625,64.00463
68,Latvia,-0.92,-1.5,63.043478
113,Sweden,-0.692308,-1.077778,55.679012
41,Finland,-1.074359,-1.6,48.926014



Calculating percentage change for: FZF6
Top 5 countries by percentage change in FZF6:


Unnamed: 0,COUNTRY,Avg_FZF6_Period1,Avg_FZF6_Period2,FZF6_Change_Percent
3,Antarctica,-3.632,-6.933333,90.895742
124,United States,-3.347512,-3.940194,17.705152
29,Croatia,-3.507692,-3.925,11.89693
125,Uruguay,-2.575,-2.86,11.067961
48,Greenland [Denmark],-3.025,-3.296667,8.980716



Calculating percentage change for: HDSD
Top 5 countries by percentage change in HDSD:


Unnamed: 0,COUNTRY,Avg_HDSD_Period1,Avg_HDSD_Period2,HDSD_Change_Percent
33,Denmark,3107.2,3971.9,27.828913
101,Romania,5090.345,6402.0,25.767507
3,Antarctica,8700.146667,10849.36,24.703185
113,Sweden,4238.732051,4655.141667,9.823919
48,Greenland [Denmark],8569.389394,9260.66875,8.066845



Calculating percentage change for: HTDD
Top 5 countries by percentage change in HTDD:


Unnamed: 0,COUNTRY,Avg_HTDD_Period1,Avg_HTDD_Period2,HTDD_Change_Percent
33,Denmark,3107.2,3971.9,27.828913
101,Romania,5090.345,6402.0,25.767507
3,Antarctica,8700.146667,10849.36,24.703185
113,Sweden,4238.732051,4655.141667,9.823919
48,Greenland [Denmark],8569.389394,9260.66875,8.066845



Calculating percentage change for: PRCP
Top 5 countries by percentage change in PRCP:


Unnamed: 0,COUNTRY,Avg_PRCP_Period1,Avg_PRCP_Period2,PRCP_Change_Percent
92,Pakistan,129.2625,519.871429,302.182712
57,Israel,13.415385,20.75,54.673165
112,Svalbard [Norway],175.0,243.066667,38.895238
23,China,720.235502,917.533086,27.393482
125,Uruguay,954.95,1214.7,27.200377



Calculating percentage change for: TAVG
Top 5 countries by percentage change in TAVG:


Unnamed: 0,COUNTRY,Avg_TAVG_Period1,Avg_TAVG_Period2,TAVG_Change_Percent
3,Antarctica,-6.538,-12.133333,85.581727
60,Jan Mayen [Norway],0.653846,1.158333,77.156863
48,Greenland [Denmark],-4.402273,-6.576667,49.392531
41,Finland,3.005128,3.741667,24.509386
7,Austria,5.635897,6.46,14.622384



Calculating percentage change for: TMAX
Top 5 countries by percentage change in TMAX:


Unnamed: 0,COUNTRY,Avg_TMAX_Period1,Avg_TMAX_Period2,TMAX_Change_Percent
48,Greenland [Denmark],-0.534722,-2.868333,436.415585
3,Antarctica,-4.22,-8.771429,107.853758
101,Romania,7.156667,12.7625,78.330228
102,Russia,2.613253,4.263974,63.167259
114,Switzerland,2.161538,2.922727,35.215141



Calculating percentage change for: TMIN
Top 5 countries by percentage change in TMIN:


Unnamed: 0,COUNTRY,Avg_TMIN_Period1,Avg_TMIN_Period2,TMIN_Change_Percent
118,Turkey,9.72619,16.166667,66.21787
3,Antarctica,-9.624,-15.925,65.471737
107,Slovakia,1.569231,2.475,57.720588
101,Romania,2.465385,3.604545,46.206212
7,Austria,2.115385,2.903333,37.248485



Percentage change dataframes stored in 'change_dataframes' dictionary.


In [8]:
df_grouped.to_csv(f'{basePath}/data/processed/grouped_data2.csv', index=False)

In [9]:
#Note: the change dataframes will be used in th enext file to generate the graphs/charts so saving it

In [10]:
import pickle
import os

# Define the directory to save the file
save_dir = f'{basePath}/data/processed'
os.makedirs(save_dir, exist_ok=True)

# Define the file path
file_path = os.path.join(save_dir, 'change_dataframes.pkl')

# Save the dictionary using pickle
with open(file_path, 'wb') as f:
    pickle.dump(change_dataframes, f)

print(f"change_dataframes dictionary saved to {file_path}")

change_dataframes dictionary saved to ../data/processed/change_dataframes.pkl
