<a href="https://www.kaggle.com/code/princeiornongu/climate-change-time-series-predicting-the-stc?scriptVersionId=164894752" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

#### Essential Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import warnings
warnings.filterwarnings('ignore')

In [2]:
pip install pycountry

Collecting pycountry
  Downloading pycountry-23.12.11-py3-none-any.whl.metadata (12 kB)
Downloading pycountry-23.12.11-py3-none-any.whl (6.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m45.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycountry
Successfully installed pycountry-23.12.11
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pycountry

# Collect the three-letter (alpha-3) code of every entry pycountry knows about.
valid_iso3_codes = list(map(lambda entry: entry.alpha_3, pycountry.countries))

# Same codes as a set, so membership checks are constant-time.
valid_iso3_set = set(valid_iso3_codes)

#### Importing Datasets and Feature Engineering

In [4]:
# Load the raw climate-change indicators table from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/climate-change-indicators/climate_change_indicators.csv")
# Bare last expression: rendered as the cell's output.
df

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,...,F2013,F2014,F2015,F2016,F2017,F2018,F2019,F2020,F2021,F2022
0,1,"Afghanistan, Islamic Rep. of",AF,AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.281,0.456,1.093,1.555,1.540,1.544,0.910,0.498,1.327,2.012
1,2,Albania,AL,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.333,1.198,1.569,1.464,1.121,2.028,1.675,1.498,1.536,1.518
2,3,Algeria,DZ,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.192,1.690,1.121,1.757,1.512,1.210,1.115,1.926,2.330,1.688
3,4,American Samoa,AS,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.257,1.170,1.009,1.539,1.435,1.189,1.539,1.430,1.268,1.256
4,5,"Andorra, Principality of",AD,AND,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.831,1.946,1.690,1.990,1.925,1.919,1.964,2.562,1.533,3.243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,221,Western Sahara,EH,ESH,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.423,1.401,1.510,1.732,2.204,0.942,1.477,2.069,1.593,1.970
221,222,World,,WLD,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.016,1.053,1.412,1.660,1.429,1.290,1.444,1.711,1.447,1.394
222,223,"Yemen, Rep. of",YE,YEM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,,,,,,,,,,
223,224,Zambia,ZM,ZMB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.790,0.917,1.450,1.401,0.105,0.648,0.855,0.891,0.822,0.686


In [5]:
# Derive df1 from the raw frame without mutating `df`, so this cell can be re-run.
df1 = df.drop(columns=['ObjectId', 'ISO2', 'CTS_Code', 'CTS_Name', 'CTS_Full_Descriptor'])

# Strip the leading "F" only from year columns ("F1961" -> "1961").
# An unanchored replace would remove every "F" from every column name.
df1.columns = df1.columns.str.replace(r'^F(?=\d{4}$)', '', regex=True)

# Keep only rows whose ISO3 code is in valid_iso3_set (this drops aggregates
# such as "World"/WLD). .copy() makes df1 an independent frame so that later
# assignments to it do not act on a slice of another object.
df1 = df1[df1['ISO3'].isin(valid_iso3_set)].copy()
df1

Unnamed: 0,Country,ISO3,Indicator,Unit,Source,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,"Afghanistan, Islamic Rep. of",AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,-0.113,-0.164,0.847,-0.764,-0.244,...,1.281,0.456,1.093,1.555,1.540,1.544,0.910,0.498,1.327,2.012
1,Albania,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.627,0.326,0.075,-0.166,-0.388,...,1.333,1.198,1.569,1.464,1.121,2.028,1.675,1.498,1.536,1.518
2,Algeria,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.164,0.114,0.077,0.250,-0.100,...,1.192,1.690,1.121,1.757,1.512,1.210,1.115,1.926,2.330,1.688
3,American Samoa,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.079,-0.042,0.169,-0.140,-0.562,...,1.257,1.170,1.009,1.539,1.435,1.189,1.539,1.430,1.268,1.256
4,"Andorra, Principality of",AND,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.736,0.112,-0.752,0.308,-0.490,...,0.831,1.946,1.690,1.990,1.925,1.919,1.964,2.562,1.533,3.243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,West Bank and Gaza,PSE,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,-0.110,0.822,0.848,-0.626,-0.031,...,1.114,0.863,1.326,1.615,0.735,2.007,1.204,1.455,1.787,1.074
220,Western Sahara,ESH,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.632,0.576,0.333,0.819,-0.337,...,1.423,1.401,1.510,1.732,2.204,0.942,1.477,2.069,1.593,1.970
222,"Yemen, Rep. of",YEM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.029,-0.009,0.169,-0.251,-0.623,...,,,,,,,,,,
223,Zambia,ZMB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,0.228,-0.168,-0.390,-0.279,-0.418,...,0.790,0.917,1.450,1.401,0.105,0.648,0.855,0.891,0.822,0.686


In [6]:
# Fill missing values in every numeric column with that column's mean.
# The filled block is assigned back to df1 instead of calling
# fillna(inplace=True) on df1[column]: that form is chained assignment, which
# pandas deprecates and which silently does nothing when Copy-on-Write is on.
numeric_columns = df1.select_dtypes(include='number').columns
if len(numeric_columns) > 0:
    df1[numeric_columns] = df1[numeric_columns].fillna(df1[numeric_columns].mean())

In [7]:
# Summary statistics of the numeric (year) columns after the mean fill.
df1.describe()

Unnamed: 0,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
count,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,...,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0
mean,0.162797,-0.01375,-0.006973,-0.069118,-0.247155,0.105005,-0.1108,-0.199495,0.159265,0.092181,...,0.930805,1.115102,1.269112,1.438481,1.280089,1.30217,1.443057,1.551284,1.343042,1.382057
std,0.37093,0.313825,0.354514,0.282994,0.247915,0.350162,0.31336,0.249296,0.28355,0.323241,...,0.315722,0.554662,0.453696,0.390793,0.384935,0.581881,0.455834,0.604871,0.472536,0.652563
min,-0.694,-0.908,-1.27,-0.877,-1.064,-1.801,-1.048,-1.634,-0.9,-1.288,...,0.118,-0.092,-0.43,0.25,0.017,0.238,0.05,0.229,-0.425,-1.305
25%,-0.06725,-0.146,-0.166,-0.21225,-0.36275,-0.00775,-0.252,-0.28725,0.024,-0.021,...,0.749,0.75825,1.0315,1.1635,1.0385,0.88025,1.17425,1.18225,1.02825,0.89225
50%,0.146,-0.025,-0.006973,-0.069118,-0.247155,0.105005,-0.1108,-0.199495,0.159265,0.092181,...,0.919,1.008,1.2315,1.438481,1.280089,1.1485,1.4325,1.493,1.343042,1.3405
75%,0.25275,0.082,0.173,0.09825,-0.11,0.23825,-0.04,-0.0935,0.30475,0.24825,...,1.18225,1.31,1.516,1.69275,1.5145,1.62175,1.676,1.77925,1.5965,1.87525
max,1.892,0.998,1.202,1.097,0.857,1.151,1.134,0.476,0.939,0.978,...,1.643,2.704,2.613,2.459,2.493,2.772,2.689,3.691,2.676,3.243


In [8]:
# Country metadata: rename the columns used later and discard the unused ones.
df3 = (
    pd.read_csv("/kaggle/input/mapping-of-features-world-pop/Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_6298258.csv")
    .rename(columns={"TableName": "Country", "Region": "Sub-Region", "IncomeGroup": "Income Group"})
    .drop(columns=['Unnamed: 5', 'SpecialNotes', 'Country Code'])
)

In [9]:
# Columns of the population-statistics file that are not needed here; only the
# country name, area and density columns are kept.
columns_to_drop = [
    'rank', 'cca3',
    '2022 population', '2020 population', '2015 population', '2010 population',
    '2000 population', '1990 population', '1980 population', '1970 population',
    '2023 population', 'growth rate', 'world percentage', 'continent',
]
df4 = (
    pd.read_csv('/kaggle/input/world-population-statistics-2023/world_population_data.csv')
    .drop(columns=columns_to_drop)
    .rename(columns={"country": "Country", "area (km²)": "Area (KM²)", "density (km²)": "Density (KM²)"})
)
df4

Unnamed: 0,Country,Area (KM²),Density (KM²)
0,India,3287590.00,481
1,China,9706961.00,151
2,United States,9372610.00,37
3,Indonesia,1904569.00,148
4,Pakistan,881912.00,312
...,...,...,...
229,Montserrat,102.00,43
230,Falkland Islands,12173.00,0
231,Niue,261.00,7
232,Tokelau,12.00,189


In [10]:
# Long-format population table: one row per (country or aggregate, year).
# Rows without a population value are dropped before the integer cast.
# The cast names 'int64' explicitly: plain `int` maps to a 32-bit integer on
# some platforms/NumPy builds, and the table includes regional aggregates.
df5 = (
    pd.read_csv('/kaggle/input/melted-world-population/mpopC.csv')
    .dropna(subset=['Population Size in Y'])
    .assign(**{
        'Population Size in Y': lambda frame: frame['Population Size in Y'].astype('int64'),
        'Year': lambda frame: pd.to_numeric(frame['Year']),
    })
)
df5

Unnamed: 0,Country,Country Code,Year,Population Size in Y
0,Aruba,ABW,1961,55811
1,Africa Eastern and Southern,AFE,1961,134169237
2,Afghanistan,AFG,1961,8790140
3,Africa Western and Central,AFW,1961,99314028
4,Angola,AGO,1961,5441333
...,...,...,...,...
16487,Kosovo,XKX,2022,1761985
16488,"Yemen, Rep.",YEM,2022,33696614
16489,South Africa,ZAF,2022,59893885
16490,Zambia,ZMB,2022,20017675


In [11]:
# Inner-join (pandas' default) the temperature table with the country metadata
# and then with the area/density table, matching rows on the "Country" name.
merged_data = df1.merge(df3, on="Country").merge(df4, on="Country")
# Disabled alternative: load a previously saved merged table instead.
#merged_data= pd.read_csv("/kaggle/input/merged-cc/CC.csv")

In [12]:
# Load the saved merged table and put its columns in a fixed order:
# identifiers first, then the year columns 1961..2022, then the descriptive text.
# reindex() would add an all-NaN column for any listed name missing from the file.
f1 = pd.read_csv("/kaggle/input/merged-cc/CC.csv").reindex(
    columns=[
        'Country', 'ISO3', 'Sub-Region', 'Area (KM²)', 'Density (KM²)', 'Income Group',
        *map(str, range(1961, 2023)),
        'Indicator', 'Unit', 'Source',
    ]
)  # final
f1

Unnamed: 0,Country,ISO3,Sub-Region,Area (KM²),Density (KM²),Income Group,1961,1962,1963,1964,...,2016,2017,2018,2019,2020,2021,2022,Indicator,Unit,Source
0,Albania,ALB,Europe & Central Asia,28748.0,103,Upper middle income,0.627,0.326,0.075,-0.166,...,1.464,1.121,2.028,1.675,1.498,1.536,1.518,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
1,Algeria,DZA,Middle East & North Africa,2381741.0,19,Lower middle income,0.164,0.114,0.077,0.250,...,1.757,1.512,1.210,1.115,1.926,2.330,1.688,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
2,American Samoa,ASM,East Asia & Pacific,199.0,220,High income,0.079,-0.042,0.169,-0.140,...,1.539,1.435,1.189,1.539,1.430,1.268,1.256,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
3,Angola,AGO,Sub-Saharan Africa,1246700.0,29,Lower middle income,0.041,-0.152,-0.190,-0.229,...,1.609,0.870,1.395,1.752,1.162,1.553,1.212,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
4,Antigua and Barbuda,ATG,Latin America & Caribbean,442.0,214,High income,0.090,0.031,0.288,0.214,...,1.097,0.958,0.627,0.797,1.131,0.862,0.770,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,United States,USA,North America,9372610.0,37,High income,0.003,-0.026,0.418,-0.282,...,2.224,1.433,1.276,1.034,1.324,1.144,1.217,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
137,Uruguay,URY,Latin America & Caribbean,181034.0,20,High income,0.398,-0.473,0.330,-0.762,...,0.301,1.546,1.196,0.766,0.890,0.790,0.382,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
138,Vanuatu,VUT,East Asia & Pacific,12189.0,27,Lower middle income,0.168,0.092,-0.165,0.220,...,1.146,1.131,0.613,0.897,1.226,1.147,1.479,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...
139,Zambia,ZMB,Sub-Saharan Africa,752612.0,28,Lower middle income,0.228,-0.168,-0.390,-0.279,...,1.401,0.105,0.648,0.855,0.891,0.822,0.686,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...


#### Melting Dataset

In [13]:
# Columns that identify a row and stay fixed while the year columns are unpivoted.
id_cols = [
    'Country', 'ISO3', 'Sub-Region', 'Income Group', 'Indicator', 'Unit', 'Source',
    'Area (KM²)', 'Density (KM²)',
]
# Wide -> long: every non-id column (the years) becomes a (Year, value) row,
# and the Year labels are converted from strings to numbers.
f2 = (
    f1.melt(id_vars=id_cols, var_name='Year', value_name='Surface Temperature Change')
    .assign(Year=lambda long_frame: pd.to_numeric(long_frame['Year']))
)
f2

Unnamed: 0,Country,ISO3,Sub-Region,Income Group,Indicator,Unit,Source,Area (KM²),Density (KM²),Year,Surface Temperature Change
0,Albania,ALB,Europe & Central Asia,Upper middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,28748.0,103,1961,0.627
1,Algeria,DZA,Middle East & North Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,2381741.0,19,1961,0.164
2,American Samoa,ASM,East Asia & Pacific,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,199.0,220,1961,0.079
3,Angola,AGO,Sub-Saharan Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,1246700.0,29,1961,0.041
4,Antigua and Barbuda,ATG,Latin America & Caribbean,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,442.0,214,1961,0.090
...,...,...,...,...,...,...,...,...,...,...,...
8737,United States,USA,North America,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,9372610.0,37,2022,1.217
8738,Uruguay,URY,Latin America & Caribbean,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,181034.0,20,2022,0.382
8739,Vanuatu,VUT,East Asia & Pacific,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,12189.0,27,2022,1.479
8740,Zambia,ZMB,Sub-Saharan Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,752612.0,28,2022,0.686


In [14]:
# Attach the yearly population to each (Country, Year) temperature row; the
# population table's 'Country Code' is dropped from the result.
dfi = f2.merge(df5, on=["Country", "Year"]).drop(columns='Country Code')

In [15]:
# Display the merged long-format table (temperature change plus population).
dfi

Unnamed: 0,Country,ISO3,Sub-Region,Income Group,Indicator,Unit,Source,Area (KM²),Density (KM²),Year,Surface Temperature Change,Population Size in Y
0,Albania,ALB,Europe & Central Asia,Upper middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,28748.0,103,1961,0.627,1659800
1,Algeria,DZA,Middle East & North Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,2381741.0,19,1961,0.164,11598608
2,American Samoa,ASM,East Asia & Pacific,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,199.0,220,1961,0.079,20626
3,Angola,AGO,Sub-Saharan Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,1246700.0,29,1961,0.041,5441333
4,Antigua and Barbuda,ATG,Latin America & Caribbean,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,442.0,214,1961,0.090,56245
...,...,...,...,...,...,...,...,...,...,...,...,...
8737,United States,USA,North America,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,9372610.0,37,2022,1.217,333287557
8738,Uruguay,URY,Latin America & Caribbean,High income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,181034.0,20,2022,0.382,3422794
8739,Vanuatu,VUT,East Asia & Pacific,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,12189.0,27,2022,1.479,326740
8740,Zambia,ZMB,Sub-Saharan Africa,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,752612.0,28,2022,0.686,20017675


In [16]:
# Sanity check: show the merged rows for one specific country (India).
dfi[dfi['Country'] == 'India']

Unnamed: 0,Country,ISO3,Sub-Region,Income Group,Indicator,Unit,Source,Area (KM²),Density (KM²),Year,Surface Temperature Change,Population Size in Y
57,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,1961,-0.208,456351876
198,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,1962,-0.479,467024193
339,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,1963,-0.030,477933619
480,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,1964,-0.030,489059309
621,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,1965,-0.072,500114346
...,...,...,...,...,...,...,...,...,...,...,...,...
8094,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,2018,0.874,1369003306
8235,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,2019,0.802,1383112050
8376,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,2020,0.520,1396387127
8517,India,IND,South Asia,Lower middle income,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,3287590.0,481,2021,0.733,1407563842


#### Exploring Dataset

In [17]:
# Column dtypes, non-null counts and memory usage of the merged table.
dfi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8742 entries, 0 to 8741
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Country                     8742 non-null   object 
 1   ISO3                        8742 non-null   object 
 2   Sub-Region                  8742 non-null   object 
 3   Income Group                8742 non-null   object 
 4   Indicator                   8742 non-null   object 
 5   Unit                        8742 non-null   object 
 6   Source                      8742 non-null   object 
 7   Area (KM²)                  8742 non-null   float64
 8   Density (KM²)               8742 non-null   int64  
 9   Year                        8742 non-null   int64  
 10  Surface Temperature Change  8742 non-null   float64
 11  Population Size in Y        8742 non-null   int64  
dtypes: float64(2), int64(3), object(7)
memory usage: 819.7+ KB


In [18]:
# Number of missing values in each column.
dfi.isnull().sum()

Country                       0
ISO3                          0
Sub-Region                    0
Income Group                  0
Indicator                     0
Unit                          0
Source                        0
Area (KM²)                    0
Density (KM²)                 0
Year                          0
Surface Temperature Change    0
Population Size in Y          0
dtype: int64

In [19]:
# Summary statistics of the numeric columns.
dfi.describe()

Unnamed: 0,Area (KM²),Density (KM²),Year,Surface Temperature Change,Population Size in Y
count,8742.0,8742.0,8742.0,8742.0,8742.0
mean,629120.4,384.51773,1991.5,0.512356,24304570.0
std,1542088.0,1714.786413,17.896554,0.6355,88065010.0
min,2.02,0.0,1961.0,-2.062,5436.0
25%,21041.0,27.0,1976.0,0.062,766682.8
50%,118484.0,92.0,1991.5,0.4415,4919355.0
75%,488100.0,223.0,2007.0,0.921,14749490.0
max,9984670.0,18149.0,2022.0,3.55,1417173000.0


#### Exploratory Data Analysis (EDA)

In [20]:
# Animated world map: one frame per year, each country (located by its ISO3
# code) coloured by its surface temperature change.
fig = px.choropleth(
    dfi,
    locations="ISO3",
    color="Surface Temperature Change",
    hover_name="Country",
    animation_frame="Year",
    title="Annual Surface Temperature Change",
    color_continuous_scale=px.colors.sequential.Plasma,
)

# Draw coastlines and give the colour bar a shorter title.
fig.update_layout(
    geo_showcoastlines=True,
    coloraxis_colorbar_title="Surface Temp Change",
)

fig.show()

In [21]:
# Time series by Sub-Region.
# dfi holds one row per country and year, so a sub-region has many values for
# the same year. Plotting those rows directly makes each sub-region's line
# zig-zag through all of its countries; average per (Sub-Region, Year) first so
# every sub-region is drawn as a single series.
subregion_yearly_mean = (
    dfi.groupby(["Sub-Region", "Year"], as_index=False)["Surface Temperature Change"].mean()
)
fig1 = px.line(subregion_yearly_mean, x="Year", y="Surface Temperature Change",
               title="Surface Temperature Change Over Time by Sub-Region",
               color="Sub-Region",
               labels={"Surface Temperature Change": "Mean Temperature Change (°C)"})

# Show the plot
fig1.show()

In [22]:
# Population over time, one coloured line per country.
fig2 = px.line(
    dfi,
    x='Year',
    y='Population Size in Y',
    color="Country",
    title='Population Size Over Time by Country',
)

fig2.show()

In [23]:
# Time series by Income Group.
# Each income group contains many countries per year; average per
# (Income Group, Year) so each group is one clean line rather than a trace
# that jumps between all of its countries within every year.
income_yearly_mean = (
    dfi.groupby(["Income Group", "Year"], as_index=False)["Surface Temperature Change"].mean()
)
fig3 = px.line(income_yearly_mean, x="Year", y="Surface Temperature Change",
               title="Surface Temperature Change Over Time by Income Group",
               color="Income Group",
               labels={"Surface Temperature Change": "Mean Temperature Change (°C)"})

# Show the plot
fig3.show()

In [24]:
# Time series by Country: one line per country.
# The title now names the grouping that is actually plotted (color="Country");
# it previously said "by Income Group".
figx = px.line(dfi, x="Year", y="Surface Temperature Change",
               title="Surface Temperature Change Over Time by Country",
               color="Country",
               labels={"Surface Temperature Change": "Temperature Change (°C)"})

# Show the plot
figx.show()

In [25]:
# Pie chart of the summed surface temperature change per sub-region.
subregion_pie = dfi.groupby("Sub-Region")["Surface Temperature Change"].sum()
fig4 = px.pie(
    names=subregion_pie.index,
    values=subregion_pie.values,
    title="Surface Temperature Change by Sub-Region",
)
fig4.show()

In [26]:
# Pie chart of the summed surface temperature change per income group.
# The title now matches the grouping column; it previously said "Sub-Region".
incomegroup_pie = f2.groupby("Income Group")["Surface Temperature Change"].sum()
fig5 = px.pie(values=incomegroup_pie.values, names=incomegroup_pie.index,
              title="Surface Temperature Change by Income Group")
fig5.show()

In [27]:
# Scatter of temperature change against country area; points are coloured by
# area and the hover box shows the country and year.
fig6 = px.scatter(
    dfi,
    x='Area (KM²)',
    y='Surface Temperature Change',
    color='Area (KM²)',
    hover_data=['Country', 'Year'],
    title='Surface Temperature Change vs. Area',
    labels={'Surface Temperature Change': 'Temperature Change (°C)', 'Area (KM²)': 'Area (km²)'},
)
fig6.show()

In [28]:
# Scatter of temperature change against population density; points are
# coloured by density and the hover box shows the country and year.
fig7 = px.scatter(
    f2,
    x='Density (KM²)',
    y='Surface Temperature Change',
    color='Density (KM²)',
    hover_data=['Country', 'Year'],
    title='Surface Temperature Change vs. Density',
    labels={'Surface Temperature Change': 'Temperature Change (°C)', 'Density (KM²)': 'Density (km²)'},
)
fig7.show()

In [29]:
# Line chart: surface temperature change over the years.
# dfi has one row per country and year; drawing all rows as one trace connects
# every country's value within each year. Average across countries per year so
# the chart shows a single series.
yearly_mean = dfi.groupby('Year', as_index=False)['Surface Temperature Change'].mean()
fig8 = px.line(yearly_mean, x='Year', y='Surface Temperature Change',
               title='Surface Temperature Change over the Years',
               labels={'Surface Temperature Change': 'Mean Temperature Change (°C)'})
fig8.show()

In [30]:
# Bubble chart: population on x, temperature change on y, bubble size from the
# country's area and colour from its income group.
fig9 = px.scatter(
    dfi,
    x='Population Size in Y',
    y='Surface Temperature Change',
    size='Area (KM²)',
    color='Income Group',
    hover_name='Country',
    title='Population Size vs Surface Temperature Change',
    labels={'Population Size in Y': 'Population Size', 'Surface Temperature Change': 'Surface Temp Change'},
)
fig9.show()

In [31]:
# Mean of the density column per sub-region, shown as a bar chart.
avg_density = dfi.groupby('Sub-Region', as_index=False)['Density (KM²)'].mean()
figxi = px.bar(
    avg_density,
    x='Sub-Region',
    y='Density (KM²)',
    title='Average Density by Sub-Region',
)
figxi.show()

In [32]:
# Heatmap of Correlation
# NOTE(review): this whole cell is commented out, so it produces no figure.
# Presumably disabled because dfi contains non-numeric columns, which
# DataFrame.corr() rejects in recent pandas unless numeric_only=True is
# passed -- TODO confirm before re-enabling or deleting.
#correlation_matrix = dfi.corr()
#figxii = px.imshow(correlation_matrix, color_continuous_scale='Viridis',
                 #labels=dict(x="Features", y="Features", color="Correlation"),
                 #title='Correlation Matrix')
#figxii.show()

In [33]:
# Pairwise scatter matrix over the listed columns, coloured by country.
figxiii = px.scatter_matrix(
    dfi,
    dimensions=[
        'Country',
        'Area (KM²)',
        'Density (KM²)',
        'Population Size in Y',
        'Surface Temperature Change',
    ],
    color='Country',
)
figxiii.show()

#### Time Series

In [34]:
# Separate copy for the time-series work, so dfi keeps its integer Year column.
tsdf = dfi.copy()

In [35]:
# Parse the years as timestamps (format '%Y') and make them the frame's index.
tsdf = tsdf.assign(Year=pd.to_datetime(tsdf['Year'], format='%Y')).set_index('Year')

In [36]:
# Group the date-indexed rows by country; iterating `grouped` yields
# (country name, that country's sub-frame) pairs.
grouped = tsdf.groupby('Country')

In [37]:
# Keep only the forecast target ('Surface Temperature Change') together with
# the country label; the Year index is carried over from tsdf.
tsd = tsdf.loc[:, ['Surface Temperature Change', 'Country']]
tsd

Unnamed: 0_level_0,Surface Temperature Change,Country
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1961-01-01,0.627,Albania
1961-01-01,0.164,Algeria
1961-01-01,0.079,American Samoa
1961-01-01,0.041,Angola
1961-01-01,0.090,Antigua and Barbuda
...,...,...
2022-01-01,1.217,United States
2022-01-01,0.382,Uruguay
2022-01-01,1.479,Vanuatu
2022-01-01,0.686,Zambia


In [38]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt

In [39]:
def fit_arima(country_series, order=(5, 1, 0), steps=5):
    """Fit an ARIMA model to one country's series and forecast ahead.

    Parameters
    ----------
    country_series : pandas.Series
        The observations to model (here: one country's yearly
        'Surface Temperature Change' values, indexed by date).
    order : tuple, default (5, 1, 0)
        The ``order`` argument passed to ``ARIMA``. The default is the value
        that was previously hard-coded.
    steps : int, default 5
        Number of periods to forecast beyond the end of the series. The
        default is the value that was previously hard-coded.

    Returns
    -------
    The object returned by ``model_fit.forecast(steps=steps)``.
    """
    model = ARIMA(country_series, order=order)
    model_fit = model.fit()
    return model_fit.forecast(steps=steps)

In [40]:
# Forecast for every country: maps each country name to the output of
# fit_arima() on that country's 'Surface Temperature Change' series.
predictions_dict = {
    country: fit_arima(data['Surface Temperature Change'])
    for country, data in grouped
}


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be used.


No frequency information was provided, so inferred frequency YS-JAN will be

In [41]:
# Display the forecasts for every country (one entry per country).
predictions_dict

{'Albania': 2023-01-01    1.544039
 2024-01-01    1.581766
 2025-01-01    1.556044
 2026-01-01    1.548471
 2027-01-01    1.551232
 Freq: YS-JAN, Name: predicted_mean, dtype: float64,
 'Algeria': 2023-01-01    1.779681
 2024-01-01    1.801337
 2025-01-01    1.833856
 2026-01-01    1.813483
 2027-01-01    1.836990
 Freq: YS-JAN, Name: predicted_mean, dtype: float64,
 'American Samoa': 2023-01-01    1.363247
 2024-01-01    1.339167
 2025-01-01    1.334091
 2026-01-01    1.332215
 2027-01-01    1.325819
 Freq: YS-JAN, Name: predicted_mean, dtype: float64,
 'Angola': 2023-01-01    1.417233
 2024-01-01    1.485208
 2025-01-01    1.256131
 2026-01-01    1.431289
 2027-01-01    1.365964
 Freq: YS-JAN, Name: predicted_mean, dtype: float64,
 'Antigua and Barbuda': 2023-01-01    0.913354
 2024-01-01    0.902140
 2025-01-01    0.858846
 2026-01-01    0.874159
 2027-01-01    0.882608
 Freq: YS-JAN, Name: predicted_mean, dtype: float64,
 'Argentina': 2023-01-01    0.904673
 2024-01-01    0.872355
 

In [42]:
 #Function to plot Surface Temperature Change for each country
def plot_countries(countries=None):
    """Show one actual-vs-predicted figure per country.

    Reads the module-level ``tsd`` frame (actual values) and
    ``predictions_dict`` (forecasts keyed by country name).

    Parameters
    ----------
    countries : iterable of str, optional
        Countries to plot. When omitted, every country in
        ``predictions_dict`` is plotted, in the dictionary's order.
    """
    selected = list(predictions_dict) if countries is None else list(countries)

    for country in selected:
        predictions = predictions_dict.get(country)

        # Actual Surface Temperature Change values for this country
        actual_data = tsd.loc[tsd['Country'] == country, 'Surface Temperature Change']

        if actual_data.empty:
            print(f"No data found for {country}.")
            continue

        # Create a figure to hold the plot
        figxxi = go.Figure()

        # Plot the actual data
        figxxi.add_trace(go.Scatter(x=actual_data.index, y=actual_data.values,
                                 mode='lines+markers', name=f'Actual - {country}'))

        # Plot the predicted values if available
        if predictions is not None:
            if isinstance(getattr(predictions, 'index', None), pd.DatetimeIndex):
                # The forecast already carries the dates it refers to; use them
                # so the points land on the forecast years. Rebuilding the axis
                # from the last actual date with a year-end frequency placed
                # the first forecast inside the last observed year.
                future_index = predictions.index
            else:
                # No dates on the forecast: continue one year at a time after
                # the last observed date.
                one_year = pd.DateOffset(years=1)
                future_index = pd.date_range(start=actual_data.index[-1] + one_year,
                                             periods=len(predictions), freq=one_year)
            figxxi.add_trace(go.Scatter(x=future_index, y=predictions,
                                     mode='lines+markers', name=f'Predicted - {country}'))

        # Update layout
        figxxi.update_layout(title=f'Surface Temperature Change - Actual vs Predicted ({country})',
                          xaxis_title='Year',
                          yaxis_title='Surface Temperature Change')

        # Show the plot
        figxxi.show()

# Call the function to plot Surface Temperature Change for each country
plot_countries()