## Climate Change Indicator



### Exploring the data

In [1]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Location of the CSV that is handed to pd.read_csv in the next cell.
dataset = os.getenv("DATASET")
if not dataset:
    # os.getenv returns None when the variable is missing; stop here with an
    # actionable message instead of a confusing failure inside pd.read_csv.
    raise RuntimeError(
        "DATASET is not set. Define it in the environment or in a .env file."
    )

In [2]:
import pandas as pd
import numpy as np

# Read the CSV located at `dataset` (taken from the DATASET environment variable) into a DataFrame.
data = pd.read_csv(dataset)

In [3]:
# Preview the first rows to see which columns the dataset provides.
data.head()

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,...,F2013,F2014,F2015,F2016,F2017,F2018,F2019,F2020,F2021,F2022
0,1,"Afghanistan, Islamic Rep. of",AF,AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.281,0.456,1.093,1.555,1.54,1.544,0.91,0.498,1.327,2.012
1,2,Albania,AL,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.333,1.198,1.569,1.464,1.121,2.028,1.675,1.498,1.536,1.518
2,3,Algeria,DZ,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.192,1.69,1.121,1.757,1.512,1.21,1.115,1.926,2.33,1.688
3,4,American Samoa,AS,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.257,1.17,1.009,1.539,1.435,1.189,1.539,1.43,1.268,1.256
4,5,"Andorra, Principality of",AD,AND,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.831,1.946,1.69,1.99,1.925,1.919,1.964,2.562,1.533,3.243


In [4]:
# Column dtypes and non-null counts; columns with gaps show a count below the row total.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 72 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ObjectId             225 non-null    int64  
 1   Country              225 non-null    object 
 2   ISO2                 223 non-null    object 
 3   ISO3                 225 non-null    object 
 4   Indicator            225 non-null    object 
 5   Unit                 225 non-null    object 
 6   Source               225 non-null    object 
 7   CTS_Code             225 non-null    object 
 8   CTS_Name             225 non-null    object 
 9   CTS_Full_Descriptor  225 non-null    object 
 10  F1961                188 non-null    float64
 11  F1962                189 non-null    float64
 12  F1963                188 non-null    float64
 13  F1964                188 non-null    float64
 14  F1965                188 non-null    float64
 15  F1966                192 non-null    flo

### Data Cleaning

In [5]:
# Per-column flag: True where the column holds at least one missing value.
data.isnull().any()

ObjectId     False
Country      False
ISO2          True
ISO3         False
Indicator    False
             ...  
F2018         True
F2019         True
F2020         True
F2021         True
F2022         True
Length: 72, dtype: bool

In [6]:
# Count missing entries per column, then keep the counts for the first ten
# columns only (ObjectId through CTS_Full_Descriptor).
missing_values = data.isnull().sum().iloc[:10]
missing_values

ObjectId               0
Country                0
ISO2                   2
ISO3                   0
Indicator              0
Unit                   0
Source                 0
CTS_Code               0
CTS_Name               0
CTS_Full_Descriptor    0
dtype: int64

In [7]:
# Complete-case filter: a row is kept only if it has a value in every column.
# NOTE(review): this also discards rows whose only gap is ISO2 — confirm that is intended.
data_cleaned = data.dropna(axis=0, how='any')

# Sanity check: every column should now report False.
data_cleaned.isnull().any()

ObjectId     False
Country      False
ISO2         False
ISO3         False
Indicator    False
             ...  
F2018        False
F2019        False
F2020        False
F2021        False
F2022        False
Length: 72, dtype: bool

In [8]:
# Same per-column missing count as before, now on the cleaned frame
# (first ten columns only); every count is expected to be zero.
missing_values = data_cleaned.isnull().sum().iloc[:10]
missing_values

ObjectId               0
Country                0
ISO2                   0
ISO3                   0
Indicator              0
Unit                   0
Source                 0
CTS_Code               0
CTS_Name               0
CTS_Full_Descriptor    0
dtype: int64

### Data Visualization

In [9]:
# Yearly measurement columns are those whose name begins with 'F' (e.g. 'F1961').
years = list(filter(lambda name: name.startswith('F'), data_cleaned.columns))

# Numeric year for each column: drop the leading 'F' and convert to int.
int_years = list(map(lambda label: int(label[1:]), years))

# Unweighted mean over the retained rows, one value per yearly column.
global_avg_temp_change = data_cleaned.loc[:, years].mean()

In [10]:
#### Line Plot

In [11]:
import plotly.express as px

# x and y are supplied as standalone array-likes (one entry per year), so no
# data_frame argument is passed: neither refers to a column of data_cleaned,
# and handing that frame over only obscured where the plotted values come from.
# The 'x' / 'y' keys in `labels` are the names Plotly assigns to unnamed inputs.
fig = px.line(
    x=int_years,
    y=global_avg_temp_change,
    title="Global Average Temperature Change",
    labels={'x': 'Year', 'y': 'Temperature Change'},
)
fig.show()

In [12]:
#### Bar Plot

In [13]:
# x and y are supplied as standalone array-likes (one entry per year), so no
# data_frame argument is passed: neither refers to a column of data_cleaned,
# and handing that frame over only obscured where the plotted values come from.
# The 'x' / 'y' keys in `labels` are the names Plotly assigns to unnamed inputs.
fig = px.bar(
    x=int_years,
    y=global_avg_temp_change,
    title="Global Average Temperature Change",
    labels={'x': 'Year', 'y': 'Temperature Change'},
)
fig.show()

In [14]:
#### Heat Map

In [15]:
selected_years = ['F2002', 'F2006', 'F2010', 'F2014', 'F2018', 'F2022']

# Draw ten rows with a fixed seed so the same countries appear on every run.
sampled_rows = data_cleaned.sample(10, random_state=42)

# Index the sampled rows by country so every heatmap row carries its own label.
# Previously the rows were re-selected from data_cleaned with isin() (original
# frame order) while the labels stayed in sample order, so the y-axis names did
# not match the rows they were drawn next to.
heatmap_data = sampled_rows.set_index('Country')[selected_years].dropna()

# Labels are read back from the plotted frame, which keeps them aligned with
# its rows even if dropna() removes any.
selected_countries = heatmap_data.index.to_numpy()

fig = px.imshow(
    heatmap_data,
    labels=dict(x="Year", y="Country", color="Temperature Change"),
    aspect="auto",
    color_continuous_scale='RdBu_r',
    title="Global Average Temperature Change",
    x=selected_years,
    y=selected_countries,
)
fig.show()

#### Temperature by Decade

In [16]:
# Reshape to long form: one row per (country, yearly column) pair, with the
# column name in 'Year' and the measurement in 'TempChange'.
decade_data = pd.melt(
    data_cleaned,
    id_vars=['Country'],
    value_vars=years,
    var_name='Year',
    value_name='TempChange',
)

# Characters 1-3 of a name such as 'F1961' give '196'; appending "0s" yields '1960s'.
decade_data['Decade'] = decade_data['Year'].str[1:4] + "0s"

# One box per decade, coloured by decade.
axis_labels = {'Decade': 'Decade', 'TempChange': 'Temperature Change (ºC)'}
fig = px.box(
    decade_data,
    x="Decade",
    y="TempChange",
    color="Decade",
    title="Temperature Change by Decade",
    labels=axis_labels,
)
fig.show()

#### Distribution of Temperature Changes in 2022


In [17]:
# Distribution of the F2022 column across the retained rows, in at most 30 bins.
fig = px.histogram(
    data_cleaned,
    x="F2022",
    nbins=30,
    title="Histogram",
)
fig.show()

In [64]:
import plotly.figure_factory as ff
import scipy as sp

# Legend labels, in the same order as the series passed to the plot.
hist_labels = ['2020', '2021', '2022']

# One Series per plotted year, pulled from the matching yearly column.
data_2020, data_2021, data_2022 = (
    data_cleaned[column] for column in ('F2020', 'F2021', 'F2022')
)

# The hover text on the rug marks is the plotted values themselves.
rug_text = [data_2020, data_2021, data_2022]

fig = ff.create_distplot(
    hist_data=list(rug_text),
    group_labels=hist_labels,
    bin_size=.1,
    rug_text=rug_text,
)
fig.show()

#### Predict temperature change in 2022

In [86]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import numpy as np

In [98]:
# Rebuild the list of yearly columns from the cleaned frame, then take out the
# prediction target so that only the remaining years are used as model inputs.
# Note: this rebinds the notebook-level `years` used by earlier cells.
years = data_cleaned.columns[data_cleaned.columns.str.startswith('F')].tolist()
years.remove('F2022')

In [92]:
# Column the model is asked to predict.
target_col = 'F2022'

# Model inputs: every yearly column except the target. The target is filtered
# out here as well, so it cannot leak into X regardless of cell execution order.
feature_cols = [col for col in years if col != target_col]
X = data_cleaned[feature_cols].values

# Target values come from the same frame as X so the rows line up one-to-one.
# The previous source, `data_cleaned_sample`, is never defined in the visible
# notebook and raised NameError on a fresh kernel; selecting by column name
# also avoids depending on the target being the last column.
y = data_cleaned[target_col].values

In [93]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [94]:
# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [95]:
# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

In [97]:
# Coefficient of determination of the predictions on the held-out rows.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
r2

0.6944611693962224