In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.cluster import SpectralClustering
from functions import preprocessing, cluster_color, couleurs_vives
import IPython

# 1/ Dataset upload/overview

In [2]:
dataset = pd.read_csv("data/kaya_dataset.csv")
# countries selection :
countries = dataset.country.unique()
# year selection :
# years = df.year.unique()
years = [2022]  # only 2022

df = dataset[dataset["year"].isin(years)]
df = df[df["country"].isin(countries)]

# 2/ Preprocessing & Training

In [3]:
# feature selection
cols = ['country', 'year', 'iso_code']
features_1 = ['co2', 'energy', 'gdp', 'population']
features_2 = ['co2_per_unit_energy', 'energy_per_gdp', 'gdp_per_capita'] 
features_3 = ['co2_per_unit_energy', 'energy_per_gdp', 'gdp_per_capita', 'population']
features = features_2

# preprocessing
X =  preprocessing(df, years, countries, cols, features)

# training
n_clusters = 5
gamma = 0.1
assign_labels = "cluster_qr"
rs = 42 # seed for random functions (center first position)
sc = SpectralClustering(
    n_clusters=n_clusters, affinity='rbf', assign_labels=assign_labels, random_state=rs
    )
cluster_labels = sc.fit_predict(X)


# 3/ Dataset pour animation

In [5]:

df["cluster"] = cluster_labels.astype(str)

dataset_cluster = pd.merge(
    dataset,
    df[["iso_code", "cluster"]],
    on="iso_code",
    how="left",  
)

dataset_cluster['year'] = dataset_cluster['year'].astype(float)

In [6]:
# v√©rification affectation clusters
for k in range(n_clusters):
    print(set(dataset_cluster[dataset_cluster["cluster"]==str(k)]["country"]))

{'Congo', 'Lesotho', 'Mongolia'}
{'Turkmenistan', 'Bahrain', 'North Korea', 'Trinidad and Tobago', 'Venezuela', 'Iceland'}
{'Albania', 'Canada', 'Germany', 'Spain', 'Greece', 'New Zealand', 'United States', 'Cyprus', 'Australia', 'Mauritius', 'Belgium', 'Luxembourg', 'Qatar', 'Colombia', 'Brazil', 'Denmark', 'United Arab Emirates', 'Taiwan', 'France', 'Costa Rica', 'Lithuania', 'Malta', 'Montenegro', 'Switzerland', 'Turkey', 'Hungary', 'Italy', 'Uruguay', 'South Korea', 'Panama', 'Paraguay', 'Sweden', 'Netherlands', 'Slovenia', 'Japan', 'Latvia', 'Israel', 'Ireland', 'Slovakia', 'Armenia', 'Croatia', 'Hong Kong', 'Chile', 'Portugal', 'Seychelles', 'Norway', 'Finland', 'Romania', 'Singapore', 'Georgia', 'Argentina', 'United Kingdom', 'Austria'}
{'Central African Republic', 'Haiti', 'Togo', 'Pakistan', 'Democratic Republic of Congo', 'Mali', 'Ghana', 'Nicaragua', 'Burkina Faso', 'Angola', 'Liberia', 'Zambia', 'Cameroon', "Cote d'Ivoire", 'Nepal', 'Benin', 'Guinea', 'Afghanistan', 'Guinea

In [7]:
print(features)
dataset_cluster.describe()
# dataset_cluster.shape


['co2_per_unit_energy', 'energy_per_gdp', 'gdp_per_capita']


Unnamed: 0,year,co2,energy,gdp,population,co2_per_unit_energy,energy_per_gdp,gdp_per_capita
count,7745.0,7745.0,7745.0,7745.0,7745.0,7745.0,7745.0,7745.0
mean,1997.914009,169.43036,740778200000.0,425915100000.0,40026760.0,0.239455,1.674191,13025.601709
std,15.104324,683.710379,2786040000000.0,1488989000000.0,135093500.0,0.200058,1.631089,14718.564983
min,1965.0,0.022,97659230.0,164206000.0,64082.0,0.018,0.078,361.188725
25%,1986.0,3.691,18504530000.0,18033690000.0,3593782.0,0.186,0.767,2808.878751
50%,1999.0,20.87,88900390000.0,64346110000.0,9754398.0,0.221,1.245,7828.856656
75%,2011.0,84.838,412821000000.0,254289100000.0,28101180.0,0.261,2.053,18309.742796
max,2022.0,11711.808,44518690000000.0,26966020000000.0,1426437000.0,10.689,25.253,163531.400281


In [8]:
color_map = dict(zip([str(k) for k in range(n_clusters)], couleurs_vives))
color_map

{'0': '#E53935',
 '1': '#9C27B0',
 '2': '#A7C7E7',
 '3': '#B2DFDB',
 '4': '#D87C5B'}

In [9]:
px.scatter(data_frame=dataset_cluster,
           x=features[1],
           y=features[2],
           size='population',
           color='cluster',
           color_discrete_map=color_map,
           title='Carbon emissions, Energy and Wealth 1980 - 2024',
        #    log_x=True,
           range_x=[0,5],
           range_y=[0,40000],
           hover_name='country',
           animation_frame='year',
           height=600,
           size_max=100)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed