In [24]:
import pandas as pd
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warning raised by the model fitting further down;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Locations of the three input CSV files (relative paths).
file1_path, file2_path, file3_path = (
    'co2_emissions_kt_by_country.csv',
    'sealevel.csv',
    'climate_change_indicators.csv',
)

# Read each file into its own DataFrame, in the order listed above.
co2_emissions, sea_level, climate_indicators = (
    pd.read_csv(csv_path) for csv_path in (file1_path, file2_path, file3_path)
)

# Keep the first five rows of each frame to check column names and layout.
co2_emissions_head = co2_emissions.head(n=5)
sea_level_head = sea_level.head(n=5)
climate_indicators_head = climate_indicators.head(n=5)

(co2_emissions_head, sea_level_head, climate_indicators_head)


(  country_code country_name  year      value
 0          ABW        Aruba  1960  11092.675
 1          ABW        Aruba  1961  11576.719
 2          ABW        Aruba  1962  12713.489
 3          ABW        Aruba  1963  12178.107
 4          ABW        Aruba  1964  11840.743,
    Year  TotalWeightedObservations  GMSL_noGIA  StdDevGMSL_noGIA  \
 0  1993                  327401.31      -38.59             89.86   
 1  1993                  324498.41      -41.97             90.86   
 2  1993                  333018.19      -41.93             87.27   
 3  1993                  297483.19      -42.67             90.75   
 4  1993                  321635.81      -37.86             90.26   
 
    SmoothedGSML_noGIA  GMSL_GIA  StdDevGMSL_GIA  SmoothedGSML_GIA  \
 0              -38.76    -38.59           89.86            -38.75   
 1              -39.78    -41.97           90.86            -39.77   
 2              -39.62    -41.91           87.27            -39.61   
 3              -39.67    -

In [25]:
# Per-column count of missing entries for each of the three raw tables.
missing_values = {
    label: frame.isna().sum()
    for label, frame in (
        ("co2_emissions", co2_emissions),
        ("sea_level", sea_level),
        ("climate_indicators", climate_indicators),
    )
}

In [26]:
# Keep only the country name, year and emission value columns.
co2_columns_to_keep = ["country_name", "year", "value"]
co2_emissions_cleaned = co2_emissions[co2_columns_to_keep]



In [27]:
# Keep the year plus the raw and smoothed sea-level columns without GIA adjustment.
sea_level_columns_to_keep = ["Year", "GMSL_noGIA", "SmoothedGSML_noGIA"]
sea_level_cleaned = sea_level[sea_level_columns_to_keep]



In [28]:
# Keep the three descriptive columns plus every column whose name starts with "F".
# NOTE(review): climate_indicators_cleaned is reassigned later in the notebook with
# only the 2013-2022 columns, so this selection is superseded there.
f_prefixed_columns = [name for name in climate_indicators.columns if name.startswith("F")]
climate_indicators_cleaned = climate_indicators[
    ["Country", "Indicator", "Unit", *f_prefixed_columns]
]

In [29]:
# Column index of the raw climate indicators table.
climate_indicators_columns = climate_indicators.columns

# Display how many columns there are and what they are called.
dict(
    total_columns=len(climate_indicators_columns),
    column_names=climate_indicators_columns.tolist(),
)

{'total_columns': 72,
 'column_names': ['ObjectId',
  'Country',
  'ISO2',
  'ISO3',
  'Indicator',
  'Unit',
  'Source',
  'CTS_Code',
  'CTS_Name',
  'CTS_Full_Descriptor',
  'F1961',
  'F1962',
  'F1963',
  'F1964',
  'F1965',
  'F1966',
  'F1967',
  'F1968',
  'F1969',
  'F1970',
  'F1971',
  'F1972',
  'F1973',
  'F1974',
  'F1975',
  'F1976',
  'F1977',
  'F1978',
  'F1979',
  'F1980',
  'F1981',
  'F1982',
  'F1983',
  'F1984',
  'F1985',
  'F1986',
  'F1987',
  'F1988',
  'F1989',
  'F1990',
  'F1991',
  'F1992',
  'F1993',
  'F1994',
  'F1995',
  'F1996',
  'F1997',
  'F1998',
  'F1999',
  'F2000',
  'F2001',
  'F2002',
  'F2003',
  'F2004',
  'F2005',
  'F2006',
  'F2007',
  'F2008',
  'F2009',
  'F2010',
  'F2011',
  'F2012',
  'F2013',
  'F2014',
  'F2015',
  'F2016',
  'F2017',
  'F2018',
  'F2019',
  'F2020',
  'F2021',
  'F2022']}

In [30]:
# Raw column names for the years 2013 through 2022.
year_columns = ["F" + str(year) for year in range(2013, 2023)]
selected_columns = ["Country", "Indicator", "Unit", *year_columns]

# Keep only those columns and drop the leading "F" so each year column is named by its year.
climate_indicators_cleaned = climate_indicators[selected_columns].rename(
    columns={raw_name: raw_name[1:] for raw_name in year_columns}
)

# Show the first 30 rows of the cleaned table.
climate_indicators_cleaned.head(n=30)


Unnamed: 0,Country,Indicator,Unit,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,"Afghanistan, Islamic Rep. of",Temperature change with respect to a baseline ...,Degree Celsius,1.281,0.456,1.093,1.555,1.54,1.544,0.91,0.498,1.327,2.012
1,Albania,Temperature change with respect to a baseline ...,Degree Celsius,1.333,1.198,1.569,1.464,1.121,2.028,1.675,1.498,1.536,1.518
2,Algeria,Temperature change with respect to a baseline ...,Degree Celsius,1.192,1.69,1.121,1.757,1.512,1.21,1.115,1.926,2.33,1.688
3,American Samoa,Temperature change with respect to a baseline ...,Degree Celsius,1.257,1.17,1.009,1.539,1.435,1.189,1.539,1.43,1.268,1.256
4,"Andorra, Principality of",Temperature change with respect to a baseline ...,Degree Celsius,0.831,1.946,1.69,1.99,1.925,1.919,1.964,2.562,1.533,3.243
5,Angola,Temperature change with respect to a baseline ...,Degree Celsius,1.044,0.828,1.331,1.609,0.87,1.395,1.752,1.162,1.553,1.212
6,Anguilla,Temperature change with respect to a baseline ...,Degree Celsius,0.77,0.814,1.051,1.125,0.96,0.664,0.843,1.224,0.893,0.839
7,Antigua and Barbuda,Temperature change with respect to a baseline ...,Degree Celsius,0.783,0.744,1.035,1.097,0.958,0.627,0.797,1.131,0.862,0.77
8,Argentina,Temperature change with respect to a baseline ...,Degree Celsius,0.442,0.951,0.957,0.488,1.095,0.878,0.76,1.123,1.031,0.643
9,"Armenia, Rep. of",Temperature change with respect to a baseline ...,Degree Celsius,1.407,1.283,1.931,1.356,0.889,2.772,1.859,1.954,2.087,1.707


In [31]:
import matplotlib.pyplot as plt

# Sum the emission values of every row, year by year.
# NOTE(review): presumably country_name holds individual countries only; if the file
# also contains regional or world aggregate rows this total double counts - verify.
global_co2_emissions = co2_emissions_cleaned.groupby("year")["value"].agg("sum")

# Line chart of the yearly totals.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(global_co2_emissions.index, global_co2_emissions.to_numpy(), marker='o')
ax.set_title("Global CO2 Emissions Over Time (kt)", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("CO2 Emissions (kt)", fontsize=12)
ax.grid()
plt.show()

In [32]:
# The five country_name values with the largest emissions summed over all years.
# NOTE(review): if country_name also contains aggregate rows (regions, income groups,
# world), they would dominate this ranking - verify against the raw file.
emissions_by_country = co2_emissions_cleaned.groupby("country_name")["value"].sum()
top_countries = emissions_by_country.nlargest(5)

# Restrict the table to those five names, then build a year x country table of emissions.
is_top_country = co2_emissions_cleaned["country_name"].isin(top_countries.index)
top_countries_emissions = (
    co2_emissions_cleaned[is_top_country]
    .groupby(["year", "country_name"])["value"]
    .sum()
    .unstack("country_name")
)

# One line per country, drawn in ranking order.
fig, ax = plt.subplots(figsize=(12, 8))
for country in top_countries.index:
    ax.plot(
        top_countries_emissions.index,
        top_countries_emissions[country],
        marker='o',
        label=country,
    )

ax.set_title("Top 5 Countries by CO2 Emissions Over Time", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("CO2 Emissions (kt)", fontsize=12)
ax.legend(title="Country")
ax.grid()
plt.show()

# Display the five names together with their summed emissions.
top_countries.reset_index(name="Total Emissions (kt)")

In [33]:
# Plot every GMSL_noGIA observation against its Year value.
# NOTE(review): the preview of the raw file shows several rows sharing the same Year,
# so those points are drawn at the same x position; a per-year aggregate may read better.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sea_level_cleaned["Year"], sea_level_cleaned["GMSL_noGIA"], marker='o', color='teal')
ax.set_title("Global Mean Sea Level (GMSL) Over Time (No GIA Adjustment)", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("GMSL (mm)", fontsize=12)
ax.grid()
plt.show()

# Descriptive statistics of the retained sea-level columns.
sea_level_summary = sea_level_cleaned.describe()
sea_level_summary

In [34]:
# Rows whose Indicator text contains "Temperature change" (case-insensitive).
is_temperature_row = climate_indicators_cleaned["Indicator"].str.contains(
    "Temperature change", case=False
)
temperature_data = climate_indicators_cleaned[is_temperature_row]

# Unweighted mean across those rows for every column after the first three
# (the year columns).
year_values = temperature_data.iloc[:, 3:]
temperature_trends = year_values.mean(axis=0)

# Line chart of the per-year means.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(temperature_trends.index, temperature_trends.to_numpy(), marker='o', color='darkorange')
ax.set_title("Global Temperature Change Trends (2013-2022)", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Temperature Change (°C)", fontsize=12)
ax.grid()
plt.show()

# Descriptive statistics of the per-year means.
temperature_summary = temperature_trends.describe()
temperature_summary

In [35]:
# Build one two-column table per variable, each keyed by an integer "Year" column.

# Yearly CO2 totals.
co2_global = (
    global_co2_emissions
    .rename_axis("Year")
    .reset_index(name="CO2_Emissions")
    .astype({"Year": int})
)

# Per-year mean temperature change; the year labels are converted to integers.
temperature_global = (
    temperature_trends
    .rename_axis("Year")
    .reset_index(name="Average_Temperature")
    .astype({"Year": int})
)

# Mean GMSL_noGIA per Year, over every year present in the sea-level table.
sea_level_avg = (
    sea_level_cleaned
    .groupby("Year")["GMSL_noGIA"]
    .mean()
    .reset_index(name="Sea_Level")
)

# Inner joins on Year: only years present in all three tables are kept.
merged_data = (
    co2_global
    .merge(sea_level_avg, on="Year", how="inner")
    .merge(temperature_global, on="Year", how="inner")
)

# Pairwise correlations between all columns of the merged table (Year included).
correlation_matrix = merged_data.corr()

(merged_data, correlation_matrix)

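- **CO₂ et niveau de la mer** : corrélation modérée (0,78), indiquant une relation entre les émissions et la montée des eaux.
- **CO₂ et température** : corrélation faible à modérée (0,49), suggérant une influence indirecte.
- **Température et niveau de la mer** : corrélation élevée (0,90), montrant une forte relation entre le réchauffement climatique et la montée des eaux.

Remarque : ces coefficients sont calculés uniquement sur les années communes aux trois jeux de données (jointure interne sur l'année) ; ils décrivent une association statistique, pas un lien de causalité.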

# Training of the model (SARIMAX)

In [36]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import numpy as np

# Index the merged table by year so each column can be used as a yearly series.
time_series_data = merged_data.set_index(keys="Year")

# Fit a SARIMA model with caller-supplied orders and forecast ahead.
# No parameter search happens here: the orders are used exactly as given.
def train_sarima(series, order=(1, 1, 1), seasonal_order=(0, 1, 1, 3), forecast_steps=10):
    """
    Fit a SARIMAX model to ``series`` and forecast ``forecast_steps`` periods ahead.

    Parameters
    ----------
    series
        Observations to model. Expected to be a pandas Series: its ``.index`` is read,
        and the last index label must support ``+ 1`` (e.g. an integer year) because
        the forecast labels are built by counting up from it.
    order : tuple
        Passed to ``SARIMAX`` as ``order``.
    seasonal_order : tuple
        Passed to ``SARIMAX`` as ``seasonal_order``.
    forecast_steps : int
        Number of out-of-sample steps to forecast.

    Returns
    -------
    forecast_index : numpy.ndarray
        ``forecast_steps`` consecutive labels starting one past ``series.index[-1]``.
    forecast_values : pandas.Series
        Predicted mean of the forecast, labelled with ``forecast_index``.
    results
        The fitted results object returned by ``model.fit``.
    """
    # Stationarity and invertibility constraints are both switched off for the fit.
    model = SARIMAX(
        series,
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    results = model.fit(disp=False)

    # Out-of-sample forecast and the labels it should carry.
    forecast = results.get_forecast(steps=forecast_steps)
    last_label = series.index[-1]
    forecast_index = np.arange(last_label + 1, last_label + 1 + forecast_steps)

    # Relabel the forecast so it carries the same labels as forecast_index.
    # NOTE(review): with a plain integer year index statsmodels appears to label
    # forecasts by position rather than by year; relabelling here makes the returned
    # series line up with forecast_index regardless of that.
    forecast_values = forecast.predicted_mean.copy()
    forecast_values.index = forecast_index

    return forecast_index, forecast_values, results

# Forecast horizon shared by the three models.
forecast_steps = 10

# Model orders per column of time_series_data.
# NOTE(review): the merged series is annual and has at most 10 rows (the temperature
# table only covers 2013-2022 and the joins are inner joins), so a seasonal period of
# 12 is longer than the sample itself - confirm that these orders are intended.
sarima_settings = {
    "CO2_Emissions": {"order": (2, 1, 2), "seasonal_order": (0, 1, 1, 3)},
    "Sea_Level": {"order": (1, 1, 1), "seasonal_order": (1, 1, 0, 12)},
    "Average_Temperature": {"order": (1, 0, 1), "seasonal_order": (1, 0, 1, 12)},
}

# Fit one model per column, in the order listed above.
fitted_models = {
    column: train_sarima(time_series_data[column], forecast_steps=forecast_steps, **settings)
    for column, settings in sarima_settings.items()
}

# Expose each (index, values, model) triple under its own names.
co2_forecast_index, co2_forecast_values, co2_model = fitted_models["CO2_Emissions"]
sea_level_forecast_index, sea_level_forecast_values, sea_level_model = fitted_models["Sea_Level"]
temperature_forecast_index, temperature_forecast_values, temperature_model = fitted_models[
    "Average_Temperature"
]

# Forecast labels and values per column, without the fitted model objects.
forecasts = {
    column: (future_index, future_values)
    for column, (future_index, future_values, _) in fitted_models.items()
}

forecasts

In [37]:
# Three stacked panels, one per variable: observed values plus the dashed forecast.
fig, axes = plt.subplots(3, 1, figsize=(15, 10))

# (column, forecast labels, forecast values, forecast colour, panel title, y-axis label)
panels = [
    (
        "CO2_Emissions",
        co2_forecast_index,
        co2_forecast_values,
        'orange',
        "CO2 Emissions Prediction (kt) - Adjusted",
        "CO2 Emissions (kt)",
    ),
    (
        "Sea_Level",
        sea_level_forecast_index,
        sea_level_forecast_values,
        'teal',
        "Sea Level Prediction (mm) - Adjusted",
        "Sea Level (mm)",
    ),
    (
        "Average_Temperature",
        temperature_forecast_index,
        temperature_forecast_values,
        'red',
        "Average Temperature Prediction (°C) - Adjusted",
        "Temperature Change (°C)",
    ),
]

for ax, (column, future_years, predicted, forecast_color, panel_title, y_label) in zip(axes, panels):
    ax.plot(time_series_data.index, time_series_data[column], label="Observed", marker='o')
    ax.plot(future_years, predicted, label="Forecast", linestyle='--', color=forecast_color)
    ax.set_title(panel_title, fontsize=14)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.legend()
    ax.grid()

plt.tight_layout()
plt.show()
