In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

In [3]:
# Load datasets
climate_data_path = "../Data/Climate Change - datasets/climate_change_dataset.csv"
temperature_data_path = "../Data/Climate Change - datasets/Average Surface Temperature.csv"
sea_level_data_path = "../Data/Climate Change - datasets/Global_sea_level_rise.csv"
climate_risk_data_path = "../Data/Climate Change - datasets/Climate Risk Index.csv"
co2_emissions_data_path = "../Data/Climate Change - datasets/co2-emissions-by-sector.csv"
ghg_emissions_data_path = "../Data/Climate Change - datasets/green house gas-emissions-by-sector.csv"
per_capita_co2_data_path = "../Data/Climate Change - datasets/per-capita-co2-vs-average.csv"

In [4]:
# Read datasets
climate_df = pd.read_csv(climate_data_path)
temperature_df = pd.read_csv(temperature_data_path)
sea_level_df = pd.read_csv(sea_level_data_path)
climate_risk_df = pd.read_csv(climate_risk_data_path)
co2_emissions_df = pd.read_csv(co2_emissions_data_path)
ghg_emissions_df = pd.read_csv(ghg_emissions_data_path)
per_capita_co2_df = pd.read_csv(per_capita_co2_data_path)

In [5]:
# Standardize column names for merging
climate_df.rename(columns={"Country": "Entity"}, inplace=True)
temperature_df.rename(columns={"year": "Year", "Entity": "Entity"}, inplace=True)
sea_level_df.rename(columns={"year": "Year"}, inplace=True)
climate_risk_df.rename(columns={"country": "Entity"}, inplace=True)
co2_emissions_df.rename(columns={"Year": "Year", "Entity": "Entity"}, inplace=True)
ghg_emissions_df.rename(columns={"Year": "Year", "Entity": "Entity"}, inplace=True)
per_capita_co2_df.rename(columns={"Year": "Year", "Entity": "Entity"}, inplace=True)

In [None]:
# Merge datasets
merged_df = climate_df.merge(temperature_df[["Entity", "Year", "Average surface temperature"]], on=["Entity", "Year"], how="left")
merged_df = merged_df.merge(sea_level_df[["Year", "mmfrom1993-2008average"]], on="Year", how="left")
merged_df = merged_df.merge(climate_risk_df[["Entity", "cri_rank", "fatalities_per_100k_total", "losses_per_gdp__total"]], on=["Entity"], how="left")
merged_df = merged_df.merge(co2_emissions_df.drop(columns=["Code"]), on=["Entity", "Year"], how="left")
merged_df = merged_df.merge(ghg_emissions_df.drop(columns=["Code"]), on=["Entity", "Year"], how="left")
merged_df = merged_df.merge(per_capita_co2_df.drop(columns=["Code"]), on=["Entity", "Year"], how="left")

# Rename sea level column for clarity
merged_df.rename(columns={"mmfrom1993-2008average": "Sea Level Anomaly (mm)"}, inplace=True)

# Save the consolidated dataset
merged_df.to_csv("Consolidated_Climate_Prediction_Dataset.csv", index=False)

# Display dataset overview
print("Dataset Shape:", merged_df.shape)
print("Missing Values:", merged_df.isnull().sum().sum())
print("Column Names:", merged_df.columns.tolist())

Dataset Shape: (10482, 36)
Missing Values: 21399
Column Names: ['Year', 'Entity', 'Avg Temperature (°C)', 'CO2 Emissions (Tons/Capita)', 'Sea Level Rise (mm)', 'Rainfall (mm)', 'Population', 'Renewable Energy (%)', 'Extreme Weather Events', 'Forest Area (%)', 'Average surface temperature', 'Sea Level Anomaly (mm)', 'cri_rank', 'fatalities_per_100k_total', 'losses_per_gdp__total', 'Carbon dioxide emissions from buildings', 'Carbon dioxide emissions from industry', 'Carbon dioxide emissions from land use change and forestry', 'Carbon dioxide emissions from other fuel combustion', 'Carbon dioxide emissions from transport', 'Carbon dioxide emissions from manufacturing and construction', 'Fugitive emissions of carbon dioxide from energy production', 'Carbon dioxide emissions from electricity and heat', 'Carbon dioxide emissions from bunker fuels', 'Greenhouse gas emissions from agriculture', 'Greenhouse gas emissions from land use change and forestry', 'Greenhouse gas emissions from waste',

In [8]:
# Overview of the dataset
merged_df.tail().T

Unnamed: 0,10477,10478,10479,10480,10481
Year,2011,2011,2011,2011,2011
Entity,Germany,Germany,Germany,Germany,Germany
Avg Temperature (°C),24.1,24.1,24.1,24.1,24.1
CO2 Emissions (Tons/Capita),17.3,17.3,17.3,17.3,17.3
Sea Level Rise (mm),2.1,2.1,2.1,2.1,2.1
Rainfall (mm),2854,2854,2854,2854,2854
Population,398407112,398407112,398407112,398407112,398407112
Renewable Energy (%),41.0,41.0,41.0,41.0,41.0
Extreme Weather Events,3,3,3,3,3
Forest Area (%),19.8,19.8,19.8,19.8,19.8
