# Final Data Integration (Carbon Dioxide, Energy, Temperature)

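We merge **CO₂ emissions**, **energy consumption**, and **global temperature** into one consolidated dataset. CO₂ and energy are outer-joined on `country` and `year`; the global temperature series is then left-joined by `year` onto the `WORLD` rows only, so per-country rows carry no temperature values.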

In [1]:
# Import the libraries
import pandas as pd
from pathlib import Path

In [2]:
# File paths
# BASE_DIR is the grandparent (parents[1]) of the current working directory.
# NOTE(review): presumably the notebook lives two folders below the project
# root and is launched from its own folder — confirm.
BASE_DIR = Path().resolve().parents[1]

# Input CSVs are read from <BASE_DIR>/data/pre_processed.
PRE_PROCESSED_DIR = BASE_DIR.joinpath("data", "pre_processed")
ei_FILE = PRE_PROCESSED_DIR.joinpath("ei_energy_supply.csv")
co2_FILE = PRE_PROCESSED_DIR.joinpath("Final_CO2_Emissions_Dataset.csv")
Temp_FILE = PRE_PROCESSED_DIR.joinpath("global_temp_data.csv")

# Destination of the fully merged dataset.
OUTPUT_PATH = BASE_DIR.joinpath("data", "final", "whole_data_merged.csv")

In [3]:
# Load the datasets

# Read the three CSVs in one pass; the order of the names on the left
# matches the order of the path constants on the right.
energy_df, co2_df, temp_df = (
    pd.read_csv(csv_path) for csv_path in (ei_FILE, co2_FILE, Temp_FILE)
)

In [4]:
# Print the element-wise boolean mask marking rows whose country is exactly 'World'.
print(energy_df["country"].eq("World"))

0       False
1       False
2       False
3       False
4       False
        ...  
4522     True
4523     True
4524     True
4525     True
4526     True
Name: country, Length: 4527, dtype: bool


In [5]:
# Normalise the join keys: upper-case, then strip surrounding whitespace from
# the country names of both frames so they compare equal across sources.
for frame in (co2_df, energy_df):
    frame["country"] = frame["country"].str.upper().str.strip()

# NOTE(review): only the CO₂ frame's year is cast to int; energy_df["year"]
# keeps whatever dtype read_csv inferred — confirm the two key dtypes match.
co2_df["year"] = co2_df["year"].astype(int)

# Merge CO₂ + Energy using OUTER JOIN, so (country, year) pairs present in
# only one of the two sources are still kept.
merged_ce = co2_df.merge(energy_df, how="outer", on=["country", "year"])

In [6]:
# CO₂ + Energy
# Write the intermediate CO₂ + energy merge into the pre_processed folder,
# without the DataFrame index.
co2_energy_path = BASE_DIR.joinpath("data", "pre_processed", "co2&energy_merged.csv")
merged_ce.to_csv(co2_energy_path, index=False)

# Report (rows, columns) of the frame that was just written.
print(" Final dataset saved with shape:", merged_ce.shape)

 Final dataset saved with shape: (61940, 16)


In [7]:
# Attach global temperature to the 'WORLD' rows only (LEFT JOIN on year),
# then stack them back together with the untouched remaining rows.

# Case-insensitive flag for the 'World' rows of the CO₂ + energy merge.
is_world = merged_ce["country"].str.upper() == "WORLD"

world_rows = merged_ce[is_world]
non_world_rows = merged_ce[~is_world]  # kept as-is, no temperature join

# LEFT JOIN keeps every World row even when temp_df has no entry for its year.
world_merged_temp = world_rows.merge(temp_df, how="left", on="year")

# Non-World rows come first, World rows last; the index is renumbered from 0.
final_global_merged = pd.concat(
    [non_world_rows, world_merged_temp], ignore_index=True
)

In [8]:
# Save the result

# DataFrame.to_csv does not create missing folders, so make sure the
# destination directory exists before writing (no-op when already present).
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Write the consolidated dataset without the DataFrame index.
final_global_merged.to_csv(OUTPUT_PATH, index=False)

# Report (rows, columns) of the frame that was just written.
print("Final dataset saved with shape:", final_global_merged.shape)


Final dataset saved with shape: (61940, 19)
