In [2]:
# Required Libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [16]:
class ClimateDataLoader:
    """Load named CSV datasets from a single directory and keep them in memory.

    Attributes:
        data_dir: Directory onto which every filename given to ``load_data``
            is joined.
        datasets: Dict mapping dataset name to the DataFrame read for it.
            Only files that were read successfully are present.
    """

    def __init__(self, data_dir):
        """Initialize with the directory containing datasets."""
        self.data_dir = data_dir
        self.datasets = {}

    def load_data(self, file_mapping):
        """
        Load datasets into ``self.datasets``.

        Each file is read independently: a failure is reported on stdout and
        the loop moves on to the next entry, so one bad file does not prevent
        the others from loading.

        file_mapping: dict
            A dictionary mapping dataset names to their filenames
            (relative to ``self.data_dir``).
        """
        for name, filename in file_mapping.items():
            file_path = os.path.join(self.data_dir, filename)
            try:
                self.datasets[name] = pd.read_csv(file_path)
            except Exception as e:
                # Deliberately best-effort: report and continue with the rest.
                print(f"Error loading {name}: {e}")
            else:
                print(f"Loaded: {name} ({filename})")

    def get_dataset(self, name):
        """Retrieve a dataset by name, or ``None`` if it was never loaded."""
        return self.datasets.get(name)

    def show_summary(self):
        """Print summary information for all datasets.

        For every loaded dataset this prints a header line, the
        ``DataFrame.info()`` report, and the first rows of the frame.
        """
        for name, df in self.datasets.items():
            print(f"\nDataset: {name}")
            # DataFrame.info() writes its report to stdout itself and returns
            # None, so wrapping it in print() would emit a stray "None" line.
            df.info()
            print(df.head())

# Dataset name -> CSV filename, each resolved relative to the loader's data directory.
file_mapping = dict(
    [
        ("Air_Quality_Index", "Air Quality Index.csv"),
        ("CO2_by_Source", "co2-by-source industry all countries.csv"),
        ("CO2_by_Sector", "co2-emissions-by-sector.csv"),
        ("Global_Sea_Level", "Global_sea_level_rise.csv"),
        ("GHG_by_Sector", "green house gas-emissions-by-sector.csv"),
        ("Per_Capita_CO2", "per-capita-co2-vs-average.csv"),
        ("Total_GHG_Emissions", "total-greenhouse gas-emissions.csv"),
    ]
)

# Read every CSV from the shared data folder, then print a summary of each one.
data_loader = ClimateDataLoader("../Data/Climate Change - datasets")
data_loader.load_data(file_mapping)
data_loader.show_summary()


Loaded: Air_Quality_Index (Air Quality Index.csv)
Loaded: CO2_by_Source (co2-by-source industry all countries.csv)
Loaded: CO2_by_Sector (co2-emissions-by-sector.csv)
Loaded: Global_Sea_Level (Global_sea_level_rise.csv)
Loaded: GHG_by_Sector (green house gas-emissions-by-sector.csv)
Loaded: Per_Capita_CO2 (per-capita-co2-vs-average.csv)
Loaded: Total_GHG_Emissions (total-greenhouse gas-emissions.csv)

Dataset: Air_Quality_Index
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18121 entries, 0 to 18120
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       18121 non-null  object
 1   Country    18121 non-null  object
 2   Status     18121 non-null  object
 3   AQI Value  18121 non-null  int64 
dtypes: int64(1), object(3)
memory usage: 566.4+ KB
None
         Date    Country                          Status  AQI Value
0  2022-07-21    Albania                            Good         14
1  2022-07-21    Algeria         