In [1]:
import os
import pandas as pd
import re
import json

In [2]:
# Country codes -> integer labels, persisted as JSON for later cells.
# Insertion order is kept exactly as listed because it determines the key
# order in the written file (note that HU=5 is listed before SE=4).
country_data = dict(
    SP=0,  # Spain
    UK=1,  # United Kingdom
    DE=2,  # Germany
    DK=3,  # Denmark
    HU=5,  # Hungary
    SE=4,  # Sweden
    IT=6,  # Italy
    PO=7,  # Poland
    NL=8,  # Netherlands
)

# Destination of the country-code lookup table
file_path = 'country_codes.json'

# Serialise the mapping (4-space indentation) and write it out in one go
with open(file_path, 'w') as json_out:
    json_out.write(json.dumps(country_data, indent=4))

In [3]:
# Energy type codes -> human-readable names, persisted as JSON for later cells.
# The pairs are listed in code order, which is also the key order of the written file.
energy_types = dict([
    ("B01", "Biomass"),
    ("B02", "Fossil Brown coal/Lignite"),
    ("B03", "Fossil Coal-derived gas"),
    ("B04", "Fossil Gas"),
    ("B05", "Fossil Hard coal"),
    ("B06", "Fossil Oil"),
    ("B07", "Fossil Oil shale"),
    ("B08", "Fossil Peat"),
    ("B09", "Geothermal"),
    ("B10", "Hydro Pumped Storage"),
    ("B11", "Hydro Run-of-river and poundage"),
    ("B12", "Hydro Water Reservoir"),
    ("B13", "Marine"),
    ("B14", "Nuclear"),
    ("B15", "Other renewable"),
    ("B16", "Solar"),
    ("B17", "Waste"),
    ("B18", "Wind Offshore"),
    ("B19", "Wind Onshore"),
    ("B20", "Other"),
    ("B21", "AC Link"),
    ("B22", "DC Link"),
    ("B23", "Substation"),
    ("B24", "Transformer"),
])

# Destination of the energy-type lookup table
file_path = 'energy_types.json'

# Serialise the mapping (4-space indentation) and write it out in one go
with open(file_path, 'w') as json_out:
    json_out.write(json.dumps(energy_types, indent=4))

In [None]:
# Directory where raw CSV files are stored
directory = "../jupyter_notebook/data_samples"

# Only files named gen_<two-letter country code>_<energy type code>.csv are loaded.
# The two capture groups yield the country and energy-type codes directly, so the
# file name never has to be split by hand.
filename_pattern = re.compile(r'gen_([A-Z]{2})_([A-Z0-9]+)\.csv')

# One-hour bin width for resampling. A Timedelta is used instead of the 'H' string
# alias, which newer pandas releases deprecate.
hourly = pd.Timedelta(hours=1)


def date_parser(x):
    """Parse the first 22 characters of a raw 'EndTime' string into a pandas Timestamp.

    NOTE(review): the truncation is meant to drop trailing timezone text. Whether a
    UTC offset is still contained in those 22 characters depends on the raw file
    format -- confirm against the data before relying on naive vs. aware timestamps.
    """
    return pd.to_datetime(x[:22])


# List to hold one hourly dataframe per input file
dataframes = []

# sorted() makes the processing order (and therefore the row order of final_df)
# reproducible; os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):

    # fullmatch (rather than match) also rejects names that merely start with the
    # pattern, e.g. a stray "gen_XX_B01.csv.bak".
    name_match = filename_pattern.fullmatch(filename)
    if name_match is None:
        continue

    # Country and energy type come straight from the file name
    country, energy_type = name_match.groups()

    # Read the CSV file and index it by the parsed 'EndTime'
    df = pd.read_csv(os.path.join(directory, filename), converters={'EndTime': date_parser}).set_index('EndTime')

    # resample() requires a datetime-like index. Values produced by a converter may
    # arrive as generic Python objects, so normalise the index explicitly
    # (this is a no-op when it is already a DatetimeIndex).
    df.index = pd.to_datetime(df.index)

    numeric_cols = df.select_dtypes(include=['number'])
    categorical_cols = df.select_dtypes(exclude=['number', 'datetime64[ns]', 'bool'])

    # Resample the numeric columns and sum.
    # Gotcha: an hour without any rows sums to 0, not NaN.
    resampled_df_num = numeric_cols.resample(hourly).sum()

    # Resample the non-numeric columns, keeping the last value seen in each hour.
    # Adjust the method if needed (e.g. 'first', or a custom function to get the mode).
    resampled_df_cat = categorical_cols.resample(hourly).last()

    # Combine the resampled DataFrames back together
    resampled_df = pd.concat([resampled_df_num, resampled_df_cat], axis=1)

    # Add country and energy type as new columns
    resampled_df['CountryCode'] = country
    resampled_df['EnergyTypeCode'] = energy_type

    # Append the dataframe to the list
    dataframes.append(resampled_df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing matched.
if not dataframes:
    raise ValueError(
        "No files matching 'gen_<COUNTRY>_<ENERGYTYPE>.csv' found in {!r}".format(directory)
    )

# Concatenate the per-file hourly dataframes into a single frame
final_df = pd.concat(dataframes)

In [None]:
# Read back the two lookup tables that earlier cells wrote to JSON
lookup_tables = {}
for json_name in ('country_codes.json', 'energy_types.json'):
    with open(json_name, 'r') as handle:
        lookup_tables[json_name] = json.load(handle)

countries_dict = lookup_tables['country_codes.json']
energy_types_dict = lookup_tables['energy_types.json']

# Translate each code column through its lookup table:
#   CountryCode    -> CountryLabel   (the value stored for that code in country_codes.json)
#   EnergyTypeCode -> EnergyTypeName (the value stored for that code in energy_types.json)
# Codes that have no entry in the table come out as NaN.
for target_col, source_col, table in (
    ('CountryLabel', 'CountryCode', countries_dict),
    ('EnergyTypeName', 'EnergyTypeCode', energy_types_dict),
):
    final_df[target_col] = final_df[source_col].map(table)