In [None]:
from google.colab import drive
import os

# Expose Google Drive inside the Colab runtime.
mount_point = '/content/drive'
drive.mount(mount_point)

# Directory the notebook reads its input CSVs from. Missing parent folders are
# created as well, and an already existing directory is left untouched.
folder_path = '/content/drive/My Drive/Pet Project/Coffee Consumption/Dataset'
os.makedirs(folder_path, exist_ok=True)

print("Folder created at:", folder_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder created at: /content/drive/My Drive/Pet Project/Coffee Consumption/Dataset


In [None]:
import pandas as pd
import re

# Destination directory for the reshaped CSV files.
output_folder = '/content/drive/My Drive/dataset/cleaned/'

# Input CSV file name -> name given to the value column once that file has been
# melted into long format. Insertion order is the processing order.
files_and_metrics = dict([
    ('Coffee_domestic_consumption.csv', 'DomesticConsumption'),
    ('Coffee_export.csv', 'Export'),
    ('Coffee_green_coffee_inventorie.csv', 'GreenInventory'),
    ('Coffee_import.csv', 'Import'),
    ('Coffee_importers_consumption.csv', 'ImporterConsumption'),
    ('Coffee_production.csv', 'Production'),
    ('Coffee_re_export.csv', 'ReExport'),
])

# Make sure the destination exists before anything is written to it.
os.makedirs(output_folder, exist_ok=True)

def clean_year_columns(columns):
    """Reduce year-like column labels to their leading four digits.

    A label whose text starts with four digits (for example ``"1990/91"``) is
    replaced by just those digits (``"1990"``). Every other label is returned
    as its text, unchanged.

    Parameters
    ----------
    columns : iterable
        Column labels, e.g. ``DataFrame.columns``. Non-string labels are
        converted with ``str()`` before matching, so an integer header such as
        ``1990`` is handled instead of raising ``TypeError``.

    Returns
    -------
    list of str
        One cleaned label per input label, in the original order.
    """
    cleaned = []
    for col in columns:
        label = str(col)
        # Evaluate the pattern once per label; re.match only matches at the
        # start of the string, so trailing text such as "/91" is dropped.
        year = re.match(r"^\d{4}", label)
        cleaned.append(year.group(0) if year else label)
    return cleaned

# Convert each input CSV from wide format (one column per year) to long format
# (one row per year) and save it in `output_folder` as `cleaned_<file name>`.
for file, metric in files_and_metrics.items():
    df = pd.read_csv(os.path.join(folder_path, file))
    # Headers that start with four digits are reduced to those digits so the
    # year columns can be recognised by `isnumeric()` below.
    df.columns = clean_year_columns(df.columns)

    # Purely numeric headers are treated as years; all others identify the row.
    id_cols = [col for col in df.columns if not col.isnumeric()]
    year_cols = [col for col in df.columns if col.isnumeric()]

    for col in id_cols:
        # Text columns may use the legacy object dtype or pandas' dedicated
        # string dtype; comparing against "object" alone would skip the latter.
        if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col]):
            # Strip only genuine strings. `.str.strip()` would replace every
            # non-string value in a mixed column with NaN.
            df[col] = df[col].map(
                lambda value: value.strip() if isinstance(value, str) else value
            )

    # One output row per (identifier columns, year); values land in `metric`.
    df_melted = pd.melt(df, id_vars=id_cols, value_vars=year_cols, var_name='Year', value_name=metric)
    df_melted['Year'] = df_melted['Year'].astype(int)

    cleaned_file_path = os.path.join(output_folder, f'cleaned_{file}')
    df_melted.to_csv(cleaned_file_path, index=False)