In [7]:
import pandas as pd


ZONAL_DATA_PATH = "C:/Users/pcata/OneDrive/Ambiente de Trabalho/dataset_meteo_com_consumo.csv" 
CLUSTER_MAP_PATH = "C:/Users/pcata/OneDrive/Ambiente de Trabalho/clusters_temp_2024.csv"             
OUTPUT_FILE = "dataset_final_4_clusters.csv"            


df_zonal = pd.read_csv(ZONAL_DATA_PATH, sep=';')
df_clusters = pd.read_csv(CLUSTER_MAP_PATH, sep=';')


try:
    df_zonal['date'] = pd.to_datetime(df_zonal['date'], format='%d/%m/%Y', errors='coerce')
except ValueError:
    df_zonal['date'] = pd.to_datetime(df_zonal['date'], errors='coerce')


df_clusters = df_clusters[['zone', 'cluster']]


df_merged = pd.merge(df_zonal, df_clusters, on='zone', how='left')


df_merged['cluster'] = df_merged['cluster'].fillna(-1).astype(int)


df_grouped = df_merged.groupby(['date', 'cluster']).agg(
    hdd18_cluster=('HDD18', 'mean'),
    consumo_total=('consumo_gwh', 'sum'),
    dow=('dow', 'first'),
    is_weekend=('is_weekend', 'first'),
    month=('month', 'first'),
    year=('year', 'first')
).reset_index()


df_pivoted = df_grouped.pivot_table(
    index='date',
    columns='cluster',
    values='hdd18_cluster'
).reset_index()


cluster_cols = {}
for i in range(4):
    if i in df_pivoted.columns:
        cluster_cols[i] = f'HDD18_Cluster{i}'
    
df_pivoted = df_pivoted.rename(columns=cluster_cols)


df_final = df_merged.groupby('date').agg(
    consumo_gwh=('consumo_gwh', 'sum'),
    dow=('dow', 'first'),
    is_weekend=('is_weekend', 'first'),
    month=('month', 'first'),
    year=('year', 'first'),
    day_length_hours=('day_length_hours', 'mean'), 
).reset_index()


df_treino_final = pd.merge(df_final, df_pivoted, on='date', how='left')


df_treino_final.to_csv(OUTPUT_FILE, sep=';', index=False)

print(f"Dataset de Treino Final (4 Clusters) criado em: {OUTPUT_FILE}")
print(f"Número de Linhas (Dias): {len(df_treino_final)}")

  df_zonal = pd.read_csv(ZONAL_DATA_PATH, sep=';')


Dataset de Treino Final (4 Clusters) criado em: dataset_final_4_clusters.csv
Número de Linhas (Dias): 3926
