In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import glob

In [20]:
# Initialize an empty DataFrame to append all the data
overall_df = pd.DataFrame()

# Iterate over all CSV files in the downloaded_csvs folder
for file_name in glob.glob('data/downloaded_csvs/*.csv'):
    # Read the current CSV file
    temp_df = pd.read_csv(file_name, header=None, names=['hour_number', 'price', 'to_drop'])
    # Extract date from the filename dynamically and prepend to the hour number
    date_str = file_name.split('_')[-1].split('.')[0]  # Extracts date from the filename
    temp_df['hour_number'] = date_str + '-' + temp_df['hour_number'].astype(str)
    # Append the current DataFrame to the overall DataFrame
    overall_df = pd.concat([overall_df, temp_df], ignore_index=True)


In [21]:
overall_df

Unnamed: 0,hour_number,price,to_drop
0,20240223-1,25.32,DSO-RD;
1,20240223-2,23.86,DSO-RD;
2,20240223-3,20.92,DSO-RD;
3,20240223-4,19.73,DSO-RD;
4,20240223-5,20.07,DSO-RD;
...,...,...,...
2190,20240212-20,34.26,DSO-RD;
2191,20240212-21,42.97,DSO-RD;
2192,20240212-22,38.65,DSO-RD;
2193,20240212-23,29.44,DSO-RD;


In [41]:
# Filter the data where hour number = 20230410-20
filtered_df_o = overall_df[overall_df['hour_number'] == '20240301-24']
filtered_df_o

Unnamed: 0,hour_number,price,to_drop
994,20240301-24,29.41,DSO-RD;


In [31]:
# Initialize an empty DataFrame to append all the weekly data
overall_weekly_df = pd.DataFrame()

# Iterate over all CSV files in the weekly_market folder
for file_name in glob.glob('data/weekly_market/*.csv'):
    # Read the current CSV file
    temp_weekly_df = pd.read_csv(file_name, header=None, names=['date', 'hour_number', 'demand', 'capacity'])
    # Remove the slash in the date and append the date to the hour number
    temp_weekly_df['date'] = temp_weekly_df['date'].str.replace('/', '')
    temp_weekly_df['hour_number'] = temp_weekly_df['date'] + '-' + temp_weekly_df['hour_number'].astype(str)
    # Drop the now redundant date column
    temp_weekly_df.drop(columns=['date'], inplace=True)
    # Append the current DataFrame to the overall weekly DataFrame
    overall_weekly_df = pd.concat([overall_weekly_df, temp_weekly_df], ignore_index=True)


In [42]:
# Filter the data where hour number = 20230410-20
filtered_df = overall_weekly_df[overall_weekly_df['hour_number'] == '20240301-24']
filtered_df

Unnamed: 0,hour_number,demand,capacity
15143,20240301-24,15020,26701
60093,20240301-24,15020,26701
62301,20240301-24,15020,26701
69764,20240301-24,15020,26701
71636,20240301-24,15020,26701
78955,20240301-24,15020,26701
81619,20240301-24,15020,26701


In [43]:
combined_df = pd.merge(overall_df, overall_weekly_df, on='hour_number')


In [44]:
combined_df

Unnamed: 0,hour_number,price,to_drop,demand,capacity
0,20240223-1,25.32,DSO-RD;,14740,24684
1,20240223-1,25.32,DSO-RD;,14740,24684
2,20240223-1,25.32,DSO-RD;,14740,24684
3,20240223-1,25.32,DSO-RD;,14740,24684
4,20240223-1,25.32,DSO-RD;,14740,24684
...,...,...,...,...,...
14947,20240212-24,26.16,DSO-RD;,15942,24777
14948,20240212-24,26.16,DSO-RD;,15942,24777
14949,20240212-24,26.16,DSO-RD;,15942,24777
14950,20240212-24,26.16,DSO-RD;,15942,24777


In [45]:
combined_df.drop_duplicates(subset='hour_number', keep='first', inplace=True)

In [46]:
combined_df

Unnamed: 0,hour_number,price,to_drop,demand,capacity
0,20240223-1,25.32,DSO-RD;,14740,24684
7,20240223-2,23.86,DSO-RD;,14314,24748
14,20240223-3,20.92,DSO-RD;,14103,24909
21,20240223-4,19.73,DSO-RD;,14063,25234
28,20240223-5,20.07,DSO-RD;,14210,25448
...,...,...,...,...,...
14917,20240212-20,34.26,DSO-RD;,18639,24960
14924,20240212-21,42.97,DSO-RD;,18405,24809
14931,20240212-22,38.65,DSO-RD;,17775,24740
14938,20240212-23,29.44,DSO-RD;,16790,24796


In [47]:
combined_df.to_csv('data/cleaned_data.csv', index=False)

In [48]:
saved_df = pd.read_csv('data/cleaned_data.csv')

In [49]:
saved_df

Unnamed: 0,hour_number,price,to_drop,demand,capacity
0,20240223-1,25.32,DSO-RD;,14740,24684
1,20240223-2,23.86,DSO-RD;,14314,24748
2,20240223-3,20.92,DSO-RD;,14103,24909
3,20240223-4,19.73,DSO-RD;,14063,25234
4,20240223-5,20.07,DSO-RD;,14210,25448
...,...,...,...,...,...
2179,20240212-20,34.26,DSO-RD;,18639,24960
2180,20240212-21,42.97,DSO-RD;,18405,24809
2181,20240212-22,38.65,DSO-RD;,17775,24740
2182,20240212-23,29.44,DSO-RD;,16790,24796
