In [None]:
import pandas as pd
import os
import pytz
from datetime import datetime
import numpy as np
import plotly.graph_objs as go

In [None]:
# Convert every Parquet file in the source folder into a CSV file with the same base name
folder_path = "../data/important-parquet/"
files = os.listdir(folder_path)
output_folder = "../data/important/"  # Destination folder for the generated CSV files

# The destination must exist before anything is written into it
os.makedirs(output_folder, exist_ok=True)

for f in files:
    # Entries that are not Parquet files are ignored
    if not f.endswith(".parquet"):
        continue

    # Load the Parquet data
    df = pd.read_parquet(os.path.join(folder_path, f))

    # Same base name with a ".csv" extension, placed in the output folder
    csv_filename = os.path.splitext(f)[0] + ".csv"
    csv_path = os.path.join(output_folder, csv_filename)

    # Drop a CSV left over from an earlier run before writing the fresh one
    if os.path.exists(csv_path):
        os.remove(csv_path)

    # Write the data without the DataFrame index column
    df.to_csv(csv_path, index=False)

In [None]:
# File-system locations used by the pipeline steps in this cell
# (all paths are relative to the current working directory)
INPUT_FOLDER = "../data/important/"  # CSV files read by convert_csv_to_series
CONVERTED_FOLDER = "../data/converted/"  # Output of convert_csv_to_series, input of resample_csv_files
RESAMPLED_FOLDER = "../data/resampled/"  # Output of resample_csv_files, input of merge_resampled_csv_files
MERGED_CSV_PATH = "../data/merged.csv"  # Single merged CSV read by calculate_correlations_and_plot

# Ensure that each of the given folders exists
def create_folders_if_not_exist(*folders):
    """Create every directory passed in ``folders``.

    Missing parent directories are created as well, and directories that
    already exist are left as they are (``exist_ok=True``).

    Parameters
    ----------
    *folders : str
        Any number of directory paths.
    """
    for directory in folders:
        os.makedirs(directory, exist_ok=True)

# Remove a file, doing nothing when it is not there
def delete_file_if_exists(file_path):
    """Delete ``file_path`` when ``os.path.exists`` reports that it exists.

    Parameters
    ----------
    file_path : str
        Path of the file to remove.
    """
    if not os.path.exists(file_path):
        return
    os.remove(file_path)

# Extract the value column of each measurement CSV as a time-indexed series
def convert_csv_to_series(input_folder, output_folder):
    """Write the ``hstWaarde`` column of every CSV in ``input_folder`` as its own CSV.

    For each ``.csv`` file the rows are indexed by the parsed
    ``datumBeginMeting`` column (index left unnamed), the
    ``datumBeginMeting`` and ``datumEindeMeting`` columns are dropped, and the
    ``hstWaarde`` column is saved to ``output_folder``. The output file name
    is the part of the first row's ``historianTagnummer`` before its first
    ``'.'``, followed by ``'_'`` and the source file name.

    Parameters
    ----------
    input_folder : str
        Folder containing the source CSV files.
    output_folder : str
        Folder that receives one CSV per source file.
    """
    for file_name in os.listdir(input_folder):
        if not file_name.endswith(".csv"):
            continue

        source_path = os.path.join(input_folder, file_name)
        measurements = pd.read_csv(source_path)

        # Index the rows by measurement start time and leave the index unnamed
        timestamps = pd.to_datetime(measurements['datumBeginMeting'])
        measurements.index = timestamps
        measurements.index.name = None

        measurements = measurements.drop(columns=['datumEindeMeting', 'datumBeginMeting'])
        values = measurements["hstWaarde"]

        # The tag of the first row (up to its first '.') prefixes the output name
        tag_prefix = measurements['historianTagnummer'].iloc[0].split('.')[0]
        filename = tag_prefix + f'_{file_name}'
        values.to_csv(os.path.join(output_folder, filename), header=True)

# Resample CSV files
def resample_csv_files(input_folder, output_folder):
    """Resample every CSV time series in ``input_folder`` onto a 1-minute grid.

    Each ``.csv`` file is read with its first column as a datetime index.
    Rows sharing a timestamp are averaged, the values are interpolated in
    time, and the result, sampled once per minute from the first to the last
    timestamp of the file, is written to ``output_folder`` under the same
    file name.

    Parameters
    ----------
    input_folder : str
        Folder containing the CSV files to resample.
    output_folder : str
        Folder that receives one resampled CSV per input file. It is not
        created here, so it must already exist.
    """
    for file_name in os.listdir(input_folder):
        if not file_name.endswith(".csv"):
            continue

        df = pd.read_csv(os.path.join(input_folder, file_name), parse_dates=True, index_col=0)
        df.index = pd.to_datetime(df.index)

        # Average rows that share a timestamp first: the reindexing below
        # requires a unique index.
        samples = df.groupby(level=0).mean()

        # 'min' is the non-deprecated spelling of the minutely alias 'T'.
        # NOTE(review): the grid is anchored at this file's first timestamp, so
        # files whose first samples fall on different seconds get grids that do
        # not line up in a later index-based merge -- confirm this is intended.
        minutely_index = pd.date_range(start=samples.index.min(), end=samples.index.max(), freq='min')

        # Interpolate over the union of the measured timestamps and the grid,
        # then keep only the grid points. Joining the data onto the grid before
        # interpolating would throw away every sample that does not fall
        # exactly on a grid point.
        combined_index = samples.index.union(minutely_index)
        resampled = (
            samples.reindex(combined_index)
            .interpolate(method='time')
            .reindex(minutely_index)
        )
        resampled.to_csv(os.path.join(output_folder, file_name))

# Combine all resampled series into one wide CSV
def merge_resampled_csv_files(input_folder, output_path):
    """Outer-merge every CSV in ``input_folder`` on its index and save the result.

    Each ``.csv`` file is read with its first column as the index, which is
    converted to datetimes. Its ``hstWaarde`` column is renamed to the file
    name, and the frame is merged (outer, index on index) with what has been
    accumulated so far. The combined frame is written to ``output_path``.

    Parameters
    ----------
    input_folder : str
        Folder containing the CSV files to merge.
    output_path : str
        Path of the merged CSV file to write.
    """
    combined = pd.DataFrame()
    for file_name in os.listdir(input_folder):
        if not file_name.endswith(".csv"):
            continue

        series_frame = pd.read_csv(os.path.join(input_folder, file_name), index_col=0)
        series_frame.index = pd.to_datetime(series_frame.index)
        series_frame = series_frame.rename(columns={"hstWaarde": file_name})

        # The newly read frame is the left operand, so its column is placed
        # before the columns accumulated from earlier files.
        combined = series_frame.merge(combined, left_index=True, right_index=True, how='outer')
    combined.to_csv(output_path)

# Correlation heatmap of the merged series
def calculate_correlations_and_plot(merged_csv_path):
    """Show a heatmap of the pairwise correlations between the merged columns.

    The CSV at ``merged_csv_path`` is read with its first column as the index,
    ``DataFrame.corr()`` is computed on it, and the resulting matrix is
    displayed as a Plotly heatmap whose axes are labelled with the column
    names.

    Parameters
    ----------
    merged_csv_path : str
        Path of the merged CSV file.
    """
    merged = pd.read_csv(merged_csv_path, index_col=0)
    corr_matrix = merged.corr()

    heatmap = go.Heatmap(
        x=corr_matrix.columns,
        y=corr_matrix.index,
        z=corr_matrix.to_numpy(),
    )

    fig = go.Figure()
    fig.add_trace(heatmap)
    # Further layout customisation can be added here if needed

    # Display the figure
    fig.show()

# Pipeline driver: convert -> resample -> merge -> correlate and plot.
# Each step reads the files written by the previous one, so the order matters.
if __name__ == "__main__":
    # Make sure the input and intermediate folders exist
    create_folders_if_not_exist(INPUT_FOLDER, CONVERTED_FOLDER, RESAMPLED_FOLDER)

    # Remove a merged CSV left over from a previous run
    # NOTE(review): CONVERTED_FOLDER and RESAMPLED_FOLDER are not emptied here,
    # so CSV files left in them by earlier runs are processed again below.
    delete_file_if_exists(MERGED_CSV_PATH)

    # Step 1: extract the value column of each input CSV as a time-indexed series
    convert_csv_to_series(INPUT_FOLDER, CONVERTED_FOLDER)

    # Step 2: resample each converted series onto a 1-minute grid
    resample_csv_files(CONVERTED_FOLDER, RESAMPLED_FOLDER)

    # Step 3: merge all resampled series into a single CSV
    merge_resampled_csv_files(RESAMPLED_FOLDER, MERGED_CSV_PATH)

    # Step 4: compute the correlation matrix of the merged data and show it
    calculate_correlations_and_plot(MERGED_CSV_PATH)
