In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the data
def load_and_process_data(file_path):
    """Load a CSV file and parse its ``date`` column as datetimes.

    Parameters
    ----------
    file_path
        Location of the CSV; forwarded unchanged to ``pd.read_csv``, so it
        may be anything that function accepts.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``date`` column converted by
        ``pd.to_datetime``. All other columns are left as read.

    Raises
    ------
    KeyError
        If the CSV has no ``date`` column.
    """
    raw_frame = pd.read_csv(file_path)

    # Replace the text dates with real datetimes; assign() overwrites the
    # existing column in place, so the column order is preserved.
    return raw_frame.assign(date=pd.to_datetime(raw_frame['date']))

In [None]:
def min_max_scale_timeseries(df):
    """Min-max scale every non-``date`` column of ``df``.

    Each column is scaled on its own with scikit-learn's ``MinMaxScaler``
    using its default (0, 1) feature range. The input frame is never
    modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns other than ``'date'`` hold the series to scale.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which every column other than ``'date'`` has been
        replaced by its scaled values. When ``df`` has no rows, or no columns
        besides ``'date'``, the copy is returned unchanged.
    """
    df_scaled = df.copy()

    # Everything except the date axis is treated as a series to scale.
    timeseries_columns = [col for col in df.columns if col != 'date']

    # MinMaxScaler refuses input with zero samples, so a header-only (or
    # date-only) frame is passed through instead of raising.
    if not timeseries_columns or len(df) == 0:
        return df_scaled

    # MinMaxScaler scales feature-by-feature, so a single fit over the whole
    # matrix gives each series its own min/max without re-fitting per column.
    scaled_values = MinMaxScaler().fit_transform(df[timeseries_columns].to_numpy())

    # Assign 1-D slices one column at a time so each column is replaced
    # outright with the float results, regardless of its original dtype.
    for position, column in enumerate(timeseries_columns):
        df_scaled[column] = scaled_values[:, position]

    return df_scaled

In [None]:
def save_scaled_data(df_scaled, output_file_path):
    """Write the scaled frame to CSV and print where it went.

    Parameters
    ----------
    df_scaled : pandas.DataFrame
        Frame to persist; its index is not written to the file.
    output_file_path
        Destination handed straight to ``DataFrame.to_csv``.
    """
    df_scaled.to_csv(path_or_buf=output_file_path, index=False)

    confirmation = f"Saved scaled data to: {output_file_path}"
    print(confirmation)

In [None]:
def plot_comparison(original_df, scaled_df, unique_id):
    """Show one series before and after scaling in side-by-side panels.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Unscaled data; must contain ``'date'`` and the ``unique_id`` column.
    scaled_df : pandas.DataFrame
        Scaled data; must contain ``'date'`` and the ``unique_id`` column.
    unique_id
        Column label of the series to draw in both panels.
    """
    plt.figure(figsize=(15, 6))

    # One entry per panel:
    # (subplot slot, data, legend label, title, y-axis label, extra line kwargs)
    panels = (
        (1, original_df, 'Original',
         f'Original Time Series: {unique_id}', 'Value', {}),
        (2, scaled_df, 'Scaled',
         f'Scaled Time Series: {unique_id}', 'Normalized Value', {'color': 'orange'}),
    )

    for slot, frame, legend_label, title, y_label, line_kwargs in panels:
        plt.subplot(1, 2, slot)
        plt.plot(frame['date'], frame[unique_id], label=legend_label, **line_kwargs)
        plt.title(title)
        plt.xlabel('Date')
        plt.ylabel(y_label)
        plt.xticks(rotation=45)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
# Source CSV to load, and the destination CSV for its scaled copy
input_file = 'data/eod_balances_2110_full.csv'
output_file = 'data/eod_balances_2110_norm.csv'

In [None]:
# Read the input CSV into a frame with a parsed date column
print("Loading data...")
df = load_and_process_data(file_path=input_file)

# Build a scaled copy of the frame under a new name
print("Scaling time series...")
df_scaled = min_max_scale_timeseries(df=df)

In [None]:
# Write the scaled frame to the output CSV
save_scaled_data(df_scaled=df_scaled, output_file_path=output_file)

In [None]:
# Visual check: draw one series (chosen by column label) before and after scaling
unique_id = '2'
plot_comparison(original_df=df, scaled_df=df_scaled, unique_id=unique_id)