In [None]:
import pandapower as pp
import pandas as pd

from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
import os
import chardet      # install this one using pip install chardet

# Install pandapower in a fresh environment with: pip install pandapower==2.13.1

In [None]:
# # Skip this part
# # This code is just to train one model.
# 
# 
# # Assuming 'total.csv' contains your data: the feature columns (X) plus the target column 'Load' (y)
# total_data = pd.read_csv('total.csv')
# 
# # Separate features (X) and target variable (y)
# X = total_data.drop(columns=['Load'])  # Features
# y = total_data['Load']  # Target variable
# 
# # 70% of the data for training
# train_size = 0.7    
# 
# # Take the first 70% of all the data to train
# n_train_samples = int(len(X) * train_size)
# 
# X_train, X_test = X[:n_train_samples], X[n_train_samples:]
# y_train, y_test = y[:n_train_samples], y[n_train_samples:]
# 
# # Create an XGBRegressor model
# model = XGBRegressor(random_state=42)
# 
# # Train the model
# model.fit(X_train, y_train)
# 
# # Make predictions on the test data
# predictions = model.predict(X_test)
# 
# # Evaluate the model
# mse = mean_squared_error(y_test, predictions)
# # print("Mean Squared Error:", mse)
# 
# mse_df = pd.DataFrame({'MSE': [mse]})
# mse_df.to_csv('mse_results.csv', index=False)

In [None]:
# # Skip this part
# # Train and plot the result of one model
# 
# 
# # Load the data from 'total.csv'
# total_data = pd.read_csv('total.csv')
# 
# # Assuming 'Time' represents the time-related values in your dataset
# time_column = total_data['Time']
# 
# # Separate features (X) and target variable (y)
# X = total_data.drop(columns=['Load', 'Time'])  # Features
# y = total_data['Load']  # Target variable
# 
# # Determine the split index for 70% of the data
# split_index = int(len(total_data) * 0.7)
# 
# # Split the data into training and testing sets chronologically
# X_train, X_test = X[:split_index], X[split_index:]
# y_train, y_test = y[:split_index], y[split_index:]
# time_train, time_test = time_column[:split_index], time_column[split_index:]
# 
# # Create an XGBRegressor model
# model = XGBRegressor(random_state=42)
# 
# # Train the model
# model.fit(X_train, y_train)
# 
# # Make predictions on the test data
# predictions = model.predict(X_test)
# 
# # Calculate Mean Squared Error (MSE)
# mse = mean_squared_error(y_test, predictions)
# print("Mean Squared Error:", mse)
# 
# # Store the MSE value in a CSV file
# mse_df = pd.DataFrame({'MSE': [mse]})
# mse_df.to_csv('mse_results.csv', index=False)
# 
# # Plot actual and predicted values against time
# plt.figure(figsize=(10, 6))
# plt.plot(time_test, y_test, label='Actual', color='blue', alpha=0.5)
# plt.plot(time_test, predictions, label='Predicted', color='green', alpha=0.5)
# plt.xlabel('Time')
# plt.ylabel('Load')
# plt.title('Actual and Predicted Values Over Time')
# plt.legend()
# plt.show()


In [None]:
# Trains one model per file in the input directory and stores the predictions, MSE values, and plots in a separate output folder

# Function to detect file encoding
def detect_encoding(file_path):
    """Guess the text encoding of a file.

    The whole file is read as raw bytes and handed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    with open(file_path, 'rb') as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

# Directories
input_dir = 'dataset_complete'
output_dir = 'results_predictions'

# Fraction of each dataset, taken from the start of the file, used for training.
# The remainder is the test set (chronological split, no shuffling).
train_fraction = 0.7

# Create the output directory if it doesn't exist (no-op when it already does)
os.makedirs(output_dir, exist_ok=True)

# The MSE summary is rebuilt from scratch on every run of this cell, so that
# re-running the cell does not append duplicate rows to a summary left behind
# by a previous run.
mse_results_path = os.path.join(output_dir, 'mse_results.csv')
mse_records = []

# Iterate over each entry in the input directory. Sorted so the processing
# order (and the row order of the MSE summary) is reproducible; os.listdir
# returns entries in arbitrary order.
for file_name in sorted(os.listdir(input_dir)):
    # Construct full file path
    file_path = os.path.join(input_dir, file_name)

    # Skip anything that is not a regular file (e.g. sub-directories)
    if not os.path.isfile(file_path):
        continue

    try:
        # Detect the encoding of the file, then load it with that encoding
        encoding = detect_encoding(file_path)
        total_data = pd.read_csv(file_path, encoding=encoding)

        # 'Time' is kept aside for the output table and the plot; it is not a feature
        time_column = total_data['Time']

        # Separate features (X) and target variable (y)
        X = total_data.drop(columns=['Load', 'Time'])  # Features
        y = total_data['Load']  # Target variable

        # Row position at which the training part ends
        split_index = int(len(total_data) * train_fraction)

        # Split the data into training and testing sets by row position
        X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
        y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]
        time_train, time_test = time_column.iloc[:split_index], time_column.iloc[split_index:]

        # Create and train an XGBRegressor model (fixed seed for repeatability)
        model = XGBRegressor(random_state=42)
        model.fit(X_train, y_train)

        # Make predictions on the test data
        predictions = model.predict(X_test)

        # Calculate Mean Squared Error (MSE)
        mse = mean_squared_error(y_test, predictions)
        print(f"Mean Squared Error for {file_name}:", mse)

        # Record the MSE and rewrite the summary file. Rewriting after every
        # file keeps the summary complete even if the run is interrupted.
        mse_records.append({'File': file_name, 'MSE': mse})
        pd.DataFrame(mse_records).to_csv(mse_results_path, index=False)

        # Create a DataFrame with actual values, predicted values, and time
        results_df = pd.DataFrame({
            'Time': time_test,
            'Actual': y_test,
            'Predicted': predictions
        })

        # Save the results DataFrame to a CSV file
        results_csv_path = os.path.join(output_dir, f'{file_name}_predictions.csv')
        results_df.to_csv(results_csv_path, index=False)
        print(f'Saved prediction results for {file_name} to {results_csv_path}')

        # Plot actual and predicted values against time and save the figure.
        # The figure is closed in `finally` so a failure while plotting or
        # saving does not leave open figures accumulating across files.
        plot_path = os.path.join(output_dir, f'{file_name}_plot.png')
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.plot(time_test, y_test, label='Actual', color='blue', alpha=0.5)
            ax.plot(time_test, predictions, label='Predicted', color='green', alpha=0.5)
            ax.set_xlabel('Time')
            ax.set_ylabel('Load')
            ax.set_title(f'Actual and Predicted Values Over Time for {file_name}')
            ax.legend()
            fig.savefig(plot_path)
        finally:
            plt.close(fig)
        print(f'Saved plot for {file_name} to {plot_path}')

    except Exception as e:
        # Best-effort batch run: report the failure and continue with the next file
        print(f"Error processing {file_name}: {e}")
