In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the raw solar-plant measurements from CSV into `df`.
# If the file is missing, `df` is set to None and the rest of the cell is
# skipped, matching the `if df is not None` guards used further down.
file_path = "Solar Power Plant Data.csv"

# Column dtypes passed to read_csv. The timestamp column is read as a plain
# string; its key uses the same header name that the rename below expects.
dtypes = {
  'Date-Hour(NMT)': str,
  'WindSpeed': float,
  'Sunshine': float,
  'AirPressure': float,
  'Radiation': float,
  'AirTemperature': float,
  'RelativeAirHumidity': float,
  'SystemProduction': float,
}

try:
  # 'nan' and '?' are treated as missing-value markers.
  df = pd.read_csv(file_path, sep=',', dtype=dtypes, na_values=['nan','?'])
except FileNotFoundError:
  print(f"Error: File not found at {file_path}. Please check the file path.")
  df = None

if df is not None:
  # Give the timestamp column the name the following cells refer to.
  df = df.rename(columns={'Date-Hour(NMT)': 'Date-Time'})
  df.info()

# Last expression is the cell output; evaluates to None when loading failed.
df.head() if df is not None else None

In [None]:
# Parse the 'Date-Time' strings into datetimes and derive calendar features
# (day of month, month, hour) as separate columns.
if df is not None:
  df['Date-Time'] = pd.to_datetime(df['Date-Time'], format='%d.%m.%Y-%H:%M')
  df['Day'] = df['Date-Time'].dt.day
  df['Month'] = df['Date-Time'].dt.month
  df['Hour'] = df['Date-Time'].dt.hour

  # Kept inside the guard: calling .info() on None would raise AttributeError.
  df.info()

# Last expression is the cell output; evaluates to None when loading failed.
df.head() if df is not None else None

In [None]:
# Put the derived calendar columns first and drop the 'Date-Time' column,
# which is excluded from the new column list.
if df is not None:
  cols_to_move = ['Day', 'Month', 'Hour']
  new_columns = cols_to_move + [col for col in df.columns if col not in cols_to_move and col != 'Date-Time']
  df = df[new_columns]

# Guarded so the cell does not raise AttributeError when loading failed.
df.head() if df is not None else None

In [None]:
# Express SystemProduction as a fraction of its largest observed value.
if df is not None:
  max_production = df['SystemProduction'].max()
  df['SystemProduction'] = df['SystemProduction'].div(max_production)

df

In [None]:
# Min-max scale the weather feature columns in place.
# 'SystemProduction' and the calendar columns are not in this list.
needs_scaling = [ 'WindSpeed', 'Sunshine', 'AirPressure', 'Radiation', 'AirTemperature', 'RelativeAirHumidity']
scaler = MinMaxScaler()

# Same guard as the preceding cells: indexing None would raise TypeError.
if df is not None:
  df[needs_scaling] = scaler.fit_transform(df[needs_scaling])

df.head() if df is not None else None

In [None]:
# Plot the pairwise correlation matrix of all columns as an annotated heatmap.
# Guarded like the preceding cells so a failed load does not raise here.
if df is not None:
  corr_mat = df.corr()
  fig, ax = plt.subplots(figsize=(15, 7))
  sns.heatmap(corr_mat, annot=True, fmt=".3f", cmap='coolwarm', ax=ax)
  ax.set_title('Correlation Heatmap')
  plt.show()

In [None]:
###########################################
# Saving the processed dataset #
###########################################

import os
import joblib

# Output directory. An empty string makes os.path.join return the bare file
# name, i.e. the files are written relative to the current working directory.
drive_save_dir = ''

# os.makedirs('') raises FileNotFoundError, so only create the directory when
# a non-empty path has been configured.
if drive_save_dir:
  os.makedirs(drive_save_dir, exist_ok=True)

if df is not None:
  # Save the scaler
  scaler_path = os.path.join(drive_save_dir, 'production_scaler.pkl')
  joblib.dump(scaler, scaler_path)

  # Save the DataFrame
  # NOTE(review): to_csv also writes the index as an unnamed first column;
  # left unchanged in case downstream readers rely on it — confirm.
  df_path = os.path.join(drive_save_dir, 'production_dataset.csv')
  df.to_csv(df_path)

  print(f"DataFrame saved to: {df_path}")
else:
  print("Nothing to save: the dataset was not loaded.")

