In [None]:
# Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [None]:
# Step 2: Load Data
# Directory holding the merged per-plant CSV exports
# (presumably a Google Drive mount inside Colab -- adjust when running elsewhere).
DATA_DIR = '/content/drive/MyDrive'


def load_plant(csv_path, plant_label):
    """Read one plant's merged CSV and prepare it for concatenation.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    plant_label : str
        Value written to the new ``PLANT`` column so rows remain
        attributable to their plant after the frames are combined.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with ``DATE_TIME`` converted by ``pd.to_datetime``
        and a constant ``PLANT`` column added.
    """
    frame = pd.read_csv(csv_path)
    frame['DATE_TIME'] = pd.to_datetime(frame['DATE_TIME'])
    frame['PLANT'] = plant_label
    return frame


plant1 = load_plant(f'{DATA_DIR}/Final_Merged_Plant_1.csv', 'Plant_1')
plant2 = load_plant(f'{DATA_DIR}/Final_Merged_Plant_2.csv', 'Plant_2')

# Stack both plants into one frame, then drop every row that has a missing
# value in any column. dropna() returns a new frame (no in-place mutation);
# the surviving rows keep the labels assigned by ignore_index=True, so the
# index may contain gaps.
combined_df = pd.concat([plant1, plant2], ignore_index=True).dropna()

In [None]:
# Step 3: Exploratory Data Analysis (EDA)

# 3.1 Overview of the Dataset
# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called bare -- wrapping it in print() emits a stray "None" line.
combined_df.info()
print(combined_df.describe())

# 3.2 Missing Values Check
# Per-column null counts. Rows containing nulls were already dropped when
# the data was loaded, so every count is expected to be 0.
print(combined_df.isnull().sum())

# Preview the first 10 rows (all columns)
print(combined_df.head(10))

In [None]:
# 3.3 Correlation Heatmap
# Pairwise correlation (DataFrame.corr() defaults) between the three
# candidate predictor columns and AC_POWER, drawn as an annotated heatmap.
heatmap_columns = ['IRRADIATION', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'AC_POWER']
correlation_matrix = combined_df[heatmap_columns].corr()

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Feature Correlation with AC Power')
plt.show()

In [None]:
# 3.4 Total Power Output over Time
# One line per plant (hue='PLANT') of AC_POWER against DATE_TIME.
# NOTE(review): seaborn.lineplot aggregates rows that share the same x value
# (mean with an error band by default). If the data has several rows per
# timestamp, the curve is an average rather than a total -- confirm that this
# matches the intent of the title.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(x='DATE_TIME', y='AC_POWER', hue='PLANT', data=combined_df, ax=ax)
ax.set_title('Total AC Power Output Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('AC Power (kW)')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
plt.show()

In [None]:
# 3.5 Scatter Plot: Irradiation vs AC Power
# Each row becomes one point, coloured by plant; alpha=0.7 keeps overlapping
# points distinguishable.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    x='IRRADIATION',
    y='AC_POWER',
    hue='PLANT',
    alpha=0.7,
    data=combined_df,
    ax=ax,
)
ax.set_title('Irradiation vs AC Power Scatter Plot')
plt.show()