In [None]:
import pandas as pd

# Input CSV for the analysis.
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# this exact file exists; consider a configurable data directory.
file_path = r"C:\Users\gsrmb\Downloads\Renewable_Cleaned.csv"

# Load the file into the working frame used by the statements below.
df = pd.read_csv(filepath_or_buffer=file_path)

def remove_outliers(df, columns, k=1.5):
    """Drop rows that hold an IQR outlier in any of the given columns.

    For every selected column the fences are ``Q1 - k * IQR`` and
    ``Q3 + k * IQR``. A row is removed when at least one of its values in
    ``columns`` lies strictly outside that column's fences. NaN never compares
    as smaller or larger than a fence, so rows with missing values are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    columns : label or list-like of labels
        Column(s) to screen. A single column name is accepted as well as a
        list of names.
    k : float, default 1.5
        Fence multiplier applied to the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` without outliers, with their original index labels.
    """
    # A bare label would select a Series, on which ``any(axis=1)`` raises.
    columns = list(columns) if pd.api.types.is_list_like(columns) else [columns]
    if not columns:
        # Nothing to screen: every row is kept.
        return df.copy()

    values = df[columns]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - k * iqr
    upper_bound = q3 + k * iqr

    # Comparisons align the per-column fences with the frame's columns.
    is_outlier = (values < lower_bound) | (values > upper_bound)
    return df[~is_outlier.any(axis=1)]

# Columns screened for IQR outliers.
columns_to_check = [
    'sunlightTime',
    'dayLength',
    'Energy delta[Wh]',
    'GHI',
]

df_cleaned = remove_outliers(df=df, columns=columns_to_check)

# Report the frame dimensions before and after filtering.
print(
    f"Original dataset shape: {df.shape}",
    f"Cleaned dataset shape: {df_cleaned.shape}",
    sep="\n",
)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

# Fail fast when the timestamp column is missing; the statements below index by it.
if 'Time' not in df.columns:
    raise KeyError("Column 'Time' not found in the dataset.")

# Parse the timestamps so 'Time' can later serve as a datetime index.
df['Time'] = pd.to_datetime(df['Time'])

# NOTE(review): this helper is also defined in an earlier cell of the
# notebook; keep the copies in sync or define it once.
def remove_outliers(df, columns, k=1.5):
    """Drop rows that hold an IQR outlier in any of the given columns.

    For every selected column the fences are ``Q1 - k * IQR`` and
    ``Q3 + k * IQR``. A row is removed when at least one of its values in
    ``columns`` lies strictly outside that column's fences. NaN never compares
    as smaller or larger than a fence, so rows with missing values are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    columns : label or list-like of labels
        Column(s) to screen. A single column name is accepted as well as a
        list of names.
    k : float, default 1.5
        Fence multiplier applied to the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` without outliers, with their original index labels.
    """
    # A bare label would select a Series, on which ``any(axis=1)`` raises.
    columns = list(columns) if pd.api.types.is_list_like(columns) else [columns]
    if not columns:
        # Nothing to screen: every row is kept.
        return df.copy()

    values = df[columns]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - k * iqr
    upper_bound = q3 + k * iqr

    # Comparisons align the per-column fences with the frame's columns.
    is_outlier = (values < lower_bound) | (values > upper_bound)
    return df[~is_outlier.any(axis=1)]

# Columns screened for IQR outliers before aggregating.
columns_to_check = ['sunlightTime', 'dayLength', 'Energy delta[Wh]', 'GHI']
df_cleaned = remove_outliers(df, columns_to_check)

# Index by timestamp so the frame can be resampled on calendar boundaries.
df_cleaned = df_cleaned.set_index('Time')

# Monthly means. numeric_only=True keeps the aggregation from raising a
# TypeError on pandas >= 2.0 should the frame carry any non-numeric column.
# NOTE(review): the 'M' alias is deprecated in favour of 'ME' from pandas 2.2;
# switch once the minimum supported pandas version allows it.
df_monthly = df_cleaned.resample('M').mean(numeric_only=True)

# Monthly means of the three series drawn on one shared axis.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df_monthly.index, df_monthly['Energy delta[Wh]'], label="Energy Delta", color="blue")
ax.plot(df_monthly.index, df_monthly['GHI'], label="GHI", color="orange")
ax.plot(df_monthly.index, df_monthly['sunlightTime'], label="Sunlight Time", color="green")
ax.legend()
ax.set_title("Energy Delta, GHI, and Sunlight Time Trends Over Time")
ax.set_xlabel("Time")
ax.set_ylabel("Values")
plt.show()

# Additive decomposition of the monthly energy series with a 12-step period.
result = seasonal_decompose(df_monthly['Energy delta[Wh]'], model='additive', period=12)

# One stacked panel per component, top to bottom.
fig, axes = plt.subplots(4, 1, figsize=(12, 8))
panels = [
    (result.observed, {'label': 'Observed'}),
    (result.trend, {'label': "Trend", 'color': "red"}),
    (result.seasonal, {'label': "Seasonal", 'color': "green"}),
    (result.resid, {'label': "Residual", 'color': "purple"}),
]
for ax, (component, line_kwargs) in zip(axes, panels):
    ax.plot(component, **line_kwargs)
    ax.legend()
fig.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Input CSV for the plots below.
# NOTE(review): absolute, machine-specific path; this reload also replaces the
# `df` whose 'Time' column an earlier cell converted to datetime.
file_path = r"C:\Users\gsrmb\Downloads\Renewable_Cleaned.csv"

# Load the file into the working frame.
df = pd.read_csv(filepath_or_buffer=file_path)


# Pairwise correlations (DataFrame.corr default method) between the four columns.
correlation_matrix = df[['sunlightTime', 'dayLength', 'Energy delta[Wh]', 'GHI']].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    # Anchor the diverging palette to the full correlation range so its
    # neutral midpoint corresponds to zero correlation.
    vmin=-1,
    vmax=1,
    # Correlation strength is encoded by colour, so the colour bar carries
    # that label rather than an axis.
    cbar_kws={"label": "Correlation coefficient"},
)
plt.title("Correlation Matrix: Sunlight, Day Length, Energy Delta & GHI")
# Both axes list the same variables.
plt.xlabel("Variable")
plt.ylabel("Variable")
plt.show()

# Scatter of every row: sunlight time against energy delta.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sunlightTime',
    y='Energy delta[Wh]',
    alpha=0.5,
    label="Data Points",
    ax=ax,
)
ax.set_title("Sunlight Time vs. Energy Delta")
ax.set_xlabel("Sunlight Time (minutes)")
ax.set_ylabel("Energy Delta (Wh)")
ax.legend(loc="upper left", frameon=True)
plt.show()


# Scatter of every row: sunlight time against GHI.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sunlightTime',
    y='GHI',
    alpha=0.5,
    label="Data Points",
    ax=ax,
)
ax.set_title("Sunlight Time vs. GHI")
ax.set_xlabel("Sunlight Time (minutes)")
ax.set_ylabel("Global Horizontal Irradiance (GHI)")
ax.legend(loc="upper left", frameon=True)
plt.show()


# Distribution of day length for each value of the 'month' column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x=df['month'], y=df['dayLength'], palette="viridis", ax=ax)
ax.set_title("Seasonal Variation of Day Length")
ax.set_xlabel("Month")
ax.set_ylabel("Day Length (minutes)")
# NOTE(review): the twelve fixed labels are placed at positions 0-11, which
# presumably assumes every month occurs in the data -- confirm, otherwise the
# labels drift away from their boxes.
ax.set_xticks(range(0, 12))
ax.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
ax.legend(["Day Length Distribution"], loc="upper right", frameon=True)
plt.show()