<a href="https://colab.research.google.com/github/IshikaGeed/HackOweek/blob/main/Hack_0_Weeek_Week1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [1]:
!pip install pandas numpy scikit-learn plotly





In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go

print("Program started successfully")

# Load data
df = pd.read_csv("PJME_hourly.csv")

# Convert time
df['Datetime'] = pd.to_datetime(df['Datetime'])

# Sort and remove duplicate timestamps
df = df.sort_values('Datetime')
df = df.drop_duplicates(subset='Datetime', keep='first')

# Set time index
df.set_index('Datetime', inplace=True)

# Ensure hourly frequency
df = df.resample('h').mean()

# Forward fill missing values
df['PJME_MW'].fillna(method='ffill', inplace=True)

# Feature engineering
# Feature engineering
df['hour'] = df.index.hour
df['day'] = df.index.dayofweek   # 0=Mon, 6=Sun
df['prev_load'] = df['PJME_MW'].shift(1)



# Moving average smoothing
df['smoothed'] = df['PJME_MW'].rolling(window=3).mean()

# Use last 7 days
recent = df.last('7D')

X = recent[['hour', 'day', 'prev_load']]
y = recent['smoothed'].fillna(method='bfill')

df.dropna(inplace=True)
# Train model
model = LinearRegression()
model.fit(X, y)
# ----- Accuracy Evaluation -----
from sklearn.metrics import mean_absolute_error, r2_score

# Use last 14 days for evaluation
eval_data = df.last('14D')
X_eval = eval_data[['hour', 'day', 'prev_load']]
y_eval = eval_data['PJME_MW']

pred_eval = model.predict(X_eval)

mae = mean_absolute_error(y_eval, pred_eval)
r2 = r2_score(y_eval, pred_eval)

print("\nModel Accuracy:")
print("Average Error (MAE):", round(mae, 2))
print("R² Score:", round(r2, 3))

# Prepare evening prediction input
evening_df = pd.DataFrame({
    'hour': [17,18,19,20,21,22],
    'day': [df.index[-1].dayofweek]*6,
    'prev_load': [df['PJME_MW'].iloc[-1]]*6
})

predicted = model.predict(evening_df)

# Detect peak hour
peak_hour = evening_df.iloc[predicted.argmax()]['hour']
print("Predicted Peak Hour:", peak_hour)
print("Predicted Load:", predicted.max())

# Plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=[f"{int(h)}:00" for h in evening_df['hour']],
    y=predicted,
    mode='markers+lines',
    name='Predicted Evening Peak'
))


fig.update_layout(
    title="Peak Hour Electricity Prediction",
    xaxis_title="Time",
    yaxis_title="Energy (MW)"
)

fig.show()


Program started successfully


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['PJME_MW'].fillna(method='ffill', inplace=True)
  df['PJME_MW'].fillna(method='ffill', inplace=True)
  recent = df.last('7D')
  y = recent['smoothed'].fillna(method='bfill')
  eval_data = df.last('14D')



Model Accuracy:
Average Error (MAE): 1348.25
R² Score: 0.944
Predicted Peak Hour: 17.0
Predicted Load: 35425.0828835097
