In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [4]:
file_path = 'C:/Users/bhumi/Downloads/archive (7)/train_dataframes.xlsx'  
df = pd.read_excel(file_path)

In [5]:
threshold = df['DEMAND'].mean()  
df['HighDemand'] = (df['DEMAND'] > threshold).astype(int)

In [6]:
X = df.drop(columns=['datetime', 'DEMAND', 'HighDemand']) 
y = df['HighDemand'] 

In [7]:
# Handle categorical features
X = pd.get_dummies(X, columns=['dayOfWeek', 'holiday'], drop_first=True)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

In [10]:
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.94      0.94      0.94      3815
           1       0.93      0.94      0.94      3529

    accuracy                           0.94      7344
   macro avg       0.94      0.94      0.94      7344
weighted avg       0.94      0.94      0.94      7344



In [11]:
import joblib
joblib.dump(clf, 'demand_classifier.pkl')

['demand_classifier.pkl']

In [12]:
#Predicting with new data

In [13]:
import pandas as pd
import joblib

model_path = 'demand_classifier.pkl'  
clf = joblib.load(model_path)

new_data = pd.DataFrame({
    'week_X-2': [450],
    'week_X-3': [460],
    'week_X-4': [470],
    'MA_X-4': [455],
    'dayOfWeek': [3], 
    'weekend': [0],  
    'holiday': [1], 
    'Holiday_ID': [12],  
    'hourOfDay': [14], 
    'T2M_toc': [22] 
})

new_data = pd.get_dummies(new_data, columns=['dayOfWeek', 'holiday'], drop_first=True)

for col in clf.feature_names_in_:
    if col not in new_data.columns:
        new_data[col] = 0  
new_data = new_data[clf.feature_names_in_]

predictions = clf.predict(new_data)
predicted_classes = ["High Demand" if pred == 1 else "Low Demand" for pred in predictions]

print("Predictions:", predicted_classes)

probabilities = clf.predict_proba(new_data)
print("Prediction Probabilities:", probabilities)


Predictions: ['Low Demand']
Prediction Probabilities: [[0.85 0.15]]
