In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report

# Load the dataset
file_path = '/content/Daily_Power_Gen_States_march_23.csv'
data = pd.read_csv(file_path)

# Extract and clean the relevant data.
# .copy() makes data_cleaned an independent frame: the column assignments
# below would otherwise be writes into a slice of `data`, which pandas flags
# with SettingWithCopyWarning and does not guarantee to apply consistently.
data_cleaned = data[[
    'States',
    'Max.Demand Met during the day(MW)',
    'Shortage during maximum Demand(MW)',
    'Energy Met (MU)',
    'date',
]].copy()
data_cleaned['date'] = pd.to_datetime(data_cleaned['date'])

# Simulate weather data: temperature, humidity, and wind speed
# NOTE: every column added below is a seeded random draw, not an observation
# from the CSV. The draw order is kept fixed so the seed reproduces the same
# values on each run.
np.random.seed(42)
data_cleaned['temperature'] = np.random.uniform(15, 35, data_cleaned.shape[0])
data_cleaned['humidity'] = np.random.uniform(40, 80, data_cleaned.shape[0])
data_cleaned['wind_speed'] = np.random.uniform(0, 15, data_cleaned.shape[0])
data_cleaned['holiday_flag'] = np.random.choice([0, 1], data_cleaned.shape[0], p=[0.8, 0.2])
data_cleaned['real_estate_growth'] = np.random.uniform(0.01, 0.05, data_cleaned.shape[0])

# Define features (X) and target (y)
X = data_cleaned[['temperature', 'humidity', 'wind_speed', 'holiday_flag', 'real_estate_growth']]
y = data_cleaned['Max.Demand Met during the day(MW)']

# Create categories (bins) for electricity demand (low, medium, high).
# include_lowest=True closes the first bin on the left so a demand of exactly
# 0 MW is labelled 'low'; with the default left-open bins it would become NaN
# and the classifiers could not be fitted on it.
y_class = pd.cut(
    y,
    bins=[0, 1000, 5000, np.inf],
    labels=['low', 'medium', 'high'],
    include_lowest=True,
)

# Split the dataset into training and testing sets (80% train, 20% test).
# Both calls split the same number of rows with the same random_state, so the
# regression and classification splits select identical rows. That is why a
# single scaled feature matrix can serve both targets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X, y_class, test_size=0.2, random_state=42
)

# Scale the features: statistics are learned from the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Regressors to compare, keyed by the display name printed in the results.
# The 'Random Forest' entry is also the model used for the interactive
# prediction in get_user_input_and_compare().
regression_models = dict([
    ('Random Forest', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('Linear Regression', LinearRegression()),
    ('KNN', KNeighborsRegressor(n_neighbors=5)),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('SVM', SVR(kernel='rbf')),
])

# Classifiers to compare on the binned (low / medium / high) demand target,
# keyed by the display name printed in the results.
classification_models = dict([
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('KNN', KNeighborsClassifier(n_neighbors=5)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('SVM', SVC(kernel='linear')),
])

# Function to train, predict, and evaluate regression models
def evaluate_regression_models():
    """Fit each regressor in ``regression_models`` and print its test-set scores.

    Training uses the module-level ``X_train_scaled`` / ``y_train``; scoring
    uses ``X_test_scaled`` / ``y_test``. The estimators are fitted in place,
    so the objects stored in ``regression_models`` are trained afterwards.
    Returns nothing; results are written to stdout.
    """
    for label in regression_models:
        regressor = regression_models[label]
        predictions = regressor.fit(X_train_scaled, y_train).predict(X_test_scaled)

        test_mse = mean_squared_error(y_test, predictions)
        test_r2 = r2_score(y_test, predictions)

        print(
            f"\n{label} Regression Model Performance:",
            f"Mean Squared Error (MSE): {test_mse:.2f}",
            f"R² Score: {test_r2:.2f}",
            sep="\n",
        )

# Function to train, predict, and evaluate classification models
def evaluate_classification_models():
    """Fit each classifier in ``classification_models`` and print its test-set results.

    Training uses the module-level ``X_train_scaled`` / ``y_train_class``;
    scoring compares predictions on ``X_test_scaled`` with ``y_test_class``.
    For every model the accuracy and the full classification report are
    written to stdout. Returns nothing.
    """
    for label in classification_models:
        classifier = classification_models[label]
        classifier.fit(X_train_scaled, y_train_class)
        predicted_labels = classifier.predict(X_test_scaled)

        test_accuracy = accuracy_score(y_test_class, predicted_labels)
        print(
            f"\n{label} Classification Model Performance:",
            f"Accuracy Score: {test_accuracy:.2f}",
            sep="\n",
        )

        report_text = classification_report(y_test_class, predicted_labels)
        print(f"Classification Report:\n{report_text}")

# Function to get user input, predict, and compare with actual demand from dataset
def get_user_input_and_compare():
    """Prompt for feature values, predict demand, and compare with the nearest dataset row.

    Reads five values from stdin, scales them with the module-level
    ``scaler``, and predicts demand with ``regression_models['Random Forest']``.
    That model must already be fitted, i.e. ``evaluate_regression_models()``
    has to run first. The prediction is then compared with the demand recorded
    in the row of ``data_cleaned`` whose features are closest to the input.

    "Closest" is the smallest sum of absolute differences measured in the
    scaler's standardized feature space, so each feature contributes on a
    comparable scale. On raw values the humidity gap (tens of units) would
    swamp ``real_estate_growth`` (hundredths) and ``holiday_flag``.

    ``data_cleaned`` is only read; no scratch column is written to it.

    Raises:
        ValueError: if an entered value cannot be parsed as a number.
    """
    feature_columns = ['temperature', 'humidity', 'wind_speed', 'holiday_flag', 'real_estate_growth']

    temperature = float(input("Enter the temperature (°C): "))
    humidity = float(input("Enter the humidity (%): "))
    wind_speed = float(input("Enter the wind speed (km/h): "))
    holiday_flag = int(input("Is it a holiday? (1 for yes, 0 for no): "))
    real_estate_growth = float(input("Enter the real estate growth (percentage, e.g., 0.02 for 2%): "))

    # Column names and order match the frame the scaler was fitted on.
    user_input = pd.DataFrame({
        'temperature': [temperature],
        'humidity': [humidity],
        'wind_speed': [wind_speed],
        'holiday_flag': [holiday_flag],
        'real_estate_growth': [real_estate_growth]
    })

    user_input_scaled = scaler.transform(user_input)

    # Using Random Forest as the default model for user prediction
    predicted_demand = regression_models['Random Forest'].predict(user_input_scaled)[0]

    # Find the closest match in the dataset based on the input.
    # Distances live in a local array, so the shared frame is not modified.
    dataset_scaled = scaler.transform(data_cleaned[feature_columns])
    distances = np.abs(dataset_scaled - user_input_scaled).sum(axis=1)

    # argmin is positional, hence iloc; ties resolve to the first row.
    closest_record = data_cleaned.iloc[int(np.argmin(distances))]
    actual_demand = closest_record['Max.Demand Met during the day(MW)']

    print(f"\nPredicted Electricity Demand: {predicted_demand:.2f} MW")
    print(f"Actual Electricity Demand from dataset: {actual_demand:.2f} MW")
    print(f"Difference: {predicted_demand - actual_demand:.2f} MW")

# Run the pipeline in order: evaluate the regression models, then the
# classification models, then prompt for input. The regression pass must come
# first because it fits the Random Forest used for the interactive prediction.
for pipeline_step in (
    evaluate_regression_models,
    evaluate_classification_models,
    get_user_input_and_compare,
):
    pipeline_step()
