In [1]:
import pandas as pd
import pickle
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [7]:

# Read the raw consumption table from the CSV in the working directory
df = pd.read_csv("State_Consumption.csv")

# Label-encode the known categorical columns in place, keeping every fitted
# encoder (keyed by column name) so the mapping can be persisted afterwards
encoders = {}
for col in ("States", "Regions"):
    # Skip a categorical column that this dataset does not contain
    if col not in df.columns:
        continue
    le = LabelEncoder()
    # Values are cast to str before fitting, so the encoder's classes are strings
    as_text = df[col].astype(str)
    df[col] = le.fit_transform(as_text)
    encoders[col] = le

In [8]:
# Encode any remaining object columns *before* persisting the encoders.
# Each fitted LabelEncoder is stored under its column name so that the saved
# encoders.pkl can reproduce the training-time encoding of every encoded
# column at inference time. (Previously these encoders were fitted and then
# discarded, and the file was written before they even existed.)
remaining_object_cols = list(df.select_dtypes(include=['object']).columns)
for col in remaining_object_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le

# Persist every fitted encoder (keyed by column name) in a single file.
joblib.dump(encoders, "encoders.pkl")
print("📂 Label encoders saved as encoders.pkl")

# Report the additionally encoded columns after the save message so the
# cell's printed output keeps its original order.
for col in remaining_object_cols:
    print(f"Encoded column: {col}")

📂 Label encoders saved as encoders.pkl
Encoded column: Dates


In [9]:

# Target is the "Usage" column; every other column is used as a feature
y = df["Usage"]
X = df.drop(columns=["Usage"])

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training portion (seeded as well)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Emit the three-line summary, one message per line
print(
    "✅ Model Training Complete",
    f"Mean Absolute Error: {mae:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

✅ Model Training Complete
Mean Absolute Error: 16.13
R² Score: 0.94


In [10]:
# Serialize the fitted model to disk with pickle so it can be reloaded later.
# The file is opened in binary write mode and closed by the context manager.
model_path = "state_consumption_model.pkl"
with open(model_path, "wb") as f:
    pickle.dump(model, f)

# Confirm where the model was written
print("📂 Model saved as state_consumption_model.pkl")


📂 Model saved as state_consumption_model.pkl
