In [13]:
# Step 1 — Import libraries
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Step 2 — Read the dataset from the CSV file
DATA_FILE = "Waste_Management_with_Extra_Features.csv"
df = pd.read_csv(DATA_FILE)

# Step 3 — Name the columns by role
# Columns that get one-hot encoded by the preprocessing step.
categorical_cols = ["city/district", "waste_type", "disposal_method"]

# Remaining input columns, listed in the order they appear in X.
other_feature_cols = [
    "recycling_rate_(%)",
    "population_density_(people/km²)",
    "municipal_efficiency_score_(1_10)",
    "cost_of_waste_management_(₹/ton)",
    "community_participation_score",
    "green_technology_adoption",
]

# Column the model is trained to predict.
target_col = "waste_generated_(tons/day)"

# Step 4 — Assemble X (inputs) and y (target)
# Categorical columns come first, followed by the remaining inputs.
X = df[categorical_cols + other_feature_cols]
y = df[target_col]

# Step 5 — Preprocess and build model pipeline
# The columns listed in categorical_cols are one-hot encoded; every other
# input column is handed to the regressor unchanged (remainder="passthrough").
# handle_unknown="ignore" makes a category that was not present during fit
# encode as all zeros at predict time instead of raising an error.
preprocessor = ColumnTransformer(
    transformers=[("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)],
    remainder="passthrough",
)

# Chaining preprocessing and the regressor in one Pipeline means the encoder
# is fitted only on the data passed to model.fit(), and the same encoding is
# reapplied automatically inside model.predict().
model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(
        n_estimators=200,   # number of trees (try 200–500 for better results)
        max_depth=10,       # caps tree depth to limit overfitting
        random_state=42,    # fixed seed so runs are reproducible
        n_jobs=-1,          # uses all CPU cores for speed
    )),
])



# Step 6 — Hold out 20% of the rows, fit on the rest, predict the held-out rows
# random_state=42 keeps the train/test partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted pipeline, so prediction can be chained onto it.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Step 7 — Score the held-out predictions
# R²: share of target variance explained (negative means worse than always
# predicting the mean). MAE and RMSE are in the target's own units.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the mean squared error

# One print call, one metric per line, three decimals each.
print(
    f"R² Score: {r2:.3f}",
    f"MAE: {mae:.3f}",
    f"RMSE: {rmse:.3f}",
    sep="\n",
)

# Step 8 — Put the held-out inputs next to their actual and predicted targets
# assign() works on a copy, so X_test itself is left untouched. y_test is
# matched to rows by index label; y_pred is placed positionally.
results = X_test.assign(**{
    "Actual_Waste_(tons/day)": y_test,
    "Predicted_Waste_(tons/day)": y_pred,
})

# Optional: export the table for Power BI (row index is not written)
output_file = "Regression_Results_Waste_Prediction.csv"
results.to_csv(output_file, index=False)

print("\n✅ Regression completed and results saved successfully.")



R² Score: -0.074
MAE: 2659.380
RMSE: 3002.340

✅ Regression completed and results saved successfully.
