In [2]:
import pandas as pd
import numpy as np
import joblib  # ✅ used instead of pickle

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load Dataset
df = pd.read_csv("weatherAus.csv")
df.columns = df.columns.str.lower().str.strip()
df.replace("None", np.nan, inplace=True)
df['temp'] = pd.to_numeric(df['temp'], errors='coerce')

# Drop unnecessary columns
df.drop(['date', 'time'], axis=1, inplace=True)

# Define features and target
X = df.drop("traffic_volume", axis=1)
y = df["traffic_volume"].astype(float)

# Define column types
categorical_features = ["holiday", "weather"]
numerical_features = ["temp", "rain", "snow"]

# Preprocessing pipelines
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="mean")),
    ('scaler', StandardScaler())
])

cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="most_frequent")),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing
preprocessor = ColumnTransformer([
    ('num', num_pipeline, numerical_features),
    ('cat', cat_pipeline, categorical_features)
])

# Full pipeline with model (reduced n_estimators for size)
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(n_estimators=25, random_state=42))  # Reduced from 100
])

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit model
model_pipeline.fit(X_train, y_train)

# Evaluate
y_pred = model_pipeline.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"R2 Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")

# ✅ Save compressed model
joblib.dump(model_pipeline, "model_pipeline.pkl", compress=3)


R2 Score: -0.1571
RMSE: 2138.82


['model_pipeline.pkl']