In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import pickle

# --- Data Preparation ---
# Load the cleaned dataset from the current working directory.
df = pd.read_csv('Cleaned Data.csv')

# Columns used as model inputs. This same list is pickled further down so
# the column selection and order can be reproduced when the model is loaded.
features = [
    'Trip_Distance_km',
    'Passenger_Count',
    'Base_Fare',
    'Per_Km_Rate',
    'Per_Minute_Rate',
    'Trip_Duration_Minutes',
    'Time_of_Day',
    'Traffic_Conditions',
    'Weather',
    'Day_of_Week',
]
# Feature matrix and regression target ('Trip_Price').
X, y = df[features], df['Trip_Price']

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to the held-out rows.
# NOTE(review): StandardScaler needs numeric input -- presumably the
# categorical-looking columns are already encoded in the CSV; confirm.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# --- Model Training and Saving ---
# Hyperparameters of the best-performing model from previous analysis.
rf_params = {
    'n_estimators': 200,
    'max_depth': 10,
    'min_samples_split': 5,
    'n_jobs': -1,
    'random_state': 42,
}
model = RandomForestRegressor(**rf_params)
model.fit(x_train_scaled, y_train)

# Persist, in this order, the trained model, the fitted scaler and the list
# of feature columns -- one pickle file each.
artifacts = (
    ('model.pkl', model),
    ('scaler.pkl', scaler),
    ('model_columns.pkl', features),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

print("model.pkl, scaler.pkl, and model_columns.pkl have been created successfully!")

model.pkl, scaler.pkl, and model_columns.pkl have been created successfully!
