# Streamlit Applicaton

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pickle

# Sample data loading (replace with your actual path)
data = pd.read_excel('Cleaned_Cars_Data_Capped_Outlierss.xlsx')

# Define columns
important_numerical_cols = ['Width','YearofManufacture','kmsDriven', 'Length','Torque', 'Height', 'Mileage','GearBox',
                            'No_owner', 'SeatingCapacity', 'Topspeed', 'Mileage']
important_categorical_cols = ['City', 'fueltype', 'bodytype','oemodel', 'Color','Transmission',  'Steering Type','InsuranceValidity']

# Define X and y
X = data[important_numerical_cols + important_categorical_cols]
y = data['price']

# Separate numerical and categorical data
X_numerical = X[important_numerical_cols]
X_categorical = X[important_categorical_cols]

# Scale numerical data
scaler = StandardScaler()
X_numerical_scaled = scaler.fit_transform(X_numerical)

# Encode categorical data
encoder = OneHotEncoder(handle_unknown='ignore')
X_categorical_encoded = encoder.fit_transform(X_categorical).toarray()

# Combine processed numerical and categorical data
X_processed = np.hstack([X_numerical_scaled, X_categorical_encoded])



In [4]:

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.3, random_state=0)

# Train the model
model = RandomForestRegressor()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)


In [6]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")
print(f"Test R^2: {r2:.4f}")


Test MAE: 0.9531
Test MSE: 3.4238
Test R^2: 0.8477


In [13]:

# Save the trained model, scaler, and encoder
with open('car_price_model.pkl', 'wb') as file:
    pickle.dump(model, file)

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

with open('encoder.pkl', 'wb') as file:
    pickle.dump(encoder, file)

print("Model, scaler, and encoder saved successfully.")

Model, scaler, and encoder saved successfully.
