In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score


In [28]:
# Load the car-price dataset.
# The path is written as a raw string so the backslashes of the Windows path are
# taken literally: "\S", "\c" and "\C" are invalid escape sequences in a normal
# string literal, which Python warns about and plans to reject as a SyntaxError.
# NOTE(review): this is an absolute, machine-specific path - consider a
# configurable data directory so the notebook runs on other machines.
df = pd.read_csv(r"D:\Sem 8\carsales\Carprices.csv")

In [29]:
# Show the loaded DataFrame as the cell output for a quick visual check
# (the rendered output elides the middle rows).
df

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,Price
0,Maruti,Mumbai,2010,72000,Electric,Manual,First,26.60,998,58.16,5,1.75
1,Hyundai,Pune,2015,41000,Diesel,Manual,First,19.67,1582,126.20,5,12.50
2,Honda,Chennai,2011,46000,Petrol,Manual,First,18.20,1199,88.70,5,4.50
3,Maruti,Chennai,2012,87000,Diesel,Manual,First,20.77,1248,88.76,7,6.00
4,Audi,Coimbatore,2013,40670,Diesel,Automatic,Second,15.20,1968,140.80,5,17.74
...,...,...,...,...,...,...,...,...,...,...,...,...
5978,Maruti,Delhi,2014,27365,Diesel,Manual,First,28.40,1248,74.00,5,4.75
5979,Hyundai,Jaipur,2015,100000,Diesel,Manual,First,24.40,1120,71.00,5,4.00
5980,Mahindra,Jaipur,2012,55000,Diesel,Manual,Second,14.00,2498,112.00,8,2.90
5981,Maruti,Kolkata,2013,46000,Petrol,Manual,First,18.90,998,67.10,5,2.65


In [30]:
# Label-encode the categorical features on a copy of the raw frame.
# Encoding `df` in place would make this cell non-idempotent: a second run would
# fit the encoders on integer codes instead of the original strings, and a later
# `label_encoders[col].transform([...])` on raw string values would then fail.
categorical_columns = ['Name', 'Location', 'Fuel_Type', 'Transmission', 'Owner_Type']
df_encoded = df.copy()
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df_encoded[column] = le.fit_transform(df[column])
    # Keep the fitted encoder so new rows can be encoded with the same mapping.
    label_encoders[column] = le

# Separate the feature matrix from the target variable.
X = df_encoded.drop('Price', axis=1)
y = df_encoded['Price']

# Standardize every feature column.
# NOTE(review): the scaler is fit on all rows, i.e. before the data is split
# into train and test sets, so test-set statistics influence the scaling.
# Consider fitting the scaler on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [31]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)


In [32]:
# Random forest regressor: 100 trees, seeded so repeated runs give the same model.
rmodel = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
# Left as the last expression so the fitted estimator is shown as the cell output.
rmodel.fit(X_train, y_train)


In [33]:
# Score the random forest on the held-out test set.
y_pred = rmodel.predict(X_test)
mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

# Report both metrics, one per line.
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 14.99493164551888
R^2 Score: 0.872164344193827


In [34]:
# Linear regression fitted on the training split.
model = LinearRegression()
# Left as the last expression so the fitted estimator is shown as the cell output.
model.fit(X=X_train, y=y_train)


In [35]:
# Score the linear regression on the held-out test set.
# Note: this rebinds `y_pred`, `mse` and `r2`, replacing any earlier values.
y_pred = model.predict(X_test)
mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

# Report both metrics, one per line.
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 36.29436550189326
R^2 Score: 0.6905811826498052


In [38]:
# Describe one unseen car as a single-row table of raw (unencoded) feature values.
new_data = dict(
    Name=['Maruti'],
    Location=['Mumbai'],
    Year=[2022],
    Kilometers_Driven=[50000],
    Fuel_Type=['Electric'],
    Transmission=['Manual'],
    Owner_Type=['First'],
    Mileage=[20.00],
    Engine=[1200],
    Power=[70.00],
    Seats=[5],
)
new_df = pd.DataFrame(new_data)

# Encode the categorical columns with the encoders fitted on the training data.
# `transform` raises if a value was not seen when the encoder was fitted.
categorical_columns = ['Name', 'Location', 'Fuel_Type', 'Transmission', 'Owner_Type']
for column in categorical_columns:
    encoder = label_encoders[column]
    new_df[column] = encoder.transform(new_df[column])

# Apply the already-fitted scaler (no refitting) before predicting.
new_df_scaled = scaler.transform(new_df)

# Predict with the linear regression model and report the first (only) prediction.
predicted_price = model.predict(new_df_scaled)
print(f'Predicted Price: {predicted_price[0]} lakhs')


Predicted Price: 11.345193647196055 lakhs


In [39]:
import pickle
import joblib


In [40]:
# Persist the linear regression together with the fitted preprocessing objects.
# The model was trained on label-encoded, standardized features, so whoever
# loads it also needs the same `scaler` and `label_encoders` to prepare raw
# inputs; saving the estimator alone is not enough to make predictions.
# NOTE: only unpickle these files from a trusted source - loading a pickle can
# execute arbitrary code.
with open('linear_regression_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Fitted StandardScaler used to scale the feature matrix.
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

# Dict of fitted LabelEncoders, keyed by categorical column name.
with open('label_encoders.pkl', 'wb') as file:
    pickle.dump(label_encoders, file)


In [41]:
import pickle

# Write a second copy of the linear regression model, this time with a .sav extension.
model_path = 'linear_regression_model.sav'
with open(model_path, mode='wb') as model_file:
    pickle.dump(model, model_file)


In [42]:
import pickle

# Serialize `rmodel` to a .sav file.
# NOTE(review): the file name says "ada", but `rmodel` is the
# RandomForestRegressor fitted earlier in this notebook - confirm the name
# is what the consumer of this file expects.
model_path = 'ada_model.sav'
with open(model_path, mode='wb') as model_file:
    pickle.dump(rmodel, model_file)
