In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from keras.optimizers import Adam
from sklearn.metrics import r2_score, mean_absolute_error

In [2]:
# Load the data
df = pd.read_csv('CARS_1.csv')
df.isnull().sum()
df['seating_capacity'].fillna(df['seating_capacity'].mean(), inplace = True)
df.isnull().sum()
df['fuel_tank_capacity'] = df['fuel_tank_capacity'].apply(lambda x:x+5)
Cars = df.rename(columns={
    'fuel_type': 'Fuel', 
    'fuel_tank_capacity': 'Tank_size', 
    'reviews_count': 'Reviews', 
    'car_name': 'Car', 
    'seating_capacity': 'Seat', 
    'body_type': 'Build', 
    'max_power_bhp': 'Power', 
    'transmission_type': 'Transmission', 
    'max_torque_nm': 'Torque', 
    'ending_price': 'Max_price',
    'engine_displacement': 'Engine',
    'rating': 'Rating',
    'no_cylinder': 'Cylinders', 
    'max_power_rp': 'Max_Power_RPM', 
    'max_torque_rpm': 'Max_Torque_RPM', 
    'starting_price': 'Starting_Price'
})
# Cars
Cars["Price"] = (Cars['Starting_Price'] + Cars['Max_price'])/2
Cars

Unnamed: 0,Car,Reviews,Fuel,Engine,Cylinders,Seat,Transmission,Tank_size,Build,Rating,Starting_Price,Max_price,Torque,Max_Torque_RPM,Power,Max_Power_RPM,Price
0,Maruti Alto K10,51,Petrol,998,3,5.0,Automatic,32.0,Hatchback,4.5,399000,583000,89.0,3500,65.71,5500,491000.0
1,Maruti Brezza,86,Petrol,1462,4,5.0,Automatic,53.0,SUV,4.5,799000,1396000,136.8,4400,101.65,6000,1097500.0
2,Mahindra Thar,242,Diesel,2184,4,4.0,Automatic,62.0,SUV,4.5,1353000,1603000,300.0,2800,130.00,3750,1478000.0
3,Mahindra XUV700,313,Diesel,2198,4,7.0,Automatic,65.0,SUV,4.5,1318000,2458000,450.0,2800,182.38,3500,1888000.0
4,Mahindra Scorpio-N,107,Diesel,2198,4,7.0,Automatic,62.0,SUV,4.5,1199000,2390000,400.0,2750,172.45,3500,1794500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,Mercedes-Benz AMG A 45 S,35,Petrol,1991,4,5.0,Automatic,5.0,Hatchback,4.5,659000,999000,500.0,5250,415.71,6750,829000.0
199,BMW 3 Series Gran Limousine,3,Petrol,1998,4,5.0,Automatic,64.0,Sedan,4.5,1041000,1041000,400.0,4400,254.79,5000,1041000.0
200,MG Hector Plus,2,Diesel,1956,4,7.0,Manual,65.0,SUV,4.5,1615000,2075000,350.0,2500,167.67,3750,1845000.0
201,Audi RS Q8,9,Petrol,3998,8,5.0,Automatic,90.0,SUV,3.5,21700000,21700000,800.0,4500,591.39,6000,21700000.0


In [3]:
# Encode the categorical variables using one-hot encoding
encoder = OneHotEncoder()
encoded = encoder.fit_transform(Cars[['Fuel', 'Transmission', 'Build']])
feature_names = encoder.get_feature_names_out(['Fuel', 'Transmission', 'Build'])
encoded_df = pd.DataFrame(encoded.toarray(), columns=feature_names)

# Combine the encoded variables with the numerical variables
X = pd.concat([Cars.drop(['Car', 'Fuel', 'Transmission', 'Build', 'Price'], axis=1), encoded_df], axis=1)
y = Cars['Price']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the data for use with a 1D CNN
X_train = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)

In [4]:
# Build a 1D CNN model
model = Sequential()
model.add(Conv1D(32, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(2))
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D(2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

In [5]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Fit the model to the data
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate the R-squared score and MAE
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Print the results
print("R-squared score: ", r2)
print("Mean absolute error: ", mae)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
R-squared score:  0.9999773835260182
Mean absolute error:  34158.92378048781


In [18]:
import os
import pickle

# Train the model
# model = linear_model.LinearRegression()
# model.fit(X, y)

# Save the model as a pickle file in the current working directory
filename = 'cnn_model.pkl'
with open(os.path.join(os.getcwd(), filename), 'wb') as f:
    pickle.dump(model, f)