### **Importing libraries**

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

### **Data Collection**

In [2]:
# Load the car listings from the CSV file expected in the working directory.
car_df = pd.read_csv('car data.csv')
# Bare last expression so the notebook renders a (truncated) preview of the frame.
car_df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.60,6.87,42450,Diesel,Dealer,Manual,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.00,5.90,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.00,87934,Petrol,Dealer,Manual,0
299,city,2017,11.50,12.50,9000,Diesel,Dealer,Manual,0


### **Data Preprocessing**

In [3]:
# Columns compared in this section.
price_columns = ['Present_Price', 'Selling_Price']

# Standardise both price columns. The scaled values are kept under their own
# name (with column labels and the original index) instead of being written to
# `new_df` and immediately overwritten by the correlation matrix below.
scaler = StandardScaler()
scaled_prices = pd.DataFrame(
    scaler.fit_transform(car_df[price_columns]),
    columns=price_columns,
    index=car_df.index,
)

# Pearson correlation does not change under standardisation, so it is computed
# directly from the raw price columns.
new_df = car_df[price_columns].corr()

heatmap = go.Heatmap(
    x=new_df.columns,
    y=new_df.columns,
    z=new_df.values,
    colorscale='earth',
)

layout = go.Layout(title='Correlation Analysis between present price and selling price of car')

fig = go.Figure(data=[heatmap], layout=layout)

fig.show()

### **Data Visualization**

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
car_df.describe()

Unnamed: 0,Year,Selling_Price,Present_Price,Driven_kms,Owner
count,301.0,301.0,301.0,301.0,301.0
mean,2013.627907,4.661296,7.628472,36947.20598,0.043189
std,2.891554,5.082812,8.642584,38886.883882,0.247915
min,2003.0,0.1,0.32,500.0,0.0
25%,2012.0,0.9,1.2,15000.0,0.0
50%,2014.0,3.6,6.4,32000.0,0.0
75%,2016.0,6.0,9.9,48767.0,0.0
max,2018.0,35.0,92.6,500000.0,3.0


### Selling price of car in every year

In [5]:
# Selling price against model year, one colour per car name.
px.scatter(
    car_df,
    x='Year',
    y='Selling_Price',
    color='Car_Name',
)

### Car_Name and Present_Price

In [6]:
# Present price per car name, split into colour groups by year.
fig = px.histogram(
    car_df,
    x='Car_Name',
    y='Present_Price',
    color='Year',
)

fig.show()

### Car_Name and Selling_Price

In [7]:
# One box per car name showing the spread of its selling prices.
selling_price_box = go.Box(
    x=car_df['Car_Name'],
    y=car_df['Selling_Price'],
)
fig = go.Figure(data=[selling_price_box])

# update_layout returns the figure, so leaving it as the last expression renders it.
fig.update_layout(title='Analysis between car name and selling price')

### Driven_kms and Fuel_Type

In [8]:
# Total kilometres driven per fuel type.
# The x values (one per fuel type) and y values must have the same length, so
# the kilometres are summed per fuel type first. Pairing the unique fuel types
# with the raw, per-row Driven_kms column would only plot the first few rows.
kms_by_fuel = car_df.groupby('Fuel_Type')['Driven_kms'].sum()

fig = go.Figure(data=[go.Bar(x=kms_by_fuel.index, y=kms_by_fuel.values)])
fig.update_layout(title='Analysis between Fuel type of car and total driven kms')
fig.show()

### Car_Name and Driven_kms

In [9]:
# Total kilometres driven per car name.
# Summing per car name gives exactly one y value for every x value. Pairing the
# unique car names with the raw, per-row Driven_kms column would attach
# unrelated rows' values to each name.
kms_by_car = car_df.groupby('Car_Name')['Driven_kms'].sum()

fig = go.Figure(data=[
    go.Scatter(
        x=kms_by_car.index,
        y=kms_by_car.values,
        mode='markers',
        marker=dict(color='purple', size=15),
    )
])
fig.update_layout(title='Analysis between total Driven kms of each car')

fig.show()

### Variable encoding:

In [10]:
# Encode the categorical columns as integer codes, in place on car_df (the
# modelling cells below read the encoded columns from car_df).
#
# Each column gets its own fitted encoder, stored in `label_encoders`, so the
# category <-> code mapping of every column stays available afterwards via
# `label_encoders[column].classes_` / `.inverse_transform(...)`. Refitting a
# single shared encoder would keep only the mapping of the last column.
#
# NOTE: re-running this cell on already-encoded data refits the encoders on the
# integer codes and loses the original category names; reload car_df first.
categorical_columns = ['Fuel_Type', 'Transmission', 'Selling_type']
label_encoders = {}

for column in categorical_columns:
    label_encoder = LabelEncoder()
    car_df[column] = label_encoder.fit_transform(car_df[column])
    label_encoders[column] = label_encoder

### **Model Building with LinearRegression**

In [11]:
# Target is the selling price; features are every remaining column except the
# free-text car name.
target_column = 'Selling_Price'

Y = car_df[target_column]
X = car_df.drop(columns=['Car_Name', target_column])

In [12]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Display all four pieces of the split.
X_train, X_test, Y_train, Y_test

(     Year  Present_Price  Driven_kms  Fuel_Type  Selling_type  Transmission  \
 185  2008           0.58        1900          2             1             0   
 72   2013          18.61       56001          2             0             1   
 15   2016          10.79       43000          1             0             1   
 10   2017           3.60        2135          2             0             1   
 196  2008           0.52      500000          2             1             0   
 ..    ...            ...         ...        ...           ...           ...   
 188  2013           0.57       18000          2             1             1   
 71   2011          12.48       45000          1             0             1   
 106  2014           3.45       16500          2             1             1   
 270  2011          10.00       69341          2             0             1   
 102  2017           1.78        4000          2             1             1   
 
      Owner  
 185      0  
 72       

In [13]:
# Create the linear model and fit it on the training split (fit returns the
# estimator itself, so the assignment keeps the fitted model).
ln_data = LinearRegression().fit(X_train, Y_train)

# Show the fitted estimator as the cell output.
ln_data

In [14]:
# Predictions of the fitted model on the training features.
trained_X = ln_data.predict(X_train)

# Goodness of fit on the training data.
r_score = r2_score(y_true=Y_train, y_pred=trained_X)
error_score = mean_squared_error(y_true=Y_train, y_pred=trained_X)

print("R squared score :", r_score)
print("mean squared Error :", error_score)

R squared score : 0.8757287527605655
mean squared Error : 3.0499440088069822


In [15]:
  # Define colors as needed

# Predicted vs. actual selling price on the training split, indexed like Y_train.
data = Y_train.to_frame(name='Y_train')
data.insert(0, 'trained_X', trained_X)

fig = px.scatter(
    data_frame=data,
    x='trained_X',
    y='Y_train',
    color='Y_train',
)

fig.show()


In [16]:
# Predictions of the fitted model on the held-out features.
test_X = ln_data.predict(X_test)

# Same two metrics as for the training split, now on unseen rows.
r_score = r2_score(y_true=Y_test, y_pred=test_X)
error_score = mean_squared_error(y_true=Y_test, y_pred=test_X)

print('r score: ', r_score)
print('mean square error: ', error_score)

r score:  0.8772443540167388
mean square error:  3.495368886010676


In [17]:
# Predicted vs. actual selling price on the test split, indexed like Y_test.
data = pd.DataFrame({'test_X': test_X}, index=Y_test.index).assign(Y_test=Y_test)

fig = px.scatter(
    data_frame=data,
    x='test_X',
    y='Y_test',
)

fig.show()


In [18]:
# A single hand-written car to price with the fitted model.
# Column names must match the training features exactly, so 'Selling_type' is
# spelled as in car_df from the start (no follow-up rename needed).
# NOTE(review): the categorical values are the integer codes produced by the
# label-encoding step; presumably assigned in sorted class order - confirm the
# intended categories against the fitted encoders.
car_data = pd.DataFrame({
    'Year': [2013],
    'Present_Price': [1.90],
    'Driven_kms': [54000],
    'Fuel_Type': [2],
    'Selling_type': [1],
    'Transmission': [1],
    'Owner': [0],
})

# Put the columns in the same order as the training features; a missing or
# misspelled column fails here with a KeyError naming it.
car_data = car_data[list(X_train.columns)]

# Making predictions
predictions = ln_data.predict(car_data)

# Displaying the predictions
print(predictions)


[0.75076485]
