Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Loading the Dataset

In [2]:
df=pd.read_csv('car performance.csv')

Data Analysis

In [3]:
df.head(10)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
5,15.0,8,429.0,198,4341,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220,4354,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215,4312,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225,4425,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190,3850,8.5,70,1,amc ambassador dpl


In [4]:
df.shape

(398, 9)

In [5]:
df.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name'],
      dtype='object')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    int64  
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   car name      398 non-null    object 
dtypes: float64(3), int64(5), object(1)
memory usage: 28.1+ KB


In [7]:
df.nunique()

mpg             129
cylinders         5
displacement     82
horsepower       93
weight          351
acceleration     95
model year       13
origin            3
car name        305
dtype: int64

In [8]:
df.origin.unique()

array([1, 3, 2])

Handiling the Missing Values

In [9]:
df.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

In [10]:
# There is no Null Value in the data set

Lable encoding

In [11]:
# There is no Categorial value other than the car name (car name is not used for the performance predecting so we can drop the car name column), so we need not to do the label encoding

Droping the car name column

In [12]:
df=df.iloc[:,:-1]

In [13]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,18.0,8,307.0,130,3504,12.0,70,1
1,15.0,8,350.0,165,3693,11.5,70,1
2,18.0,8,318.0,150,3436,11.0,70,1
3,16.0,8,304.0,150,3433,12.0,70,1
4,17.0,8,302.0,140,3449,10.5,70,1


Splitting the dataset into dependent and independent Variable

In [14]:
x=df.iloc[:,1:]

In [15]:
y=df.iloc[:,0]

In [16]:
x.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,8,307.0,130,3504,12.0,70,1
1,8,350.0,165,3693,11.5,70,1
2,8,318.0,150,3436,11.0,70,1
3,8,304.0,150,3433,12.0,70,1
4,8,302.0,140,3449,10.5,70,1


In [17]:
y.head()

0    18.0
1    15.0
2    18.0
3    16.0
4    17.0
Name: mpg, dtype: float64

Splitting the dataset into train and test

In [18]:
from sklearn.model_selection import train_test_split
x_train,x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

In [19]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((318, 7), (80, 7), (318,), (80,))

Normalizing the values

In [20]:
from sklearn.preprocessing import StandardScaler
sd = StandardScaler()
x_train=sd.fit_transform(x_train)
x_test=sd.fit_transform(x_test)

In [21]:
x_train

array([[ 0.28940154, -0.27088398,  0.69344948, ..., -1.48607882,
         1.09740861,  1.80096349],
       [-0.88295118, -0.72100705, -0.18841076, ..., -0.38945744,
        -0.25311747,  0.53949567],
       [-0.88295118, -0.72100705,  0.20064523, ..., -1.09695511,
        -1.60364355,  0.53949567],
       ...,
       [ 0.28940154, -0.22299855,  0.25251936, ..., -0.95545557,
         0.8273034 , -0.72197215],
       [-0.88295118, -0.74016122, -0.60340382, ...,  1.34391184,
         1.63761905, -0.72197215],
       [ 1.46175426,  1.06033106,  0.64157535, ..., -1.27382952,
        -1.60364355, -0.72197215]])

Model Building

Implementing the RandomForestRegression Algorithm

In [22]:
from sklearn.ensemble import RandomForestRegressor

In [23]:
rf = RandomForestRegressor(n_estimators=30,random_state=0)

In [24]:
rf.fit(x_train,y_train)

RandomForestRegressor(n_estimators=30, random_state=0)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Predicting the Value

In [25]:
y_pred = rf.predict(x_test)

In [26]:
y_pred

array([19.39333333, 12.53333333, 14.43333333, 31.36666667, 15.86      ,
       22.88      , 22.75666667, 34.95      , 23.56666667, 33.18      ,
       28.54333333, 13.43333333, 17.67666667, 34.59666667, 22.94333333,
       13.13333333, 22.86666667, 16.80333333, 20.47333333, 12.53333333,
       33.80333333, 12.7       , 14.26666667, 13.5       , 25.35      ,
       29.88      , 35.03      , 20.94      , 33.68333333, 18.66333333,
       23.56666667, 23.38666667, 32.55333333, 22.9       , 23.84666667,
       14.7       , 26.23333333, 22.34666667, 21.06      , 24.94666667,
       13.43333333, 31.63666667, 23.18333333, 32.90666667, 32.89      ,
       12.73333333, 18.77333333, 19.13      , 34.09666667, 34.78666667,
       27.63333333, 23.48333333, 26.86333333, 32.51666667, 16.84666667,
       14.65666667, 21.98333333, 27.35      , 23.53333333, 17.08333333,
       34.74666667, 33.68666667, 20.23      , 18.6       , 33.41666667,
       23.93333333, 12.93333333, 25.76666667, 34.54666667, 14.6 

Model Evaluation

In [27]:
from sklearn.metrics import r2_score,mean_squared_error

In [28]:
acc = r2_score(y_test, y_pred)

In [29]:
acc

0.8893354926672756

In [30]:
err=np.sqrt(mean_squared_error(y_test,y_pred))

In [31]:
err

2.7042494337616123