MACHINE LEARNING BASED VEHICLE PERFORMANCE ANALYZER

DATA PREPROCESSING

1. LIB FILES

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

2. DATASET LOADING

In [35]:
dat=pd.read_csv('./car_performance.csv')

3. PRIMARY ANALYSIS

In [4]:
dat.head()          

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [6]:
dat.shape

(398, 9)

In [8]:
dat.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name'],
      dtype='object')

In [12]:
dat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    int64  
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   car name      398 non-null    object 
dtypes: float64(3), int64(5), object(1)
memory usage: 28.1+ KB


In [13]:
dat.nunique()

mpg             129
cylinders         5
displacement     82
horsepower       93
weight          351
acceleration     95
model year       13
origin            3
car name        305
dtype: int64

In [14]:
dat.origin.unique()

array([1, 3, 2], dtype=int64)

4. HANDILING NULL VALUES

In [15]:
dat.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

5. DROPING COLUMN - CAR_NAMES

In [18]:
dat=dat.iloc[:,:-1]
dat.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year
0,18.0,8,307.0,130,3504,12.0,70
1,15.0,8,350.0,165,3693,11.5,70
2,18.0,8,318.0,150,3436,11.0,70
3,16.0,8,304.0,150,3433,12.0,70
4,17.0,8,302.0,140,3449,10.5,70


6. SPLITTING DATASET TO DEPENDENT AND INDEPENDENT VARIABLES

In [23]:
x=dat.iloc[:,1:]
y=dat.iloc[:,0]

In [24]:
x.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model year
0,8,307.0,130,3504,12.0,70
1,8,350.0,165,3693,11.5,70
2,8,318.0,150,3436,11.0,70
3,8,304.0,150,3433,12.0,70
4,8,302.0,140,3449,10.5,70


In [25]:
y.head()

0    18.0
1    15.0
2    18.0
3    16.0
4    17.0
Name: mpg, dtype: float64

7. TRAINING AND TESTING

In [26]:
from sklearn.model_selection import train_test_split
x_train,x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

In [27]:
x_train.shape

(318, 6)

In [28]:
x_test.shape

(80, 6)

In [29]:
y_train.shape

(318,)

In [30]:
y_test.shape

(80,)

8. NORMALIZING VALUES

In [31]:
from sklearn.preprocessing import StandardScaler
sd = StandardScaler()
x_train=sd.fit_transform(x_train)
x_test=sd.fit_transform(x_test)

In [32]:
x_train

array([[ 0.33833667,  0.39501758, -0.08957707,  0.39100127, -0.03507182,
        -1.40047065],
       [ 1.52718818,  1.0823806 ,  0.71737786,  1.09542699, -0.21380323,
        -0.03268912],
       [-0.85051483, -0.85205532, -0.78893802, -1.19214108, -0.10656439,
         1.60864872],
       ...,
       [ 1.52718818,  2.58475976,  3.27273516,  2.4038191 , -1.64365451,
        -0.85335804],
       [-0.85051483, -0.50837381, -0.84273501, -0.68258227,  1.39477945,
        -0.85335804],
       [ 1.52718818,  1.55371524,  1.25534782,  2.09881002, -0.39253464,
        -0.57980173]])

In [33]:
x_test

array([[-8.80696034e-01, -1.05702951e+00, -1.12834643e+00,
        -1.34056681e+00,  2.53249459e-01,  1.42495121e+00],
       [-8.80696034e-01, -6.92842872e-01, -4.63040822e-01,
        -5.35470587e-01, -1.35617273e-01,  1.16228740e+00],
       [ 2.55685945e-01, -8.88260092e-03, -4.15518993e-01,
        -3.74451342e-01, -1.74503946e-01, -1.46435079e+00],
       [ 2.55685945e-01,  0.00000000e+00, -5.34323566e-01,
        -4.40817382e-01,  2.14362786e-01, -1.46435079e+00],
       [-8.80696034e-01, -8.43847087e-01, -1.05706369e+00,
        -9.43458539e-01, -2.91163965e-01,  1.68761503e+00],
       [-8.80696034e-01, -4.35247445e-01, -4.15518993e-01,
        -2.79798137e-01,  9.92096249e-01,  1.68761503e+00],
       [ 1.39206792e+00,  2.03411561e+00,  2.38826892e+00,
         1.78516488e+00, -1.72997087e+00, -9.39023151e-01],
       [ 2.55685945e-01,  2.75360628e-01,  5.96992978e-02,
         9.95300208e-01,  2.15869644e+00, -1.51031696e-01],
       [-8.80696034e-01, -9.68203500e-01, -9.144