# Auto MPG Regression

## Import libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Import dataset

This dataset contains several measurements for almost 400 cars; the last column is the target (MPG) and the remaining columns are used as features.

In [3]:
import os

# Location of the Auto MPG CSV. The default is the original author's local
# path; set the AUTO_MPG_CSV environment variable to point at another copy so
# the notebook can run on a different machine without editing this cell.
DATA_PATH = os.environ.get(
    "AUTO_MPG_CSV", "/home/joe/Documents/ML-Resources/auto-mpg.csv"
)

dataset = pd.read_csv(DATA_PATH)

# Every column except the last is a feature; the last column is the target.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Clean missing values

The dataset has 6 missing values

In [4]:
from sklearn.impute import SimpleImputer
# Replace NaNs in feature columns 1..3 with the column mean, writing the
# result back into `x` in place.
# NOTE(review): the means are computed on the full feature matrix, before the
# train/test split performed later in this notebook, so held-out rows
# contribute to the imputed values -- consider fitting on the training rows only.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)
columns_to_impute = x[:, 1:4]
x[:, 1:4] = imputer.fit_transform(columns_to_impute)

## Split dataset

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

## Train the model

A multiple linear regression is used so that the fitted coefficients and intercept can be read off directly.

In [6]:
from sklearn.linear_model import LinearRegression
# Ordinary least-squares model fitted on the training split only.
regressor = LinearRegression()
regressor.fit(X=X_train, y=Y_train)

## Predict test set results

In [7]:
# Score the held-out features.
y_pred = regressor.predict(X_test)

# Show two decimals when printing arrays from here on.
np.set_printoptions(precision=2)

# Side-by-side table: left column is the prediction, right column is the
# actual target value from Y_test.
predicted_vs_actual = np.column_stack((y_pred, Y_test))
print(predicted_vs_actual)

[[29.5  29.  ]
 [26.43 30.7 ]
 [21.04 20.2 ]
 [23.8  17.  ]
 [23.22 20.6 ]
 [31.03 29.  ]
 [26.53 24.  ]
 [33.9  34.1 ]
 [15.04 14.5 ]
 [22.43 18.  ]
 [29.6  27.  ]
 [16.42 15.5 ]
 [22.06 19.2 ]
 [25.43 21.5 ]
 [34.69 39.  ]
 [24.38 25.  ]
 [ 9.1  14.  ]
 [22.78 21.  ]
 [10.94 13.  ]
 [31.28 37.3 ]
 [22.65 18.  ]
 [25.6  24.5 ]
 [24.37 27.  ]
 [24.25 28.  ]
 [24.01 19.1 ]
 [ 7.43 11.  ]
 [29.46 32.9 ]
 [31.21 39.4 ]
 [ 9.29 15.  ]
 [28.98 22.  ]
 [27.89 38.  ]
 [ 9.37 14.  ]
 [25.76 26.  ]
 [29.31 27.2 ]
 [29.07 26.  ]
 [16.42 14.  ]
 [28.52 27.  ]
 [15.3  13.  ]
 [32.18 32.  ]
 [16.53 16.  ]
 [25.71 23.  ]
 [30.46 30.5 ]
 [19.15 16.5 ]
 [31.56 34.5 ]
 [28.19 33.5 ]
 [24.49 20.8 ]
 [15.59 16.  ]
 [10.7  14.  ]
 [24.85 23.  ]
 [22.14 17.5 ]
 [26.84 26.  ]
 [32.51 40.8 ]
 [17.62 13.  ]
 [12.75 14.  ]
 [30.87 30.  ]
 [23.3  23.  ]
 [10.06 13.  ]
 [15.65 17.  ]
 [28.93 28.  ]
 [34.78 32.  ]
 [34.55 37.  ]
 [34.61 38.  ]
 [17.41 17.  ]
 [30.89 30.  ]
 [32.64 31.9 ]
 [33.46 38.1 ]
 [27.53 26

## Making a single prediction

In [22]:
# One hand-written sample; predict() expects a 2-D input, hence the nested list.
# Values follow the feature order used in the equation in this notebook:
# cylinders, displacement, horsepower, weight, acceleration, model year.
# NOTE(review): the output recorded for this cell is negative, which is not a
# meaningful MPG -- these inputs are presumably outside the range (or units)
# of the training data; verify against the dataset before relying on it.
single_sample = [[8, 6.5, 495, 3535, 3.8, 20]]
print(regressor.predict(single_sample))

[-23.68]


## Getting final coefficient and intercept

In [23]:
# First line: one fitted coefficient per feature column; second line: intercept.
print(regressor.coef_, regressor.intercept_, sep="\n")

[ 1.87e-01 -8.67e-04 -5.54e-04 -6.85e-03  7.13e-02  7.65e-01]
-16.235797089555856


MPG = 0.187 x cylinders - 0.000867 x displacement - 0.000554 x horsepower - 0.00685 x weight + 0.0713 x acceleration + 0.765 x model year - 16.235797089555856

In [24]:
from sklearn.metrics import r2_score
# Coefficient of determination (R^2) of the predictions on the held-out split.
r2_score(y_true=Y_test, y_pred=y_pred)

0.797304403123875