# Machine Learning using linear regression

The objective of this notebook is to predict the mpg (miles per gallon) of a car from information about it: weight, number of cylinders, horsepower, etc.

## 1) Imports

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import preprocessing
import numpy as np
from sklearn.metrics import mean_absolute_error ,mean_squared_error, mean_squared_log_error

## 2) Creating list with column names (csv file doesn't have column names)

In [2]:
# Column labels for the data file, in file order (the file has no header row).
col_names = [
    'mpg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model year',
    'origin',
    'car name',
]

## 3) Reading csv file

In [3]:
# Load the whitespace-delimited data file; it has no header row, so the
# column labels are supplied through col_names.
# - sep=r'\s+' replaces delim_whitespace=True, which pandas has deprecated.
# - na_values='?' makes the '?' missing-value markers load as NaN directly, so
#   numeric columns that contain them are parsed as numbers instead of text.
df_cars = pd.read_csv('auto-mpg.data', header=None, names=col_names, sep=r'\s+', na_values='?')
# Preview the first ten rows.
df_cars.head(10)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino
5,15.0,8,429.0,198.0,4341.0,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220.0,4354.0,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215.0,4312.0,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225.0,4425.0,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190.0,3850.0,8.5,70,1,amc ambassador dpl


## 4) Handling missing data by deleting rows with missing values

In [4]:
# Turn every '?' marker into NaN, then discard each row that contains a NaN.
df_cars = df_cars.replace('?', np.nan).dropna()

## 5) Separating target values (labels) from other attributes. 
Splitting the data to create train and test dataframes (70%:30% split); normalization

In [5]:
# Target: the mpg column the model should predict.
y = df_cars['mpg']
# Features: everything except the target and the car-name column.
X = df_cars.drop(columns=['mpg', 'car name'])

In [6]:
# Rescale every row so that its absolute values sum to 1 (L1 norm).
# NOTE(review): normalize() operates per sample (axis=1, the default), not per
# feature -- confirm that row-wise scaling is intended here rather than a
# column-wise scaler.
X = preprocessing.normalize(X, norm='l1', axis=1)

In [7]:
# Hold out 30% of the rows for testing. random_state pins the shuffle so the
# split -- and therefore the metrics reported below -- is the same on every
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

## 6) Creating model and training

In [8]:
# Create a linear regression estimator and fit it on the training split.
# fit() returns the estimator itself, so the trailing expression displays
# the fitted model.
model = linear_model.LinearRegression().fit(X_train, y_train)
model

LinearRegression()

## 7) Model evaluation

In [9]:
# Predict mpg for the held-out test rows with the fitted model.
y_pred = model.predict(X=X_test)

In [20]:
# Score the predictions against the true test targets, one metric per line:
# mean absolute error, mean squared error, mean squared logarithmic error.
for prefix, metric in (
    ('MAE: ', mean_absolute_error),
    ('MSE: ', mean_squared_error),
    ('MSLE: ', mean_squared_log_error),
):
    print(prefix + str(metric(y_test, y_pred)))

MAE: 2.5432997881355925
MSE: 12.538185662738346
MSLE: 0.018793035808176206
