In [None]:
# LINEAR REGRESSION PRACTICE ON THE UCI AUTO MPG DATASET

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Show fewer digits to improve readability: print NumPy floats with 3 decimal
# places and suppress scientific notation for small values.
np.set_printoptions(precision=3, suppress=True)

In [None]:
# Fetch the UCI "auto-mpg" data file. It has no header row, so the column
# names are supplied here. Fields are separated by spaces (extra spaces after
# a separator are skipped), '?' is read as a missing value, and everything
# from the first tab on a line onwards is discarded as a comment.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]
dataset = (
    pd.read_csv(
        url,
        names=column_names,
        sep=' ',
        skipinitialspace=True,
        comment='\t',
        na_values='?',
    )
    .dropna()  # keep only the rows that have a value in every column
)
dataset.tail()

In [None]:
# Replace the numeric 'Origin' code with one binary indicator column per
# nation of origin (the new columns are named by the mapping below).
# The guard keeps this cell safe to re-run: after the first run the 'Origin'
# column no longer exists, and indexing it again would raise a KeyError.
if 'Origin' in dataset.columns:
    origin_names = {1: 'USA', 2: 'Europe', 3: 'Japan'}
    dataset = pd.get_dummies(
        # assign() builds a new frame instead of mutating `dataset` in place
        dataset.assign(Origin=dataset['Origin'].map(origin_names)),
        columns=['Origin'], prefix='', prefix_sep='',
    )
dataset.tail()

In [None]:
# Split the rows: a seeded 80% random sample becomes the training set and
# every row not drawn into that sample becomes the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Plot MPG against four of the features, using the training rows only.
pp = sns.pairplot(
    data=train_dataset,
    x_vars=['Cylinders', 'Displacement', 'Weight', 'Model Year'],
    y_vars=['MPG'],
)

# Save the current figure as vector graphics and as PNG at two resolutions.
figure_outputs = {
    'plots-four-features.pdf': {},
    'plots-four-features.svg': {},
    'plots-four-features-dpi-72.png': {'dpi': 72},
    'plots-four-features-dpi-300.png': {'dpi': 300},
}
for out_path, save_options in figure_outputs.items():
    plt.savefig(out_path, **save_options)

In [None]:
# The regression target is the MPG column of each split.
train_labels = train_dataset['MPG'].copy()
test_labels = test_dataset['MPG'].copy()

# The features are every remaining column; the split frames themselves are
# left untouched because drop() returns new frames.
train_features = train_dataset.drop(columns='MPG')
test_features = test_dataset.drop(columns='MPG')

# Fit the scikit-learn linear regression on the training split.
linear_model = LinearRegression().fit(
    X=train_features,
    y=train_labels,
)

In [None]:
# Sanity check: predict MPG for the first row of the test features.
# iloc[[0]] (double brackets) selects a one-row DataFrame, so the model sees
# the same column names it was fitted with. Wrapping the Series returned by
# iloc[0] in a list would discard those names and hand the model a bare
# sequence instead.
linear_model.predict(test_features.iloc[[0]])

In [None]:
# Show the fitted parameters: the coefficient array first, then the intercept,
# each on its own line.
print(linear_model.coef_, linear_model.intercept_, sep='\n')

In [None]:
# Collect, for every test row, the model's prediction, the true MPG, the
# absolute error between the two, and the car's weight. The frame is built in
# one constructor call from plain arrays, so rows line up by position and the
# result gets a fresh 0..n-1 index.
predicted_mpg = linear_model.predict(test_features)
true_mpg = test_labels.to_numpy()
chart_data = pd.DataFrame({
    "Index": range(len(test_features)),
    "Predicted": predicted_mpg,
    "True MPG": true_mpg,
    "ABS Error": np.abs(true_mpg - predicted_mpg),
    "Weight": test_features["Weight"].to_numpy(),
})

# Plot the test set: true MPG against weight, coloured by absolute error.
# An explicit figure/axes pair is used so the plot never lands on whatever
# figure pyplot currently considers "current".
fig, ax = plt.subplots()
pp = sns.scatterplot(
    data=chart_data,
    x='Weight',
    y='True MPG',
    hue="ABS Error",
    palette=sns.color_palette("coolwarm", as_cmap=True),
    ax=ax,
)
ax.set_title('Test set: true MPG vs. weight, coloured by absolute prediction error')

# Save the figure as vector graphics and as PNG at two resolutions.
figure_outputs = {
    'plots-lm-test-output.pdf': {},
    'plots-lm-test-output.svg': {},
    'plots-lm-test-output-dpi-72.png': {'dpi': 72},
    'plots-lm-test-output-dpi-300.png': {'dpi': 300},
}
for out_path, save_options in figure_outputs.items():
    fig.savefig(out_path, **save_options)

In [None]:
# What-if experiment: add extra WEIGHT (not horsepower) to one hard-coded car
# and see how the predicted MPG responds.
# The values are positional, in the column order of train_features; labelling
# them with those column names lets the model receive named features, matching
# how it was fitted.
# NOTE(review): the original comment says these values are the first test row
# - confirm against test_features.iloc[0].
baseline_car = pd.DataFrame(
    [[8, 390, 190, 3850, 8.5, 70, 0, 0, 1]],
    columns=train_features.columns,
)

# NOTE(review): the original author reports a lower MPG for +300 and a
# negative MPG for +3000 (a linear model extrapolates without bounds) -
# confirm against the printed output.
for weight_offset in (300, 3000):
    heavier_car = baseline_car.assign(Weight=baseline_car['Weight'] + weight_offset)
    print("With", weight_offset, "more lbs weight, MPG predicted at", linear_model.predict(heavier_car))