In [47]:
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Read the dataset from a whitespace-delimited text file, skipping the first
# 37 lines (presumably a descriptive preamble — confirm against the file).
# The separator is a raw string so that "\s" reaches pandas as a regex rather
# than being parsed as an (invalid) Python string escape.
# NOTE(review): with the default header handling, the first line after the
# skipped ones is consumed as the header and then renamed below. If that line
# is actually a data row, one observation is silently dropped — verify, and
# pass header=None (adjusting skiprows if needed) should that be the case.
df = pd.read_csv('gasoline_use.txt', sep=r'\s+', skiprows=37)

# Replace the inferred header with short positional column names.
df.columns = ['I', 'A0', 'A1', 'A2', 'A3', 'A4', 'B']
target = df['B']
features = df[['A1', 'A2', 'A3', 'A4']]

# Hold out 20% of the rows for evaluation. The fixed seed keeps the split
# (and therefore every number printed further down) identical across
# Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [48]:
# Build an ordinary least-squares regressor and fit it on the training split
# in one step (fit() hands back the estimator itself).
lm = linear_model.LinearRegression().fit(x_train, y_train)

# Show the learned parameters: one coefficient per column of x_train,
# followed by the intercept term.
print('Coefficients: \n', lm.coef_)
print('Intercept: \n', lm.intercept_)

Coefficients: 
 [-3.79911420e+01 -6.60620871e-02 -5.84679696e-03  1.42242718e+03]
Intercept: 
 373.93348578463883


In [49]:
# Predict on the held-out rows and report the coefficient of determination.
y_fitted = lm.predict(x_test)
test_r2 = r2_score(y_test, y_fitted)
print("R^2 = %.2f" % test_r2)

R^2 = 0.36


In [50]:
# What-if scenario: shift the A1 feature upward by a fixed amount on every
# training row and ask the fitted model for the resulting predictions.
# NOTE(review): the printed label assumes A1 is expressed in dollars —
# confirm the unit of A1 against the dataset description.
tax_increase = 2.00

# assign() returns a modified copy, so x_train itself is left untouched.
new_data = x_train.assign(A1=x_train['A1'] + tax_increase)
predicted_gasoline_consumption = lm.predict(new_data)

print(
    "Predicted gasoline consumption with $2.00 tax increase: ",
    predicted_gasoline_consumption,
    sep="\n",
)

Predicted gasoline consumption with $2.00 tax increase: 
[489.74703013 645.7043184  519.84356007 678.01275928 392.2103398
 575.69229713 492.0247116  214.76269436 545.22022517 616.09492287
 526.87892715 610.51014007 382.73531611 514.1632249  423.10571725
 487.9923547  456.26538281 430.01342886 562.96395289 402.55996282
 518.03986539 502.97963341 441.3031209  490.84089409 715.71787623
 529.55035724 516.435699   433.24250161 667.90230107 449.49486557
 369.54824939 584.41671324 429.48958353 469.38217002 568.01723135
 491.93770994 625.85545185]


In [51]:
# Compare old and new *predicted* gasoline consumption values.
# The baseline must be the model's own prediction on the unmodified training
# rows; subtracting the observed y_train instead would fold each row's
# residual into its "difference". (The mean is unaffected on the training set
# because least-squares residuals with an intercept average to zero, but the
# per-row values would be wrong.)
baseline_consumption = lm.predict(x_train)

# Keep the result as a Series indexed like the training rows so individual
# differences can be traced back to their observations.
difference = pd.Series(
    predicted_gasoline_consumption - baseline_consumption,
    index=x_train.index,
)

avg_difference = np.mean(difference)
print("Average difference: %.2f" % avg_difference)

Average difference: -75.98
