# Part 1

In [46]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn import metrics

import pickle

sns.set()

In [47]:
columns = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name']

In [48]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data', na_values = ['NA', '?'], names = columns, delim_whitespace=True)

In [49]:
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [50]:
data['cylinders'].min()

3

In [51]:
data.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
year            0
origin          0
name            0
dtype: int64

In [52]:
data['cylinders'].max()

8

In [53]:
data['horsepower'] = data['horsepower'].fillna(data['horsepower'].median())

In [54]:
data.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
year            0
origin          0
name            0
dtype: int64

In [55]:
numeric_list = data.select_dtypes(include=[np.number]).columns
data[numeric_list] = data[numeric_list].astype(np.float32)

In [56]:
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0,chevrolet chevelle malibu
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0,buick skylark 320
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0,plymouth satellite
3,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0,amc rebel sst
4,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0,ford torino


In [57]:
x = data[['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin']].values
y = data['mpg'].values

In [58]:
type(y)

numpy.ndarray

In [59]:
type(x)

numpy.ndarray

In [60]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 101)

In [61]:
print(f'The shape of the data is: \nx_train: \t{x_train.shape} \nx_test: \t{x_test.shape} \ny_train: \t{y_train.shape} \ny_test: \t{y_test.shape}')

The shape of the data is: 
x_train: 	(318, 7) 
x_test: 	(80, 7) 
y_train: 	(318,) 
y_test: 	(80,)


In [62]:
LinearRegression_model = LinearRegression()
DecisionTree_model = DecisionTreeRegressor()
RandomForest_model = RandomForestRegressor()
XGBRegressor_model = XGBRegressor()

In [63]:
models = [LinearRegression_model, DecisionTree_model, RandomForest_model, XGBRegressor_model]
for model in models:
    model.fit(x_train, y_train)
    print(f'{model} is trained!')

LinearRegression() is trained!
DecisionTreeRegressor() is trained!
RandomForestRegressor() is trained!
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...) is trained!


In [64]:
accuracy_LinearRegression = LinearRegression_model.score(x_test, y_test)
accuracy_DecisionTree = DecisionTree_model.score(x_test, y_test)
accuracy_RandomForest = RandomForest_model.score(x_test, y_test)
accuracy_XGBoost = XGBRegressor_model.score(x_test, y_test)

In [65]:
models = {'LinearRegression_model': accuracy_LinearRegression, 'DecisionTree_model': accuracy_DecisionTree, 'RandomForest_model': accuracy_RandomForest,
          'XGBRegressor_model': accuracy_XGBoost}
for model, score in models.items():
    print(f'The accuracy score for the {model} is {round(score*100, 2)}%')

The accuracy score for the LinearRegression_model is 80.01%
The accuracy score for the DecisionTree_model is 68.31%
The accuracy score for the RandomForest_model is 90.37%
The accuracy score for the XGBRegressor_model is 86.67%


In [66]:
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0,chevrolet chevelle malibu
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0,buick skylark 320
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0,plymouth satellite
3,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0,amc rebel sst
4,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0,ford torino


In [67]:
data.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'year', 'origin', 'name'],
      dtype='object')

In [68]:
test_x = np.zeros((1, 7))
test_x

array([[0., 0., 0., 0., 0., 0., 0.]])

In [69]:
# test_x = np.zeros((1, 7))

# test_x[0, 0] = 6.0      # cylinders
# test_x[0, 1] = 370.0    # displacement
# test_x[0, 2] = 150.0    # horsepower
# test_x[0, 3] = 3304.0   # weight
# test_x[0, 4] = 11.0     # acceleration
# test_x[0, 5] = 50.0     # year
# test_x[0, 6] = 1.0      # origin
# test_x

test_x = np.zeros((1, 7))

test_x[0, 0] = 8.0
test_x[0, 1] = 307.0
test_x[0, 2] = 130.0
test_x[0, 3] = 3200.0
test_x[0, 4] = 12.0
test_x[0, 5] = 70.0
test_x[0, 6] = 1.0
test_x

array([[8.00e+00, 3.07e+02, 1.30e+02, 3.20e+03, 1.20e+01, 7.00e+01,
        1.00e+00]])

In [70]:
prediction = LinearRegression_model.predict(test_x)
round(float(prediction[0]), 2)

17.16

In [71]:
models = [LinearRegression_model, DecisionTree_model, RandomForest_model, XGBRegressor_model]

In [72]:
for model in models:
    prediction = model.predict(test_x)
    print(f'The model {model} predicts an mpg of {round(float(prediction[0]), 2)}')

The model LinearRegression() predicts an mpg of 17.16
The model DecisionTreeRegressor() predicts an mpg of 18.0
The model RandomForestRegressor() predicts an mpg of 16.47
The model XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...) predicts an mpg of 16.13


In [73]:
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0,chevrolet chevelle malibu
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0,buick skylark 320
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0,plymouth satellite
3,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0,amc rebel sst
4,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0,ford torino


In [74]:
#mdl_file_v2 = '/content/drive/MyDrive/LaGuardia Classes/Fall 2023 Predictive Analytics/'

In [75]:
#mdl_file_v2 = '/Users/pps/Documents/Data/Python/Predictive Analytics/Logic/Class Work'

In [76]:
#with open('mdl_file_v2', 'wb') as file:
    #model = pickle.dump(LinearRegression_model, file)

In [77]:
#import os
#current_directory = os.getcwd()
#print(current_directory)

In [78]:
# Using nn
test_x = np.zeros((1, 7))

test_x[0, 0] = 8.0
test_x[0, 1] = 307.0
test_x[0, 2] = 130.0
test_x[0, 3] = 3200.0
test_x[0, 4] = 12.0
test_x[0, 5] = 70.0
test_x[0, 6] = 1.0
test_x

array([[8.00e+00, 3.07e+02, 1.30e+02, 3.20e+03, 1.20e+01, 7.00e+01,
        1.00e+00]])

In [79]:
#prediction = model.predict(test_x)
#round(float(prediction[0]), 2)

In [80]:
model = LinearRegression_model

In [81]:
prediction = model.predict(test_x)
round(float(prediction[0]), 2)

17.16

# Part 2

In [None]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
model = Sequential()
model.add(Dense(25, input_dim = x.shape[1], activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(1))

In [None]:
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

In [None]:
monitor = EarlyStopping(monitor = 'val_loss', min_delta = 1e-3, patience = 5, verbose = 1, mode = 'auto', restore_best_weights = True)

In [None]:
model.fit(x_train, y_train, validation_data = (x_test, y_test), callbacks = [monitor], verbose = 2, epochs = 1000)

In [None]:
prediction = model.predict(x_test)
score = np.sqrt(metrics.mean_squared_error(prediction, y_test))
print(f'After training the score is: {score}')

In [None]:
col = [x for x in data.columns if x not in ('mpg', 'name')]
# col

# Normal loop way
print('{')
for i, name in enumerate(col):
    print(f'"{name}":{{"min":{data[name].min()}, "max":{data[name].max()}}}{"," if i < (len(col)-1) else ""}')
print('}')

In [None]:
model.save('mpg_model1.h5')

In [None]:
model.summary()

In [None]:
from keras.utils import plot_model
plot_model(model, show_shapes=True)

In [None]:
test_x = np.zeros((1, 7))

test_x[0, 0] = 8.0
test_x[0, 1] = 307.0
test_x[0, 2] = 130.0
test_x[0, 3] = 3200.0
test_x[0, 4] = 12.0
test_x[0, 5] = 70.0
test_x[0, 6] = 1.0
test_x

In [None]:
prediction = model.predict(test_x)
round(float(prediction[0]), 2)