In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm


In [2]:
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

raw_dataset = pd.read_csv(url, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)

dataset = raw_dataset.copy()
dataset = dataset.dropna()
dataset = dataset.astype(int)
dataset.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18,8,307,130,3504,12,70,1
1,15,8,350,165,3693,11,70,1
2,18,8,318,150,3436,11,70,1
3,16,8,304,150,3433,12,70,1
4,17,8,302,140,3449,10,70,1


In [3]:
#The following script divides data into attributes and labels:
X = dataset.iloc[:, [1,2,3,5,6,7]].values
y = dataset.iloc[:, [0,4]].values

#divide the data into training and testing sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


loss_per_fold_0 = []
loss_per_fold_1 = []


#Divide the data into training and testing sets for KFold Cross validaton loop
# Define the K-fold Cross Validator
cv = KFold(n_splits=392, shuffle=False)
cv.get_n_splits(X)
print(cv,"\n")

# K-fold Cross Validation model evaluation
fold_no = 1


for train_index, test_index in cv.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    
    # Feature Scaling
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)  
    
    logreg = LogisticRegression()

    # Create the Multioutput Regressor
    mor = MultiOutputRegressor(logreg)

    # Train the regressor
    mor = mor.fit(X_train, y_train)
    y_pred = mor.predict(X_test)

# Generate generalization metrics
    
    print(f'loss for fold {fold_no}: \n Regressor 1: {metrics.mean_absolute_error(y_test[:,0], y_pred[:,0])}\n Regressor : {metrics.mean_absolute_error(y_test[:,1], y_pred[:,1])}\n\n')

    loss_per_fold_0.append(metrics.mean_absolute_error(y_test[:,0], y_pred[:,0]))
    loss_per_fold_1.append(metrics.mean_absolute_error(y_test[:,1], y_pred[:,1]))

    # Increase fold number
    fold_no = fold_no + 1

KFold(n_splits=392, random_state=None, shuffle=False) 

loss for fold 1: 
 Regressor 1: 4.0
 Regressor : 168.0


loss for fold 2: 
 Regressor 1: 1.0
 Regressor : 21.0


loss for fold 3: 
 Regressor 1: 4.0
 Regressor : 236.0


loss for fold 4: 
 Regressor 1: 2.0
 Regressor : 239.0


loss for fold 5: 
 Regressor 1: 3.0
 Regressor : 223.0


loss for fold 6: 
 Regressor 1: 1.0
 Regressor : 123.0


loss for fold 7: 
 Regressor 1: 0.0
 Regressor : 42.0


loss for fold 8: 
 Regressor 1: 0.0
 Regressor : 42.0


loss for fold 9: 
 Regressor 1: 0.0
 Regressor : 1339.0


loss for fold 10: 
 Regressor 1: 1.0
 Regressor : 462.0


loss for fold 11: 
 Regressor 1: 1.0
 Regressor : 901.0


loss for fold 12: 
 Regressor 1: 0.0
 Regressor : 63.0


loss for fold 13: 
 Regressor 1: 1.0
 Regressor : 703.0


loss for fold 14: 
 Regressor 1: 0.0
 Regressor : 1339.0


loss for fold 15: 
 Regressor 1: 0.0
 Regressor : 242.0


loss for fold 16: 
 Regressor 1: 4.0
 Regressor : 425.0


loss for fold 17: 
 Regress

In [4]:

print('Average scores for all folds:')
print(f'> Mean loss fisrt regressor: {np.mean(loss_per_fold_0)}')
print(f'> Mean loss fisrt regressor: {np.mean(loss_per_fold_1)}')
print('------------------------------------------------------------------------')


Average scores for all folds:
> Mean loss fisrt regressor: 2.688775510204082
> Mean loss fisrt regressor: 304.94132653061223
------------------------------------------------------------------------
