In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
RANDOM_SEED = 42

In [3]:
auto_df = pd.read_csv("auto.csv")
auto_df.shape

(398, 9)

In [4]:
keep_columns = ['mpg', 'horsepower', 'weight', 'acceleration', 'origin']

In [5]:
auto_df['is_american'] = (auto_df.origin == 1).astype(int)

In [6]:
def create_regression_dataset(
    df,
    columns=['weight', 'horsepower', 'mpg']
):
    all_columns = columns.copy()
    all_columns.append('acceleration')

    print(df.columns)
    reg_df = df[all_columns]
    
    reg_df = StandardScaler().fit_transform(reg_df[all_columns])
    reg_df = pd.DataFrame(reg_df, columns=all_columns)

    return reg_df[columns], reg_df.acceleration

def create_classification_dataset(df):
    columns = ['mpg', 'weight', 'horsepower']
    
    x = df[columns]
    x = StandardScaler().fit_transform(x)
    x = pd.DataFrame(X, columns=columns) 

    return x, df.is_american

In [7]:
from sklearn.model_selection import KFold, cross_val_score

def eval_model(model, x,y,score):
    cv = KFold(n_splits=10, random_state=RANDOM_SEED)
    results = cross_val_score(model,x,y,cv=cv, scoring=score)
    return np.abs(results.mean())

def eval_classifier(model, x,y):
    return eval_model(model, x,y, score='accuracy')

def eval_regressor(model, x,y):
    return eval_model(model,x,y,score='neg_mean_squared_error')


In [8]:
from sklearn.linear_model import LinearRegression

x,y = create_regression_dataset(auto_df, columns=['horsepower'])

reg = LinearRegression()
eval_regressor(reg,x,y)

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name', 'is_american'],
      dtype='object')


0.5534493482629231

In [9]:
x,y = create_regression_dataset(auto_df)

reg = LinearRegression()
eval_regressor(reg,x,y)

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name', 'is_american'],
      dtype='object')


0.5016008908512629