# Regression With Keras

In [1]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

Using TensorFlow backend.


## Linear Regression

In [2]:
# Load dataset
boston_data = load_boston()
boston_data

# Create pandas dataframe
boston = pd.DataFrame(boston_data.data, columns=boston_data.feature_names)
boston

# Split into train and test data
X_train, X_test, y_train, y_test = train_test_split(boston, boston_data.target, test_size=0.2, random_state=1)

In [3]:
# Create baseline sklearn model for comparision 
basic_reg = LinearRegression()
basic_reg.fit(X_train, y_train)

y_pred = basic_reg.predict(X_test)
print('MSE:', mean_squared_error(y_test, y_pred))

MSE: 23.38083648026999


In [4]:
# Create linear regression model
def linear_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# Create a model to estimate linear_model 
train_estimator = KerasRegressor(build_fn=linear_model, epochs=100, batch_size=5, verbose=0)
test_estimator = KerasRegressor(build_fn=linear_model, epochs=100, batch_size=5, verbose=0)
test_estimator.fit(X_train, y_train)

<tensorflow.python.keras.callbacks.History at 0x136ba33d0>

In [5]:
# Evaluate linear regression model
kfold = KFold(n_splits=10)
results = cross_val_score(train_estimator, X_train, y_train, cv=kfold)
print("Train MSE:", results.mean()* -1)

y_pred = test_estimator.predict(X_test)
print('Test MSE:', mean_squared_error(y_pred, y_test))

Train MSE: 27.093470287322997
Test MSE: 20.33682469599655


## Logistic Regression

In [6]:
# Create pandas dataframe
diabetes = pd.read_csv('../Data/diabetes.csv')
diabetes

# Split into train and test data
X_train, X_test, y_train, y_test = train_test_split(diabetes.drop(columns=['Outcome']), diabetes['Outcome'], test_size=0.2, random_state=1)

In [7]:
# Create Logistic Model
def log_model():
    model = Sequential()
    model.add(Dense(8, input_dim=8, kernel_initializer='normal', activation='softmax'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# Create a model to estimate linear_model 
train_estimator = KerasRegressor(build_fn=log_model, epochs=100, batch_size=5, verbose=0)
test_estimator = KerasRegressor(build_fn=log_model, epochs=100, batch_size=5, verbose=0)
test_estimator.fit(X_train, y_train)

<tensorflow.python.keras.callbacks.History at 0x1377bafd0>

In [8]:
# Evaluate logistic regression model
kfold = KFold(n_splits=10)
results = cross_val_score(train_estimator, X_train, y_train, cv=kfold)
print("Train MSE:", results.mean()* -1)

y_pred = test_estimator.predict(X_test)
print('Test MSE:', mean_squared_error(y_pred, y_test))

Train MSE: 0.20133467614650727
Test MSE: 0.21286271779277952
