In [17]:
import numpy as np 
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

## Load Data

In [2]:
data = pd.read_csv('data.data', header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,64,65,66,67,68,69,70,71,72,73
0,1/1/1998,0.8,1.8,2.4,2.1,2.0,2.1,1.5,1.7,1.9,...,0.15,10.67,-1.56,5795,-12.1,17.9,10330,-55,0.0,0.0
1,1/2/1998,2.8,3.2,3.3,2.7,3.3,3.2,2.9,2.8,3.1,...,0.48,8.39,3.84,5805,14.05,29,10275,-55,0.0,0.0
2,1/3/1998,2.9,2.8,2.6,2.1,2.2,2.5,2.5,2.7,2.2,...,0.6,6.94,9.8,5790,17.9,41.3,10235,-40,0.0,0.0
3,1/4/1998,4.7,3.8,3.7,3.8,2.9,3.1,2.8,2.5,2.4,...,0.49,8.73,10.54,5775,31.15,51.7,10195,-40,2.08,0.0
4,1/5/1998,2.6,2.1,1.6,1.4,0.9,1.5,1.2,1.4,1.3,...,?,?,?,?,?,?,?,?,0.58,0.0


## Split Targets and Data

In [3]:
targets = data.iloc[:, -1]
data = data.iloc[:,1:-1]

## Replace missings with 0.0 for mathematical operations

In [4]:
for i in data.columns.values:
    data[i].replace(to_replace='?', value=0.0, inplace=True)
data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,63,64,65,66,67,68,69,70,71,72
0,0.8,1.8,2.4,2.1,2.0,2.1,1.5,1.7,1.9,2.3,...,-15.5,0.15,10.67,-1.56,5795.0,-12.1,17.9,10330.0,-55.0,0.0
1,2.8,3.2,3.3,2.7,3.3,3.2,2.9,2.8,3.1,3.4,...,-14.5,0.48,8.39,3.84,5805.0,14.05,29.0,10275.0,-55.0,0.0
2,2.9,2.8,2.6,2.1,2.2,2.5,2.5,2.7,2.2,2.5,...,-15.9,0.6,6.94,9.8,5790.0,17.9,41.3,10235.0,-40.0,0.0
3,4.7,3.8,3.7,3.8,2.9,3.1,2.8,2.5,2.4,3.1,...,-16.8,0.49,8.73,10.54,5775.0,31.15,51.7,10195.0,-40.0,2.08
4,2.6,2.1,1.6,1.4,0.9,1.5,1.2,1.4,1.3,1.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.58


In [5]:
data.dtypes

1     object
2     object
3     object
4     object
5     object
       ...  
68    object
69    object
70    object
71    object
72    object
Length: 72, dtype: object

## Change dtype to float for mathematical operations

In [6]:
for i in data.columns.values:
    data[i] = pd.to_numeric(data[i])
data.dtypes

1     float64
2     float64
3     float64
4     float64
5     float64
       ...   
68    float64
69    float64
70    float64
71    float64
72    float64
Length: 72, dtype: object

## Nirmalize Data

In [7]:
data = (data - data.mean()) / data.std()
data = data.values

## Implement logestic regression classifier

In [8]:
from sklearn.model_selection import KFold

In [9]:
class LogisticRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000, l2_rate = 1e-3):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.l2_rate = l2_rate
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # gradient descent
        for _ in range(self.n_iters):
            # approximate y with linear combination of weights and x, plus bias
            linear_model = np.dot(X, self.weights) + self.bias
            
            # regularization term
            ridge_reg_term = (self.l2_rate / 2 * len(X)) * np.sum(np.square(self.weights))
            
            linear_model += ridge_reg_term
            
            # apply sigmoid function
            y_predicted = self._sigmoid(linear_model)

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            
            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, x):
        return np.where(x < 0, np.exp(x)/(1 + np.exp(x)), 1/(1 + np.exp(-x)))


## Test Algorithm

In [15]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

In [21]:
for lr in [1, 1e-1, 1e-2, 1e-3]:
    for l2 in [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2]:

        regressor = LogisticRegression(learning_rate=lr, n_iters=1000, l2_rate = l2)
        train_scores, test_scores = list(), list()
        kf = KFold(n_splits=3, shuffle = True, random_state = 42)
        for train_index, test_index in kf.split(data):
            xtrain, xtest = data[train_index], data[test_index]
            ytrain, ytest = targets[train_index], targets[test_index]

            regressor.fit(xtrain, ytrain)
            predictions = regressor.predict(xtest)
            train_scores.append(accuracy(ytrain, regressor.predict(xtrain)))
            test_scores.append(accuracy(ytest, predictions))

        print(f"learning Rate: {lr}, L2_rate: {l2}\n\t\tMean Train Accuracy: {np.mean(train_scores)}\n\t\tMean Test Accuracy: {np.mean(test_scores)}\n\t\tSTD: {np.std(test_scores)}\n")

learning Rate: 1, L2_rate: 0.001
		Mean Train Accuracy: 0.971214003361572
		Mean Test Accuracy: 0.9712124815234472
		STD: 0.004368126530779725

learning Rate: 1, L2_rate: 0.01
		Mean Train Accuracy: 0.971214003361572
		Mean Test Accuracy: 0.9712124815234472
		STD: 0.004368126530779725

learning Rate: 1, L2_rate: 0.1
		Mean Train Accuracy: 0.9195645352994214
		Mean Test Accuracy: 0.900220552921044
		STD: 0.0802198329596239

learning Rate: 1, L2_rate: 1.0
		Mean Train Accuracy: 0.9189728193230899
		Mean Test Accuracy: 0.8994320645711807
		STD: 0.08057222642151488

learning Rate: 1, L2_rate: 10.0
		Mean Train Accuracy: 0.9189728193230899
		Mean Test Accuracy: 0.8994320645711807
		STD: 0.08057222642151488

learning Rate: 1, L2_rate: 100.0
		Mean Train Accuracy: 0.9189728193230899
		Mean Test Accuracy: 0.8994320645711807
		STD: 0.08057222642151488

learning Rate: 0.1, L2_rate: 0.001
		Mean Train Accuracy: 0.971214003361572
		Mean Test Accuracy: 0.9712124815234472
		STD: 0.004368126530779725

# ANN

In [22]:
import tensorflow as tf

## Change in neurons and layers

In [29]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-3)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")


	Mean Train Accuracy: 0.9751571416854858
	Mean Test Accuracy: 0.9723954399426779
	STD: 0.00566985325268259



In [30]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-3)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")



	Mean Train Accuracy: 0.9875804980595907
	Mean Test Accuracy: 0.9672719041506449
	STD: 0.004017625848132292



In [31]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, 'relu'),
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-3)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")


	Mean Train Accuracy: 0.997831662495931
	Mean Test Accuracy: 0.9633317987124125
	STD: 0.007707888360845271



In [32]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, 'relu'),
        tf.keras.layers.Dense(64, 'relu'),
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-3)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")


	Mean Train Accuracy: 1.0
	Mean Test Accuracy: 0.9649055004119873
	STD: 0.004355941454447569



## Change in Learning Rate

In [33]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, 'relu'),
        tf.keras.layers.Dense(64, 'relu'),
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-2)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")


	Mean Train Accuracy: 0.9940856496493021
	Mean Test Accuracy: 0.9629349708557129
	STD: 0.0062042109755699685



In [34]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, 'relu'),
        tf.keras.layers.Dense(64, 'relu'),
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1e-1)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")


	Mean Train Accuracy: 0.9712140162785848
	Mean Test Accuracy: 0.9712124864260355
	STD: 0.004368134619110804



In [None]:
train_scores, test_scores = list(), list()
kf = KFold(n_splits=3, shuffle = True, random_state = 42)
for train_index, test_index in kf.split(data):
    xtrain, xtest = data[train_index], data[test_index]
    ytrain, ytest = targets[train_index], targets[test_index]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, 'relu'),
        tf.keras.layers.Dense(64, 'relu'),
        tf.keras.layers.Dense(32, 'relu'),
        tf.keras.layers.Dense(16, 'relu'),
        tf.keras.layers.Dense(2, 'softmax'),
    ])
    opt = tf.keras.optimizers.Adam(learning_rate = 1)
    model.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy', metrics = 'acc')
    model.fit(xtrain, ytrain, epochs=50, batch_size=128, verbose = False)
    train_scores.append(model.evaluate(xtrain, ytrain)[1])
    test_scores.append(model.evaluate(xtest, ytest)[1])

print(f"\n\tMean Train Accuracy: {np.mean(train_scores)}\n\tMean Test Accuracy: {np.mean(test_scores)}\n\tSTD: {np.std(test_scores)}\n")

