In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math


# Exercise 00 - Sigmoid

In [2]:
def sigmoid_(x):
    if isinstance(x, (list, np.ndarray)):
        sig = list()
        for item in x:
            sig = np.append(sig, 1 / (1 + math.exp(-item)))
    else:
        sig = 1 / (1 + math.exp(-x))
    return(sig)

In [3]:
print(sigmoid_(-4))
print(sigmoid_(2))
print(sigmoid_([-4, 2, 0]))

0.01798620996209156
0.8807970779778823
[0.01798621 0.88079708 0.5       ]


In [4]:
# Exercise 01 - Logistic Loss Function

In [5]:
def log_loss_(y_true, y_pred, m, eps=1e-15):
    log_loss = 0.0
    try:
        for i in range(len(y_pred)):
            log_loss += y_true[i] * math.log(y_pred[i]) + (1 - y_true[i]) * math.log(1 - y_pred[i])
        return(-log_loss / len(y_true))
    except:
        return (-y_true * math.log(y_pred) + (1 - y_true) * math.log(1 - y_pred))

In [6]:
# Test n.1
x = 4
y_true = 1
theta = 0.5
y_pred = sigmoid_(x * theta)
m = 1 # length of y_true is 1
print(log_loss_(y_true, y_pred, m))
# 0.12692801104297152
# Test n.2
x = [1, 2, 3, 4]
y_true = 0
theta = [-1.5, 2.3, 1.4, 0.7]
x_dot_theta = sum([a*b for a, b in zip(x, theta)])
y_pred = sigmoid_(x_dot_theta)
m = 1
print(log_loss_(y_true, y_pred, m))
# 10.100041078687479
# Test n.3
x_new = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
y_true = [1, 0, 1]
theta = [-1.5, 2.3, 1.4, 0.7]
x_dot_theta = []
for i in range(len(x_new)):
    my_sum = 0
    for j in range(len(x_new[i])):
        my_sum += x_new[i][j] * theta[j]
    x_dot_theta.append(my_sum)
y_pred = sigmoid_(x_dot_theta)
m = len(y_true)
print(log_loss_(y_true, y_pred, m))

0.12692801104297263
-10.100041078711822
7.233347032629922


# Exercise 02 - Logistic Gradient

In [7]:
def dot(x, y):
    dot = 0.0
    if not isinstance(x, np.ndarray) or x.size == 0 or x.ndim > 2:
        print('Dot Error : x dim')
        return None
    if not isinstance(y, np.ndarray) or y.size == 0 or y.ndim > 2:
        print('Dot Error : y dim')
        return None
    if x.ndim != y.ndim:
        print('Dot Error : incompatible x and y dim')
        return None
    #x_t = x.reshape(-1, 1)
    for elem in range(x.size):
        dot += x[elem] * y[elem]
    return dot

In [8]:
def log_gradient_(x, y_true, y_pred):
    try:
        grad = np.empty(0)
        for item in x:
            grad = np.append(grad, (y_pred - y_true) * np.array(item))
    except :
        grad = dot(np.array((y_pred - y_true)).reshape(-1, 1), np.array(x))
    return (grad)

In [9]:
# Test n.1
x = [1, 4.2] # 1 represent the intercept
y_true = 1
theta = [0.5, -0.5]
x_dot_theta = sum([a*b for a, b in zip(x, theta)])
y_pred = sigmoid_(x_dot_theta)
print(log_gradient_(x, y_pred, y_true))
# [0.8320183851339245, 3.494477217562483]
print("\n")
# Test n.2
x = [1, -0.5, 2.3, -1.5, 3.2]
y_true = 0
theta = [0.5, -0.5, 1.2, -1.2, 2.3]
x_dot_theta = sum([a*b for a, b in zip(x, theta)])
y_pred = sigmoid_(x_dot_theta)
print(log_gradient_(x, y_true, y_pred))
# [0.99999685596372, -0.49999842798186, 2.299992768716556, -1.4999952839455801, 3.1999899390839044]
print("\n")

# Test n.3
x_new = [[1, 2, 3, 4, 5], [1, 6, 7, 8, 9], [1, 10, 11, 12, 13]]
# first column of x_new are intercept values initialized to 1
y_true = [1, 0, 1]
theta = [0.5, -0.5, 1.2, -1.2, 2.3]
x_new_dot_theta = []
for i in range(len(x_new)):
    my_sum = 0
    for j in range(len(x_new[i])):
        my_sum += x_new[i][j] * theta[j]
    x_new_dot_theta.append(my_sum)
y_pred = sigmoid_(x_new_dot_theta)
print(log_gradient_(x_new, y_true, y_pred))
# [0.9999445100449934, 5.999888854245219, 6.999833364290213, 7.999777874335206, 8.999722384380199

[0.83201839 3.49447722]


[ 0.99999686 -0.49999843  2.29999277 -1.49999528  3.19998994]


[0.99994451 5.99988885 6.99983336 7.99977787 8.99972238]


# Exercise 03 - Vectorized Logistic Loss Function

In [10]:
def vec_log_loss_(y_true, y_pred, m, eps=1e-15):
    log_loss = 0.0
    y_true = np.array(y_true).reshape(-1, 1)
    y_pred = np.array(y_pred).reshape(-1, 1)
    for i in range(y_true.size):
        log_loss += (y_true[i] * math.log(y_pred[i])) + ((1 - y_true[i]) * math.log(1 - y_pred[i]))
    return(log_loss / -m)

In [11]:
x = 4
y_true = 1
theta = 0.5
y_pred = sigmoid_(x * theta)
m = 1 # length of y_true is 1
print(vec_log_loss_(y_true, y_pred, m))

x = np.array([1, 2, 3, 4])
y_true = 0
theta = np.array([-1.5, 2.3, 1.4, 0.7])
y_pred = sigmoid_(np.dot(x, theta))
m = 1
print(vec_log_loss_(y_true, y_pred, m))

x_new = np.arange(1, 13).reshape((3, 4))
y_true = np.array([1, 0, 1])
theta = np.array([-1.5, 2.3, 1.4, 0.7])
y_pred = sigmoid_(np.dot(x_new, theta))
m = len(y_true)
print(vec_log_loss_(y_true, y_pred, m)) 

[0.12692801]
[10.10004108]
[7.23334703]


# Exercise 04 - Vectorized Logistic Gradient

In [12]:
def dot(x, y):
    dot = 0.0
    if not isinstance(x, np.ndarray) or x.size == 0 or x.ndim > 2:
        print('Dot Error : x dim')
        return None
    if not isinstance(y, np.ndarray) or y.size == 0 or y.ndim > 2:
        print('Dot Error : y dim')
        return None
    if x.ndim != y.ndim:
        print('Dot Error : incompatible x and y dim')
        return None
    for elem in range(x.size):
        dot += x[elem] * y[elem]
    return dot

In [13]:
def vec_log_gradient_(x, y_true, y_pred):
    try :
         res = dot(np.array((y_pred - y_true)).reshape(-1, 1), x)       
    except :
        res = (y_pred - y_true) * x
    return(res)


In [14]:
# Test n.1
x = np.array([1, 4.2]) # x[0] represent the intercept
y_true = 1
theta = np.array([0.5, -0.5])
y_pred = sigmoid_(np.dot(x, theta))
print(vec_log_gradient_(x, y_pred, y_true))
# [0.83201839 3.49447722]
print()


# Test n.2
x = np.array([1, -0.5, 2.3, -1.5, 3.2]) # x[0] represent the intercept
y_true = 0
theta = np.array([0.5, -0.5, 1.2, -1.2, 2.3])
y_pred = sigmoid_(np.dot(x, theta))
print(vec_log_gradient_(x, y_true, y_pred))
# [ 0.99999686 -0.49999843 2.29999277 -1.49999528 3.19998994]
print()

# Test n.3
x_new = np.arange(2, 14).reshape((3, 4))
x_new = np.insert(x_new, 0, 1, axis=1)
# first column of x_new are now intercept values initialized to 1
y_true = np.array([1, 0, 1])
theta = np.array([0.5, -0.5, 1.2, -1.2, 2.3])
y_pred = sigmoid_(np.dot(x_new, theta))
print(vec_log_gradient_(x_new, y_true, y_pred))
# [0.99994451 5.99988885 6.99983336 7.99977787 8.99972238]

Dot Error : incompatible x and y dim
None

Dot Error : incompatible x and y dim
None

[0.99994451 5.99988885 6.99983336 7.99977787 8.99972238]


# Exercise 05 - Logistic Regression

In [15]:
df_train = pd.read_csv('../subjects/day02/resources/dataset/train_dataset_clean.csv', delimiter=',', header=None, index_col=False)
df_test = pd.read_csv('../subjects/day02/resources/dataset/test_dataset_clean.csv', delimiter=',', header=None, index_col=False)

x_train, y_train = np.array(df_train.iloc[:, 1:82]), df_train.iloc[:, 0]
x_test, y_test = np.array(df_test.iloc[:, 1:82]), df_test.iloc[:, 0]

In [23]:
class LogisticRegressionBatchGd:
    def __init__(self, alpha=0.001, max_iter=1000, verbose=False, learning_rate='constant'):
        self.alpha = alpha
        self.max_iter = max_iter
        self.verbose = verbose
        self.learning_rate = learning_rate # can be 'constant' or 'invscaling'
        self.thetas = []
        
    def fit(self, x_train, y_train):
        y_pred = self.predict(x_train)
        X_train = np.concatenate((np.ones((x_train.shape[0],1)), x_train), axis=1)
        for i in range(self.max_iter):
            if i % 150 == 0:
                loss = self.mse(x_train, y_train)
                print("epoch =", i,"  loss = ", loss)
            self.thetas = self.thetas - self.alpha * vec_log_gradient_(X_train, np.array(y_train).reshape(-1, 1), y_pred)

    def predict(self, x_train):
        if not np.array(self.thetas).any():
            self.thetas = np.ones(x_train.shape[1] + 1)
        pred = np.empty(0)
        X_conc = np.concatenate((np.ones((x_train.shape[0],1)), x_train), axis=1)
        for j in range(X_conc.shape[0]):
            pred = np.append(pred, dot(self.thetas, X_conc[j]))
        return (pred.reshape(-1, 1))
    
    def score(self, x_train, y_train):
        y_pred = (self.predict(x_train) >= 0.5).astype(int)
        y_pred = y_pred.flatten()
        accuracy = np.mean(y_pred == y_train)
        return accuracy * 100

    def mse(self, x_train, y_train):
        mse_ = 0.0
        y_pred = self.predict(x_train)
        for i in range(y_train.size):
            mse_ += (y_train[i] - y_pred[i])**2
        return mse_ / y_train.size

In [24]:
model = LogisticRegressionBatchGd(alpha=0.01, max_iter=1500, verbose=True, learning_rate='constant')
print(model.score(x_train, y_train))
print(model.fit(x_train, y_train))
print(model.score(x_train, y_train))


29.996007493627346
epoch = 0   loss =  [18.49908904]
epoch = 150   loss =  [8.18612075e+10]
epoch = 300   loss =  [3.27449605e+11]
epoch = 450   loss =  [7.36765192e+11]
epoch = 600   loss =  [1.30980797e+12]
epoch = 750   loss =  [2.04657794e+12]
epoch = 900   loss =  [2.94707509e+12]
epoch = 1050   loss =  [4.01129944e+12]
epoch = 1200   loss =  [5.23925098e+12]
epoch = 1350   loss =  [6.6309297e+12]
None
75.54129172936949


In [None]:
print()