In [128]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd

In [129]:
def min_max_normalize(lst):
    """
        Helper function for movielens dataset, not useful for discrete multi class clasification.

        Return:
        Normalized list x, in range [0, 1]
    """
    maximum = max(lst)
    minimum = min(lst)
    toreturn = []
    for i in range(len(lst)):
        toreturn.append((lst[i]- minimum)/ (maximum - minimum))
    return toreturn

In [130]:
def z_standardize(X_inp):
    """
        Z-score Standardization.
        Standardize the feature matrix, and store the standarize rule.

        Parameter:
        X_inp: Input feature matrix.

        Return:
        Standardized feature matrix.
    """
    
    toreturn = X_inp.copy()
    for i in range(X_inp.shape[1]):
        std = np.std(X_inp[:, i])               
        mean = np.mean(X_inp[:, i])             
        feature = np.array(X_inp[:, i])
        toreturn[:, i] = (feature - mean) / std
    return toreturn

In [131]:
def sigmoid(x):
    """ 
        Sigmoid Function

        Return:
        transformed x.
    """
    return 1 / (1 + np.exp(-x))

In [143]:
class Logistic_Regression():
    
    def __init__(self):
        """
            Some initializations, if neccesary
        """
        
        self.model_name = 'Logistic Regression'
    
    def fit(self, X_train, y_train):
        """
            Save the datasets in our model, and do normalization to y_train
            
            Parameter:
                X_train: Matrix or 2-D array. Input feature matrix.
                Y_train: Matrix or 2-D array. Input target value.
        """
        
        self.X = X_train
        self.y = y_train
        
        count = 0
        uni = np.unique(y_train)
        for y in y_train:
            if y == min(uni):
                self.y[count] = -1
            else:
                self.y[count] = 1
            count += 1        
        
        n,m = X_train.shape
        self.theta = np.zeros(m)
        self.b = 0
    
    def gradient(self, X_inp, y_inp, theta, b):
        """
            Calculate the grandient of Weight and Bias, given sigmoid_yhat, true label, and data

            Parameter:
                X_inp: Matrix or 2-D array. Input feature matrix.
                y_inp: Matrix or 2-D array. Input target value.
                theta: Matrix or 1-D array. Weight matrix.
                b: int. Bias.

            Return:
                grad_theta: gradient with respect to theta
                grad_b: gradient with respect to b

        NOTE: There are several ways of implementing the gradient. We are merely providing you one way
        of doing it. Feel free to change the code and implement the way you want.
        """
        m = len(y_inp)
        predictions = sigmoid(b + X_inp @ theta)
        errors = predictions - y_inp
        grad_theta = (X_inp.T @ errors) / m
        grad_b = np.mean(errors)

        # This second implementation also works, but it's a lot slower
        # it conforms better to the skeleton code. 
        # grad_b = 0
        # grad_theta = np.zeros_like(theta)
        
        # for (xi, yi) in zip(X_inp, y_inp):
        #     sig = sigmoid(np.dot(xi, theta) + b)
        #     grad_b += sig - yi
        #     grad_theta += xi * (sig - yi)
        
        return grad_theta, grad_b

    def gradient_descent_logistic(self, alpha, num_pass, early_stop=0, standardized=True):
        """
            Logistic Regression with gradient descent method

            Parameter:
                alpha: (Hyper Parameter) Learning rate.
                num_pass: Number of iteration
                early_stop: (Hyper Parameter) Least improvement error allowed before stop. 
                            If improvement is less than the given value, then terminate the function and store the coefficents.
                            default = 0.
                standardized: bool, determine if we standardize the feature matrix.
                
            Return:
                self.theta: theta after training
                self.b: b after training
        """
        
        if standardized:
            self.X = z_standardize(self.X)
        
        n, m = self.X.shape

        for i in range(num_pass):    
            
            grad_theta, grad_b = self.gradient(self.X, self.y, self.theta, self.b)
            temp_theta = self.theta - alpha * grad_theta
            temp_b = self.b - alpha * grad_b

            previous_y_hat = sigmoid(np.dot(self.X, self.theta) + self.b)
            temp_y_hat = sigmoid(np.dot(self.X, temp_theta) + temp_b)
            pre_error = np.mean(np.abs(self.y-previous_y_hat)/2)
            temp_error = np.mean(np.abs(self.y-temp_y_hat)/2)

            if (abs(pre_error - temp_error) < early_stop) or (abs((pre_error - temp_error) / pre_error) < early_stop):
                return temp_theta, temp_b

            self.theta = temp_theta
            self.b = temp_b
        return self.theta, self.b

    
    def predict_ind(self, x: list):
        """
            Predict the most likely class label of one test instance based on its feature vector x.

            Parameter:
            x: Matrix, array or list. Input feature point.
            
            Return:
                p: prediction of given data point
        """
        x = np.array(x)
        y_hat = sigmoid(x @ self.theta + self.b)
        p = 1 if y_hat >= 0.5 else -1
        
        return p
    
    def predict(self, X):
        """
            X is a matrix or 2-D numpy array, representing testing instances. 
            Each testing instance is a feature vector. 
            
            Parameter:
            X: Matrix, array or list. Input feature points.
            
            Return:
                p: prediction of given data matrix
        """
        ret = []                  
        for x in X:
            ret.append(self.predict_ind(x))  

        return ret

In [144]:
url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
#names = ['f_acid', 'v_acid', 'c_acid', 'sugar', 'chlorides', 'f_SO2', 't_SO2', 'density', 'ph', 'sulphates', 'alcohol', 'quality']
wine = pd.read_csv(url_Wine, delimiter=';')

In [145]:
wine5 = wine.loc[wine.quality == 5]
wine6 = wine.loc[wine.quality == 6]
wineall = pd.concat([wine5,wine6])
wineall

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
5,7.4,0.66,0.00,1.8,0.075,13.0,40.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1592,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1593,6.8,0.62,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6
1595,5.9,0.55,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6


In [146]:
X = np.array(wineall.iloc[:,:10])
Y = np.array(wineall.quality)

uni = np.unique(Y)
for i, y in enumerate(Y):
    if y == min(uni):
        Y[i] = -1
    else:
        Y[i] = 1

In [149]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

logit = Logistic_Regression()
logit.fit(X_train, Y_train)
g = logit.gradient_descent_logistic(0.01, 1000, early_stop=1e-20)
predictions = logit.predict(X_test)
rmse = root_mean_squared_error(Y_test, predictions)

rmse

1.381698559415515