In [3]:
# import packages 
import numpy as np
import math, random 
import pandas as pd
import matplotlib.pyplot as plt 
from mpl_toolkits import mplot3d

# open the csv files with pandas 
df_red = pd.read_csv('winequality-red.csv',delimiter=";")
df_white = pd.read_csv('winequality-white.csv',delimiter=";")

# asign label 1 for red wine and label 0 for white wine
df_red["color"] = 1
df_white["color"] = 0 


# combine the 2 data frames and then shufle them 
df = pd.concat([df_red, df_white])
df = df.sample(frac=1).reset_index(drop=True)

# select the atributes used to perform the prediction

# all the atributes exept the color where asigned to the variable X 
X = df.iloc[:,1:-1].to_numpy()

# asign the color atribute to the variable Y

Y = df[['color']].to_numpy()

print("Shape of X:", X.shape , X)
print("Shape of Y:", Y.shape )

Shape of X: (6497, 11) [[ 0.23   0.25  17.3   ...  0.42   9.2    6.   ]
 [ 0.43   0.37  10.    ...  0.64   9.5    5.   ]
 [ 0.25   0.49   2.7   ...  0.9   10.     6.   ]
 ...
 [ 0.5    0.35   2.9   ...  0.62   9.4    5.   ]
 [ 0.305  0.39   1.2   ...  0.52  11.5    6.   ]
 [ 0.41   0.39   2.2   ...  0.65  10.2    5.   ]]
Shape of Y: (6497, 1)


In [4]:
# single neuron model clasification class 

class single_neuron_classification_model():
    
    # init method
    
    def __init__(self, input_data ,output_data ):
        self.input_data = input_data  
        self.output_data = output_data 
        
        # initialize randomized weigths 
        self.w = 0.01 * np.random.randn(self.input_data.shape[1])
        self.w_0 = 0.01 * np.random.randn()
     
    
    # sigmoid activation 
    def sigmoid(self,z): 
        non_zero_tolerance = 1e-8
        return 1 / (1 + math.exp(-z) + non_zero_tolerance)      

    # function to compute the output of the model
    def forward (self , x ):
        
        # Calculate pre-activation z
        z = x @ self.w.T + self.w_0
    
        # activation funcion
        a = self.sigmoid(z)
        return a


    # training function 

    def train_model_NLL_loss(self , learning_rate , num_epochs):
        
        # ensure avoiding log of 0 
        non_zero_tolerance = 1e-8 
    
         
    
        for epoch in range(num_epochs):
            # keep track of total loss
            total_loss = 0 
        
            for x , y in zip( self.input_data , self.output_data):
                
                y_predicted = self.forward(x) 
                nll_loss = -(y * math.log(y_predicted + non_zero_tolerance) + (1-y) * math.log(1-y_predicted + non_zero_tolerance))
            
                total_loss += nll_loss
            
                
                # update bias coeficient using the gradients 
            
                self.w_0 -= learning_rate * (y_predicted - y)
            
                # update model coeficients using the gradients  
            
                for j, x_j in enumerate(x):
                    self.w[j] -= learning_rate * (y_predicted - y) * x_j
            
                
            # print loss at some epochs 
            report_every = max(1, num_epochs // 10)
            if epoch % report_every == 0: #every few epochs, report on progress
                print("epoch", epoch, "has total loss", total_loss)
           
        return self.w, self.w_0

# evaluation function

def evaluate_classification_accuracy(model, input_data, labels):
    # Count the number of correctly classified samples given a set of weights
    correct = 0
    num_samples = len(input_data)
    for i in range(num_samples):
        x = input_data[i,:]
        y = labels[i]
        y_predicted = model.forward(x)
        label_predicted = 1 if y_predicted > 0.5 else 0
        if label_predicted == y:
            correct += 1
    accuracy = correct / num_samples
    print("Our model predicted", correct, "out of", num_samples,
          "correctly for", accuracy*100, "% accuracy")
    return accuracy

In [5]:
# set parameters for training 

learning_rate = 0.001
num_epochs = 200

# set the model 
model = single_neuron_classification_model(input_data = X , 
                                           output_data = Y )

# train the model
model.train_model_NLL_loss( learning_rate = learning_rate ,
                           num_epochs = num_epochs)
# evaluate the model
evaluate_classification_accuracy(model,X,Y)




epoch 0 has total loss [4137.21477836]
epoch 20 has total loss [2851.34379812]
epoch 40 has total loss [2421.24623923]
epoch 60 has total loss [2153.06271791]
epoch 80 has total loss [2019.12311044]
epoch 100 has total loss [1895.78829091]
epoch 120 has total loss [1806.93686478]
epoch 140 has total loss [1746.44930913]
epoch 160 has total loss [1693.96063553]
epoch 180 has total loss [1641.92773399]
Our model predicted 3727 out of 6497 correctly for 57.36493766353702 % accuracy


0.5736493766353702