In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the apple-quality data set; the task is to predict good vs. bad quality.
appleData = pd.read_csv('apple_quality.csv')
appleData = appleData.iloc[:1000]  # keep only the first 1000 rows

# Features: every column except the first and the last one.
# NOTE(review): presumably the first column is an id and the last is 'Quality' — confirm.
data = appleData.iloc[:, 1:-1]

# Encode labels as integers. `.map` is used instead of `.replace` because
# replacing strings with ints relies on implicit object->int downcasting,
# which pandas deprecates (FutureWarning since 2.2).
labels = appleData['Quality'].map({'good': 1, 'bad': 0})
# `.map` yields NaN for any value missing from the mapping; fail loudly here
# rather than passing unencoded labels to the models.
assert labels.notna().all(), "unexpected value in 'Quality' column"
labels = labels.astype(int)

xtrain, xtest, ytrain, ytest = train_test_split(data, labels, test_size=0.2, random_state=42)

In [16]:
def predict(X, weights):
    """Return the logistic (sigmoid) of the linear scores ``np.dot(X, weights)``.

    Args:
        X: feature matrix (or vector) accepted by ``np.dot``; include a column
            of ones if ``weights`` contains an intercept term.
        weights: coefficient vector compatible with ``X`` under ``np.dot``.

    Returns:
        numpy array of values in [0, 1], one per score.
    """
    z = np.dot(X, weights)
    # exp() is only ever applied to non-positive numbers, so it may underflow
    # to 0 but can never overflow (the naive 1 / (1 + exp(-z)) overflows and
    # emits a RuntimeWarning for large negative z).
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) — the same function.
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    
def logisticRegression(X, y, learningRate, iterations):
    """Fit logistic-regression weights with fixed-step, full-batch gradient ascent.

    Args:
        X: design matrix with one row per sample; ``X.shape[1]`` determines
            the number of weights. No intercept column is added here.
        y: targets aligned row-for-row with ``X`` (presumably 0/1 labels —
            the function does not validate them).
        learningRate: factor applied to the gradient at every update.
        iterations: number of updates to perform.

    Returns:
        1-D numpy array of weights, initialised at zero and updated in place.
    """
    coef = np.zeros(X.shape[1])

    for _ in range(iterations):
        # Residual between the targets and the current predicted probabilities.
        residual = y - predict(X, coef)
        # X^T (y - p) is the gradient of the log-likelihood; step uphill along it.
        coef += learningRate * np.dot(X.T, residual)

    return coef

# Train on the training split. The leading column of ones lets weights[0]
# act as the intercept term.
X = np.hstack((np.ones((xtrain.shape[0], 1)), xtrain))
weights = logisticRegression(X, ytrain, 1e-5, 100)  # learning rate, iterations

# Score the held-out split using the same intercept-column layout.
X = np.hstack((np.ones((xtest.shape[0], 1)), xtest))
prob = predict(X, weights)
# Threshold the probabilities in a single pass instead of two in-place masked
# assignments whose correctness depends on their order.
pred = np.where(prob >= 0.5, 1.0, 0.0)

acc = accuracy_score(ytest, pred)  # signature is (y_true, y_pred)
print("accuracy: ", acc)

accuracy:  0.735


In [17]:
#compare with scikit-learn logistic regression
model = LogisticRegression(penalty='l2')
model.fit(xtrain, ytrain)
pred = model.predict(xtest)
acc = accuracy_score(ytest, pred)  # signature is (y_true, y_pred)
print("accuracy: ", acc)

accuracy:  0.745
