## Logistic Regression
| Reece Buyan

In [3]:
from cifar10_import import *
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

import math
import numpy as np

In [4]:
# Load images and labels through the project helper from cifar10_import.
# NOTE(review): "combined" presumably merges the official train and test batches
# into one pool (the 18,000-sample test reports further down are consistent with
# 30% of 60,000 images) -- confirm against cifar10_import.
x_images, y_labels = importCifar10("combined")

In [5]:
# Flatten the images to use as input for logistic regression
# NOTE(review): flattenImageSet is a project helper (cifar10_import); it presumably
# turns each image into a single 1-D feature vector -- confirm the resulting shape.
flattened_x_img = flattenImageSet(x_images)

In [6]:
# Fixed seed so the train/validation/test membership -- and therefore every score
# reported below -- is the same after Restart Kernel -> Run All.
SPLIT_SEED = 42

# Create the training dataset: test_size=0.6 holds out 60% of the samples,
# so 40% remain for training. Stratified to keep the class balance in each part.
X_train, X_test, y_train, y_test = train_test_split(
    flattened_x_img,
    y_labels,
    test_size=0.6,
    stratify=y_labels,
    shuffle=True,
    random_state=SPLIT_SEED,
)

# Further split the held-out 60% in half: 30% validation and 30% test overall.
# X_test / y_test are deliberately rebound here so the intermediate 60% array
# is released instead of lingering in kernel memory.
X_val, X_test, y_val, y_test = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    stratify=y_test,
    shuffle=True,
    random_state=SPLIT_SEED,
)

In [7]:
# Scale images to help with model convergence
# The scaler object comes back from the training-split call and is then passed to
# the validation and test calls, so those splits reuse whatever was derived from
# X_train rather than being scaled independently.
# NOTE(review): scaleImages is a project helper; it appears to return
# (scaled, scaler) without a scaler argument and only the scaled data with one --
# confirm in cifar10_import.
X_train_scaled, scaler = scaleImages(X_train)
# X_val_scaled is not consumed by any cell visible in this part of the notebook.
X_val_scaled = scaleImages(X_val, scaler)
X_test_scaled = scaleImages(X_test, scaler)

In [11]:
# Baseline classifier: L1-penalised logistic regression with the liblinear solver,
# fitted on the scaled training split.
classifier = LogisticRegression(penalty = 'l1', solver = 'liblinear', fit_intercept = True)
classifier.fit(X_train_scaled, y_train)

# Predictions must come from the same split as the labels they are scored against.
# An earlier revision predicted on X_train_scaled while scoring against y_test;
# those arrays have different lengths (40% vs 30% of the data), so the metrics
# could not be computed. Predict on the test split instead.
y_pred = classifier.predict(X_test_scaled)
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=class_names))

In [8]:
# L1-penalised logistic regression (liblinear solver), fitted on the scaled
# training split and scored on the held-out test split.
classifier = LogisticRegression(
    penalty="l1",
    solver="liblinear",
    fit_intercept=True,
).fit(X_train_scaled, y_train)  # fit() returns the estimator itself

y_pred = classifier.predict(X_test_scaled)
print(
    "Accuracy on test set:",
    accuracy_score(y_test, y_pred),
)
print(
    classification_report(y_test, y_pred, target_names=class_names)
)

Accuracy on test set: 0.34755555555555556
              precision    recall  f1-score   support

    airplane       0.41      0.46      0.43      1800
  automobile       0.40      0.41      0.40      1800
        bird       0.27      0.23      0.25      1800
         cat       0.22      0.21      0.22      1800
        deer       0.31      0.29      0.30      1800
         dog       0.28      0.28      0.28      1800
        frog       0.34      0.37      0.35      1800
       horse       0.37      0.36      0.36      1800
        ship       0.45      0.49      0.47      1800
       truck       0.38      0.38      0.38      1800

    accuracy                           0.35     18000
   macro avg       0.34      0.35      0.35     18000
weighted avg       0.34      0.35      0.35     18000



In [8]:
# Same liblinear setup, but with an L2 penalty in place of L1.
# NOTE(review): scikit-learn documents liblinear as limited to one-vs-rest
# schemes, so this run likely does not optimise a true multinomial loss --
# confirm against the installed version.
classifier = LogisticRegression(
    penalty="l2",
    solver="liblinear",
    fit_intercept=True,
).fit(X_train_scaled, y_train)  # fit() returns the estimator itself

y_pred = classifier.predict(X_test_scaled)
print(
    "Accuracy on test set:",
    accuracy_score(y_test, y_pred),
)
print(
    classification_report(y_test, y_pred, target_names=class_names)
)

Accuracy on test set: 0.33505555555555555
              precision    recall  f1-score   support

    airplane       0.40      0.41      0.41      1800
  automobile       0.39      0.39      0.39      1800
        bird       0.24      0.22      0.23      1800
         cat       0.23      0.22      0.22      1800
        deer       0.29      0.28      0.28      1800
         dog       0.26      0.26      0.26      1800
        frog       0.33      0.35      0.34      1800
       horse       0.38      0.37      0.38      1800
        ship       0.43      0.47      0.45      1800
       truck       0.37      0.37      0.37      1800

    accuracy                           0.34     18000
   macro avg       0.33      0.34      0.33     18000
weighted avg       0.33      0.34      0.33     18000



In [9]:
# L2 penalty combined with the SAGA solver, chosen for multinomial loss and the
# larger dataset.
# NOTE(review): the iteration limit is left at the library default; check the
# cell's stderr for a ConvergenceWarning before trusting these scores.
classifier = LogisticRegression(
    penalty="l2",
    solver="saga",
    fit_intercept=True,
).fit(X_train_scaled, y_train)  # fit() returns the estimator itself

y_pred = classifier.predict(X_test_scaled)
print(
    "Accuracy on test set:",
    accuracy_score(y_test, y_pred),
)
print(
    classification_report(y_test, y_pred, target_names=class_names)
)



Accuracy on test set: 0.37583333333333335
              precision    recall  f1-score   support

    airplane       0.44      0.47      0.45      1800
  automobile       0.44      0.45      0.44      1800
        bird       0.28      0.25      0.27      1800
         cat       0.25      0.24      0.24      1800
        deer       0.33      0.31      0.32      1800
         dog       0.29      0.28      0.29      1800
        frog       0.38      0.41      0.39      1800
       horse       0.43      0.41      0.42      1800
        ship       0.48      0.51      0.50      1800
       truck       0.42      0.43      0.42      1800

    accuracy                           0.38     18000
   macro avg       0.37      0.38      0.37     18000
weighted avg       0.37      0.38      0.37     18000



In [10]:
# Explicitly request the multinomial formulation with the SAGA solver; the
# penalty is left at the library default.
# NOTE(review): the scores are almost identical to the previous saga run,
# presumably because the solver's default already selects multinomial. Recent
# scikit-learn releases also deprecate `multi_class` -- verify against the
# installed version before relying on this argument.
classifier = LogisticRegression(
    multi_class="multinomial",
    solver="saga",
    fit_intercept=True,
).fit(X_train_scaled, y_train)  # fit() returns the estimator itself

y_pred = classifier.predict(X_test_scaled)
print(
    "Accuracy on test set:",
    accuracy_score(y_test, y_pred),
)
print(
    classification_report(y_test, y_pred, target_names=class_names)
)



Accuracy on test set: 0.3755
              precision    recall  f1-score   support

    airplane       0.44      0.47      0.45      1800
  automobile       0.43      0.45      0.44      1800
        bird       0.28      0.25      0.26      1800
         cat       0.25      0.23      0.24      1800
        deer       0.33      0.31      0.32      1800
         dog       0.29      0.28      0.29      1800
        frog       0.38      0.41      0.39      1800
       horse       0.43      0.41      0.42      1800
        ship       0.48      0.51      0.50      1800
       truck       0.42      0.43      0.42      1800

    accuracy                           0.38     18000
   macro avg       0.37      0.38      0.37     18000
weighted avg       0.37      0.38      0.37     18000

