In [1]:
!pip install scikit-learn
!pip install torchvision



In [14]:
import torch
import torchvision
from torchvision import transforms
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import classification_report,accuracy_score,f1_score
from sklearn.model_selection import GridSearchCV,cross_validate,train_test_split
import matplotlib.pyplot as plt

In [15]:
transformer = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                              transforms.Lambda(lambda x: torch.flatten(x))])

train_set=torchvision.datasets.FashionMNIST('./files/fashion-mnist/', train=True, download=True,
                             transform=transformer)

test_set=torchvision.datasets.FashionMNIST('./files/fashion-mnist/', train=False, download=True,
                             transform=transformer)

batch_size_train= len(train_set)//3
batch_size_test=len(test_set)

In [16]:
train_loader = torch.utils.data.DataLoader(
  train_set,
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  test_set,
  batch_size=batch_size_test, shuffle=True)

train_enumerated = enumerate(train_loader)
batch_idx, (train_x, train_y) = next(train_enumerated)

test_enumerated = enumerate(test_loader)
batch_idx, (test_x, test_y) = next(test_enumerated)

In [4]:
def evaluate_models(models):
    results_short = {}
    for score in scores:
        print('='*40)
        print("# Tuning hyper-parameters for %s" % score)
        print()

        for m in model_lbls:
            print('-'*40)
            print("Trying model {}".format(models[m]['name']))
            clf = GridSearchCV(models[m]['estimator'], models[m]['param'], cv=5,
                               scoring='%s_macro' % score, 
                               return_train_score = False,
                               n_jobs = 2, 
                               )
            clf.fit(train_x, train_y)
            print_results(clf)
            results_short[m] = clf.best_score_
        print("Summary of results for {}".format(score))
        print("Estimator")
        for m in results_short.keys():
            print("{}\t - score: {:4.2}%".format(models[m]['name'], results_short[m]))

def print_results(model):
    print("Best parameters set found on train set:")
    print()
    # if best is linear there is no gamma parameter
    print(model.best_params_)
    print()
    print("Grid scores on train set:")
    print()
    means = model.cv_results_['mean_test_score']
    stds = model.cv_results_['std_test_score']
    params = model.cv_results_['params']
    print("Mean test score: {}".format(means))
    print("Std test score: {}".format(stds))
    print("Params test score: {}".format(params))
    print()
    print("Detailed classification report for the best parameter set:")
    print()
    print("The model is trained on the full train set.")
    print("The scores are computed on the full test set.")
    print()
    true_y, pred_y = test_y, model.predict(test_x)
    print(classification_report(true_y, pred_y))
    print()
    
def plot_scores_by_parameter(model,ks,X_train,X_test,y_train,y_test,visualize=True):
    train_scores = []
    test_scores = []
    
    for k in ks:
        clf = model(k).fit(X_train, y_train)
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        
        train_scores.append(train_score)
        test_scores.append(test_score)
    if visualize:
        plt.figure(figsize=(10, 6))
        plt.plot(ks, train_scores, color='blue', label='train score')
        plt.plot(ks, test_scores, color='green', label='test score')
        plt.legend()
    return train_scores,test_scores

## Logistic Regression

In [5]:
from sklearn.linear_model import LogisticRegression

Logistic regression is linear classifier that categorizes the example as the highest probable class based on linear model of the training data. Because the linear function is unbounded, in order to get probability, the result is the put in sigmoid function that gives a number between 0 and 1. The logistic regression is binary classifier by default, but can be expanded to multi class either by one versus rest scheme or multinomial generalization.
The latter scheme runs multiple binary regressions assuming independence of irrelevant alternatives ("The probability of taking the car to work, rather than the bus do not depend on whether I have a bicycle").

In [None]:
Logistic Regression uses different algorithms for solving the optimization problem.
Some solvers are limited for one versus rest schemes.

In [11]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression       ',
           'estimator': LogisticRegression(random_state=0), 
           'param': [{'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression       
Best parameters set found on train set:

{'solver': 'saga'}

Grid scores on train set:


Detailed classification report for the best parameter set:

The model is trained on the full train set.
The scores are computed on the full test set.

              precision    recall  f1-score   support

           0       0.78      0.79      0.79      1000
           1       0.97      0.95      0.96      1000
           2       0.72      0.73      0.72      1000
           3       0.83      0.85      0.84      1000
           4       0.73      0.75      0.74      1000
           5       0.95      0.91      0.93      1000
           6       0.60      0.56      0.58      1000
           7       0.90      0.94      0.92      1000
           8       0.93      0.94      0.93      1000
           9       0.94      0.94      0.94      1000

    accuracy                           0.84     1

The best solver for f1 score seems to be saga. It can handle multinomial schemes as well as one versus rest.

In [6]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression       ',
           'estimator': LogisticRegression(random_state=0), 
           'param': [{'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag']}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression       
Best parameters set found on train set:

{'solver': 'lbfgs'}

Grid scores on train set:

Mean test score: [0.84595143 0.84390669 0.83968037 0.84383827 0.84240093]
Std test score: [0.00680967 0.00641024 0.00649584 0.00655477 0.00603016]
Params test score: [{'solver': 'lbfgs'}, {'solver': 'liblinear'}, {'solver': 'newton-cg'}, {'solver': 'newton-cholesky'}, {'solver': 'sag'}]

Detailed classification report for the best parameter set:

The model is trained on the full train set.
The scores are computed on the full test set.

              precision    recall  f1-score   support

           0       0.80      0.81      0.80      1000
           1       0.98      0.95      0.96      1000
           2       0.72      0.74      0.73      1000
           3       0.83      0.85      0.84      1000
           4       0.72      0.75      0.73      1000
           5       0.92      0.

The second best solver seems to be lbfgs. We will compare them along with the different schemes: multinomial and one versus rest

In [7]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression       ',
           'estimator': LogisticRegression(random_state=0), 
           'param': [{'solver': ['lbfgs', 'saga'], 'multi_class': ['ovr', 'multinomial']}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression       
Best parameters set found on train set:

{'multi_class': 'ovr', 'solver': 'saga'}

Grid scores on train set:

Mean test score: [0.84397694 0.84826809 0.84595143 0.84578108]
Std test score: [0.00649334 0.00623359 0.00680967 0.00658662]
Params test score: [{'multi_class': 'ovr', 'solver': 'lbfgs'}, {'multi_class': 'ovr', 'solver': 'saga'}, {'multi_class': 'multinomial', 'solver': 'lbfgs'}, {'multi_class': 'multinomial', 'solver': 'saga'}]

Detailed classification report for the best parameter set:

The model is trained on the full train set.
The scores are computed on the full test set.

              precision    recall  f1-score   support

           0       0.79      0.80      0.79      1000
           1       0.98      0.95      0.96      1000
           2       0.72      0.74      0.73      1000
           3       0.82      0.86      0.84      1000
           4       0.

Saga beats lbfgs and one versus rest scheme seems to be the best.

The logistic regression uses regularization term to exclude extremely large values for any of the regression coefficients in the loss function. This way it can counter overfitting. Two different penalties are available: l1 and l2.

L1 called also Lasso adds the “absolute value of magnitude” of the coefficient as a penalty term to the loss function.
L2 called also Ridge regression adds the “squared magnitude” of the coefficient as the penalty term.
Elasticnet regularization is linear combination of l1 and l2.

Saga solver is compatible with all penalties.
We will compare them.

In [16]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression       ',
           'estimator': LogisticRegression(solver='saga', multi_class='ovr'), 
           'param': [{'penalty': ['l1', 'l2', 'elasticnet', None]}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression       
Best parameters set found on train set:

{'penalty': 'l1'}

Grid scores on train set:

Mean test score: [0.8443732  0.84332136        nan 0.84193589]
Std test score: [0.00701041 0.0078915         nan 0.00749001]
Params test score: [{'penalty': 'l1'}, {'penalty': 'l2'}, {'penalty': 'elasticnet'}, {'penalty': None}]

Detailed classification report for the best parameter set:

The model is trained on the full train set.
The scores are computed on the full test set.

              precision    recall  f1-score   support

           0       0.79      0.80      0.80      1000
           1       0.97      0.95      0.96      1000
           2       0.72      0.74      0.73      1000
           3       0.82      0.87      0.84      1000
           4       0.73      0.76      0.74      1000
           5       0.93      0.90      0.92      1000
           6       0.63      0.53     

Penalty l1 seems to give best results, although there is not much difference. We will try with different regularization strength. C is the inverse of the regularization strength, smaller C means stronger regularization.

In [22]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression',
           'estimator': LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1), 
           'param': [{'C': np.arange(0,10, 0.5)}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression
Best parameters set found on train set:

{'C': 0.5}

Grid scores on train set:

Mean test score: [       nan 0.84514973 0.84448116 0.84367096 0.8434938  0.84292958
 0.84296878 0.84269507 0.84258409 0.84246671 0.84242669 0.84242187
 0.84238005 0.84211274 0.84222319 0.84195481 0.84197405 0.84215416
 0.84217911 0.84215111]
Std test score: [       nan 0.00681315 0.00688036 0.00742467 0.00740458 0.00736772
 0.00737918 0.00723796 0.00756026 0.00785126 0.00778168 0.00777043
 0.00765114 0.00760023 0.00779215 0.00758156 0.00768901 0.00797297
 0.00776743 0.00767427]
Params test score: [{'C': 0.0}, {'C': 0.5}, {'C': 1.0}, {'C': 1.5}, {'C': 2.0}, {'C': 2.5}, {'C': 3.0}, {'C': 3.5}, {'C': 4.0}, {'C': 4.5}, {'C': 5.0}, {'C': 5.5}, {'C': 6.0}, {'C': 6.5}, {'C': 7.0}, {'C': 7.5}, {'C': 8.0}, {'C': 8.5}, {'C': 9.0}, {'C': 9.5}]

Detailed classification report for the best parameter set:

The mode

We get best result for c = 0.5. But because the tests were for 0, 0.5, 1, ... there is possibility to get even better results for values closer to 0.5

In [23]:
model_lbls = ['lr']

models = {
    'lr': {'name': 'Logistic Regression',
           'estimator': LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1), 
           'param': [{'C': np.arange(0.1, 1, 0.2)}],
          },
}

model_lbls = ['lr']
scores = ['f1']

evaluate_models(models)

# Tuning hyper-parameters for f1

----------------------------------------
Trying model Logistic Regression
Best parameters set found on train set:

{'C': 0.7000000000000001}

Grid scores on train set:

Mean test score: [0.82912841 0.84167787 0.845301   0.84556502 0.84441974]
Std test score: [0.00693436 0.00686029 0.00676712 0.00699932 0.0072387 ]
Params test score: [{'C': 0.1}, {'C': 0.30000000000000004}, {'C': 0.5000000000000001}, {'C': 0.7000000000000001}, {'C': 0.9000000000000001}]

Detailed classification report for the best parameter set:

The model is trained on the full train set.
The scores are computed on the full test set.

              precision    recall  f1-score   support

           0       0.79      0.80      0.80      1000
           1       0.97      0.95      0.96      1000
           2       0.72      0.74      0.73      1000
           3       0.82      0.87      0.85      1000
           4       0.73      0.76      0.75      1000
           5       0.94      0.9

### Bagging clasifier using logistic regression
We will try to gain better results using bagging ensemble learning

In [6]:
from sklearn.ensemble import BaggingClassifier

Ensemble with many estimators, each on a small set of the features (taken with replacement) and trained on the whole train data.

In [31]:
bg = BaggingClassifier(estimator=LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1, C = 0.7),
                           n_estimators=20,
                           max_features=0.1,
                           bootstrap_features=True,
                           n_jobs=-1)

bg.fit(train_x, train_y)
true_y, pred_y = test_y, bg.predict(test_x)
print(classification_report(true_y, pred_y))

              precision    recall  f1-score   support

           0       0.77      0.83      0.80      1000
           1       0.98      0.94      0.96      1000
           2       0.69      0.69      0.69      1000
           3       0.77      0.87      0.82      1000
           4       0.69      0.74      0.72      1000
           5       0.90      0.88      0.89      1000
           6       0.58      0.42      0.49      1000
           7       0.89      0.88      0.88      1000
           8       0.90      0.94      0.92      1000
           9       0.90      0.93      0.91      1000

    accuracy                           0.81     10000
   macro avg       0.81      0.81      0.81     10000
weighted avg       0.81      0.81      0.81     10000



Ensemble with many estimators, trained on all features but on a small set of the data (taken with replacement).

In [7]:
bg = BaggingClassifier(estimator=LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1, C = 0.7),
                           n_estimators=20,
                           max_samples=0.1,
                           bootstrap_features=True,
                           n_jobs=-1)

bg.fit(train_x, train_y)
true_y, pred_y = test_y, bg.predict(test_x)
print(classification_report(true_y, pred_y))

              precision    recall  f1-score   support

           0       0.78      0.81      0.80      1000
           1       0.97      0.94      0.96      1000
           2       0.70      0.72      0.71      1000
           3       0.79      0.87      0.83      1000
           4       0.71      0.77      0.74      1000
           5       0.91      0.86      0.89      1000
           6       0.64      0.47      0.54      1000
           7       0.88      0.89      0.88      1000
           8       0.90      0.94      0.92      1000
           9       0.88      0.94      0.91      1000

    accuracy                           0.82     10000
   macro avg       0.82      0.82      0.82     10000
weighted avg       0.82      0.82      0.82     10000



### AdaBoosting

In [11]:
from sklearn.ensemble import AdaBoostClassifier

In [17]:
ab = AdaBoostClassifier(estimator=LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1, C = 0.7, random_state=0))

ab.fit(train_x, train_y)
true_y, pred_y = test_y, ab.predict(test_x)
print(classification_report(true_y, pred_y))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1000
           1       0.00      0.00      0.00      1000
           2       0.00      0.00      0.00      1000
           3       0.10      1.00      0.18      1000
           4       0.00      0.00      0.00      1000
           5       0.00      0.00      0.00      1000
           6       0.00      0.00      0.00      1000
           7       0.00      0.00      0.00      1000
           8       0.00      0.00      0.00      1000
           9       0.00      0.00      0.00      1000

    accuracy                           0.10     10000
   macro avg       0.01      0.10      0.02     10000
weighted avg       0.01      0.10      0.02     10000



## Conclusion

We conclude that the best hyper parameters for the problem are:
Solver: 'SAGA',
Scheme: One versus Rest,
Regularization: L1
and inverse regularization strength C = 0.7

Logistic regression does not benefit from ensemble techniques.
Because the tests were done for 20 000 entries in the train data in order results to be consistent with the other algorithm tests we will do final test once more.

In [8]:
transformer = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                              transforms.Lambda(lambda x: torch.flatten(x))])

train_set=torchvision.datasets.FashionMNIST('./files/fashion-mnist/', train=True, download=True,
                             transform=transformer)

test_set=torchvision.datasets.FashionMNIST('./files/fashion-mnist/', train=False, download=True,
                             transform=transformer)

batch_size_train= len(train_set)//5
batch_size_test=len(test_set)

In [9]:
train_loader = torch.utils.data.DataLoader(
  train_set,
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  test_set,
  batch_size=batch_size_test, shuffle=True)

train_enumerated = enumerate(train_loader)
batch_idx, (train_x, train_y) = next(train_enumerated)

test_enumerated = enumerate(test_loader)
batch_idx, (test_x, test_y) = next(test_enumerated)

In [13]:
lr = LogisticRegression(solver='saga', multi_class='ovr', penalty='l1', n_jobs=-1, C = 0.7)

lr.fit(train_x, train_y)
true_y, pred_y = test_y, lr.predict(test_x)
print(classification_report(true_y, pred_y))

              precision    recall  f1-score   support

           0       0.80      0.80      0.80      1000
           1       0.97      0.95      0.96      1000
           2       0.71      0.72      0.72      1000
           3       0.82      0.86      0.84      1000
           4       0.71      0.75      0.73      1000
           5       0.93      0.89      0.91      1000
           6       0.62      0.52      0.57      1000
           7       0.89      0.93      0.91      1000
           8       0.92      0.94      0.93      1000
           9       0.93      0.94      0.94      1000

    accuracy                           0.83     10000
   macro avg       0.83      0.83      0.83     10000
weighted avg       0.83      0.83      0.83     10000



Logistic regression scores 83% precision and f1