In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
import numpy as np


### Steps:
- Import digits data set from Scikitlearn datasets library. Use `load_digits()`. When loaded, the dataset comes with `data` and `target` values.
- Assign `data` to X and `target` to y
- Check the shape of the data
- Use `np.bincount` to print the number of uniqe elements of the target vriable `y`
- Split data into train and test datasets. Use stratification when splitting. You can set your `random_state` to `42`
- Normalize your dataset. When normalizing, simply divide your dataset by the maximum of the train dataset. To find the maximum, use `max(

In [4]:
data, target = load_digits(return_X_y=True)

In [5]:
X,y = data, target

In [6]:
X.shape

(1797, 64)

In [8]:
y.shape

(1797,)

In [9]:
np.bincount(y)

array([178, 182, 177, 183, 181, 182, 181, 179, 174, 180], dtype=int64)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [19]:
X_train.max(axis=0)

array([ 0.,  8., 16., 16., 16., 16., 16., 15.,  2., 16., 16., 16., 16.,
       16., 16., 12.,  2., 16., 16., 16., 16., 16., 16.,  8.,  1., 15.,
       16., 16., 16., 16., 15.,  1.,  0., 14., 16., 16., 16., 16., 14.,
        0.,  4., 16., 16., 16., 16., 16., 16.,  6.,  8., 16., 16., 16.,
       16., 16., 16., 10.,  1.,  7., 16., 16., 16., 16., 16., 16.])

In [21]:
X_train_norm, X_test_norm = X_train / X_train.max(axis=0), X_test / X_test.max(axis=0)

  X_train_norm, X_test_norm = X_train / X_train.max(axis=0), X_test / X_test.max(axis=0)


In [22]:
X_train_norm

array([[   nan, 0.    , 0.125 , ..., 0.    , 0.    , 0.    ],
       [   nan, 0.625 , 1.    , ..., 0.375 , 0.0625, 0.    ],
       [   nan, 0.    , 0.25  , ..., 0.125 , 0.    , 0.    ],
       ...,
       [   nan, 0.    , 0.5625, ..., 1.    , 0.125 , 0.    ],
       [   nan, 0.    , 0.0625, ..., 0.    , 0.    , 0.    ],
       [   nan, 0.    , 0.0625, ..., 0.0625, 0.    , 0.    ]])

In [24]:
# fill nan value with 0
X_train_norm = np.nan_to_num(X_train_norm)
X_test_norm = np.nan_to_num(X_test_norm)

In [25]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train_norm, y_train)
logreg_pred = logreg.predict(X_test_norm)

logreg_accuracy = accuracy_score(y_test, logreg_pred)
logreg_precision = precision_score(y_test, logreg_pred, average='weighted')
logreg_recall = recall_score(y_test, logreg_pred, average='weighted')
logreg_f1 = f1_score(y_test, logreg_pred, average='weighted')

# KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train_norm, y_train)
knn_pred = knn.predict(X_test_norm)

knn_accuracy = accuracy_score(y_test, knn_pred)
knn_precision = precision_score(y_test, knn_pred, average='weighted')
knn_recall = recall_score(y_test, knn_pred, average='weighted')
knn_f1 = f1_score(y_test, knn_pred, average='weighted')

# Print the results
print("Logistic Regression:")
print("Accuracy:", logreg_accuracy)
print("Precision:", logreg_precision)
print("Recall:", logreg_recall)
print("F1-score:", logreg_f1)

print("\nKNeighborsClassifier:")
print("Accuracy:", knn_accuracy)
print("Precision:", knn_precision)
print("Recall:", knn_recall)
print("F1-score:", knn_f1)

Logistic Regression:
Accuracy: 0.9688888888888889
Precision: 0.9697211775878442
Recall: 0.9688888888888889
F1-score: 0.9690367601641102

KNeighborsClassifier:
Accuracy: 0.9933333333333333
Precision: 0.9933257345491388
Recall: 0.9933333333333333
F1-score: 0.9933082556240451
