In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_theme()
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits # Scikit-Learn’s set of preformatted digit which is built into the library.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import label_binarize
# Ignore every warning from here on (they are switched back on in the last
# cell). NOTE(review): this also hides scikit-learn convergence and
# deprecation warnings, which can mask real problems in the cells below.
import warnings

warnings.filterwarnings('ignore')

In [None]:
# Load scikit-learn's bundled digits dataset and display the shape of its
# stack of images as the cell output.
digits = load_digits()
digits.images.shape

The images data is a three-dimensional array: 1,797 samples, each consisting of an
8×8 grid of pixels. Let’s visualize the first hundred of these:


In [None]:
# Draw the first 100 digits on a 10x10 grid of tick-less axes and write each
# image's label in green at axes-relative position (0.05, 0.05).
fig, axes = plt.subplots(
    10, 10,
    figsize=(8, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
)
# zip stops after the 100 axes, so only the first 100 samples are drawn.
for ax, image, label in zip(axes.flat, digits.images, digits.target):
    ax.imshow(image, cmap='binary', interpolation='nearest')
    ax.text(0.05, 0.05, str(label), transform=ax.transAxes, color='green')

In order to work with this data within Scikit-Learn, we need a two-dimensional, [n_samples, n_features] representation. We can accomplish this by treating each\
pixel in the image as a feature—that is, by flattening out the pixel arrays so that we have a length-64 array of pixel values representing each digit. Additionally, we need\
the target array, which gives the previously determined label for each digit. These two quantities are built into the digits dataset under the data and target attributes,\
respectively:


In [None]:
# Feature matrix (flattened pixel values) and label vector taken from the
# dataset's `data` and `target` attributes; the cell displays both shapes.
X = digits.data
y = digits.target
X.shape, y.shape

#### Logistic Regression from scratch


We'll use the softmax function rather than the sigmoid, since the sigmoid only works for binary classification. Softmax, on the other hand, can be used for multinomial (multi-class) classification.

In [None]:
def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    z : array-like of shape (n_samples, n_classes)
        Raw scores (logits). Each row is normalized independently.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_classes)
        Non-negative values whose rows each sum to 1.
    """
    z = np.asarray(z)
    # Subtracting each row's maximum does not change the result mathematically,
    # but it keeps np.exp from overflowing to inf (which would yield nan rows).
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

In [None]:
# Create a 2D array of shape (3, 4) filled with random numbers in [0, 1).
# A seeded generator is used so that "Restart & Run All" reproduces the same
# values (and therefore the same plot) every time.
z = np.random.default_rng(42).random((3, 4))

In [None]:
# Plot the softmax output against the raw scores. With 2-D inputs matplotlib
# draws one line per column of `z`.
plt.plot(z, softmax(z))

In [None]:
# Gram matrix of the features (X^T X), displayed as the cell output.
XTX = X.T.dot(X)
XTX

In [None]:
# Shape of the Gram matrix (one row and one column per feature).
XTX.shape

In [None]:
# Recompute the Gram matrix and take its Moore-Penrose pseudo-inverse
# (np.linalg.pinv is defined even when the matrix is singular).
XTX = X.T.dot(X)
XTX_inv = np.linalg.pinv(np.array(XTX))

In [None]:
# Sanity check: multiply the Gram matrix by its pseudo-inverse and display the
# product rounded to 10 decimals.
res = XTX.dot(XTX_inv)
np.around(res, decimals=10)

In [None]:
# Closed-form least-squares fit of the digit label on the pixel features.
# Splitting the solution into an intercept `w0` and per-feature weights `w`
# only makes sense when the design matrix starts with a column of ones, so one
# is prepended here; without it `weights_full[0]` is merely the weight of the
# first pixel. np.linalg.pinv(A) equals pinv(A^T A) @ A^T, i.e. the same
# normal-equation solution as before, applied to the augmented matrix.
X_design = np.column_stack([np.ones(X.shape[0]), X])
weights_full = np.linalg.pinv(X_design).dot(y)
w0 = weights_full[0]
w = weights_full[1:]

In [None]:
# Display the first entry of the closed-form solution.
w0

In [None]:
# Display the remaining entries of the closed-form solution.
w

In [None]:
# Number of entries in `w`.
w.shape

In [None]:

class MultinomialLogisticRegression:
    """Softmax (multinomial logistic) regression trained by batch gradient descent.

    The model has no intercept term and no regularization: class scores are
    simply ``X @ weights``.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to the gradient at every iteration.
    num_iterations : int, default=100
        Number of full-batch gradient-descent updates performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features, n_classes)
        Learned coefficients; set by ``fit``.
    classes : numpy.ndarray of shape (n_classes,)
        Sorted unique labels seen by ``fit``; column ``k`` of ``weights``
        belongs to ``classes[k]``.
    """

    def __init__(self, learning_rate=0.01, num_iterations=100):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def softmax(self, z):
        """Return the row-wise softmax of a 2-D score array ``z``."""
        z = np.asarray(z)
        # Shift by the row maximum so np.exp cannot overflow; the ratio below
        # is mathematically unaffected by the shift.
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def fit(self, X, y):
        """Learn the weight matrix from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Class labels. Any sortable labels are accepted (they do not have
            to be the integers ``0..n_classes-1``).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or disagrees with ``y`` in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # `class_index` maps every label to its position in the sorted
        # `classes` array, so one-hot encoding works for arbitrary labels.
        self.classes, class_index = np.unique(y, return_inverse=True)
        n_classes = len(self.classes)
        self.weights = np.zeros((X.shape[1], n_classes))
        one_hot = np.eye(n_classes)[class_index]

        for _ in range(self.num_iterations):
            probabilities = self.softmax(np.dot(X, self.weights))
            # Gradient of the mean cross-entropy loss w.r.t. the weights.
            gradient = np.dot(X.T, probabilities - one_hot) / X.shape[0]
            self.weights -= self.learning_rate * gradient

    def predict(self, X):
        """Return the most likely class label for every row of ``X``."""
        scores = np.dot(X, self.weights)
        # Softmax is monotonic within a row, so the arg-max of the raw scores
        # is the arg-max of the probabilities.
        best = np.argmax(scores, axis=1)
        return self.classes[best]

In [None]:
# Loading the digits dataset
digits = load_digits()

# Getting the features and the labels
X = digits.data
y = digits.target

# Splitting the data into training and test sets
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Standardizing the features. The scaler is fitted on the training split only
# and then applied to the test split, so that no test-set statistics leak into
# training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Creating and training the model
model = MultinomialLogisticRegression(learning_rate=0.01, num_iterations=300)
model.fit(X_train, y_train)

# Making predictions on the test set
y_pred = model.predict(X_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy}")

In [None]:
# Now comparing with scikit-learn's logistic regression.
# For a multiclass target the lbfgs solver already fits a multinomial (softmax)
# model by default, so the deprecated `multi_class` argument is not passed.
lr = LogisticRegression(solver='lbfgs')
lr.fit(X_train, y_train)
y_pred_sklearn = lr.predict(X_test)

print("Scikit-learn's implementation accuracy: ",
      accuracy_score(y_test, y_pred_sklearn))

### Model Evaluation
1. Accuracy

In [None]:
# Report both accuracies as percentages. The `.2%` format spec scales and
# rounds in one step, which avoids the floating-point artefacts that
# `round(x, 4) * 100` can print (e.g. 56.99999999999999).
print(
    f"My implementation accuracy: {accuracy_score(y_test, y_pred):.2%}")
print(
    f"Scikit-learn's implementation accuracy: {accuracy_score(y_test, y_pred_sklearn):.2%}")

2. ROC-AUC score

In [None]:
# ROC-AUC measures how well the predicted scores rank the classes, so it has
# to be computed from class probabilities. Binarized hard predictions reduce
# every one-vs-rest ROC curve to a single operating point.
y_proba = model.softmax(np.dot(X_test, model.weights))
y_proba_sklearn = lr.predict_proba(X_test)

# `labels` pins the column order of the probability matrices explicitly.
print("Our implementation AUC: ", roc_auc_score(
    y_test, y_proba, multi_class='ovr', labels=model.classes))
print("Scikit-learn's implementation AUC: ",
      roc_auc_score(y_test, y_proba_sklearn, multi_class='ovr', labels=lr.classes_))

3. Precision, Recall and F1-score

In [None]:
# Per-class precision, recall and F1 for each model, hand-written model first.
for heading, predictions in (
    ("My implementation classification report: \n", y_pred),
    ("Scikit-learn's implementation classification report: \n", y_pred_sklearn),
):
    print(heading, classification_report(y_test, predictions))

### Visualizing the results

In [None]:
# Overlay the distributions of the predicted labels from both models on a
# single 8x6 figure.
plt.figure(figsize=(8, 6))

# One semi-transparent histogram per model, drawn on the current axes.
for predictions, colour, name in (
    (y_pred, 'red', 'My Implementation'),
    (y_pred_sklearn, 'blue', 'Scikit-Learn'),
):
    sns.histplot(predictions, color=colour, alpha=0.5, label=name)

# Legend built from the `label` of each histogram, then render.
plt.legend()
plt.show()

In [None]:
# Confusion matrices for both models (rows: true labels, columns: predicted
# labels). They were previously used below without ever being computed, which
# raised a NameError on a fresh kernel.
cm_ours = confusion_matrix(y_test, y_pred)
cm_sklearn = confusion_matrix(y_test, y_pred_sklearn)

fig, ax = plt.subplots(1, 3, figsize=(18, 6))

# Create a histogram of the implementation's predictions
sns.histplot(y_pred, color='red', alpha=0.5,
             label='My Implementation', ax=ax[0], kde=True)

# Create a histogram of scikit-learn's predictions
sns.histplot(y_pred_sklearn, color='blue', alpha=0.5,
             label='Scikit-Learn', ax=ax[0], kde=True)

# Add a legend
ax[0].legend()

# Plot our implementation
sns.heatmap(cm_ours, annot=True, fmt='d', ax=ax[1], cmap='Blues')
ax[1].set_title('My Implementation')
ax[1].set_xlabel('Predicted')
ax[1].set_ylabel('True')

# Plot scikit-learn's implementation
sns.heatmap(cm_sklearn, annot=True, fmt='d', ax=ax[2], cmap='Blues')
ax[2].set_title('Scikit-learn\'s Implementation')
ax[2].set_xlabel('Predicted')
ax[2].set_ylabel('True')

# Show the plot
plt.show()

Our model also typically scores lower on several evaluation metrics.\
This could be due to a variety of reasons:

1. **Model Complexity**: The model implemented is a simple multinomial logistic regression model. It's a linear model, which means it may not capture complex relationships between features. Scikit-learn's `LogisticRegression` is also linear, but it includes additional features that our implementation lacks, such as an intercept term and (by default) L2 regularization, which can help it perform better on complex datasets.

2. **Optimization Algorithm**: The model implemented uses gradient descent for optimization, which is a simple and widely used method. However, it might not always be the best choice. Scikit-learn's `LogisticRegression` uses more advanced optimization algorithms (like 'liblinear' or 'lbfgs') that often converge faster and find better solutions.

3. **Hyperparameters**: The learning rate and the number of iterations are hyperparameters of the gradient descent algorithm, and they can significantly affect the performance of the model. If they are not set properly, the model might not learn effectively from the data. Scikit-learn's models have mechanisms to tune these hyperparameters.

4. **Numerical Stability**: The implementation of certain functions (like softmax) can suffer from numerical instability, leading to incorrect calculations. Scikit-learn has stable implementations of these functions.

In [None]:
# Switch warning output back on now that the analysis is finished.
warnings.filterwarnings('default')