# PyTorch Logistic Regression for Digits Dataset

In [None]:
# Import necessary libraries
import itertools

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

## Configuration and Setup

In [None]:
# Hyperparameters
LEARNING_RATE = 0.01
BATCH_SIZE = 64
EPOCHS = 50  # You might need more epochs for better convergence

# Device configuration (GPU if available, else CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seed for reproducibility: seed NumPy and PyTorch with the same
# value, and additionally seed every CUDA device when one is present.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

## Data Loading and Preparation

In [None]:
# Load the digits dataset bundled with scikit-learn and keep the returned
# object around: later cells read its .data, .target and .target_names.
print("Loading digits dataset...")
digit_dataset = datasets.load_digits()
print("Dataset loaded.")

In [None]:
# Assign features (X) and target (y) from dataset
X_np = digit_dataset.data
y_np = digit_dataset.target

# Normalize the features (pixel values 0-16 -> 0-1)
X_norm_np = X_np / 16.0

# Split data into training and testing sets.
# The normalized features are used, and the split is stratified on the labels
# so both partitions keep the same class balance.
print("Splitting data into train/test sets...")
X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    X_norm_np,
    y_np,
    test_size=0.25,
    random_state=SEED,
    stratify=y_np,
)
print(f"Data split: X_train: {X_train_np.shape}, X_test: {X_test_np.shape}, y_train: {y_train_np.shape}, y_test: {y_test_np.shape}")

In [None]:
# Convert NumPy arrays to PyTorch tensors
X_train = torch.tensor(X_train_np, dtype=torch.float32)
y_train = torch.tensor(y_train_np, dtype=torch.long)  # CrossEntropyLoss expects Long type for labels
X_test = torch.tensor(X_test_np, dtype=torch.float32)
y_test = torch.tensor(y_test_np, dtype=torch.long)

# Create TensorDatasets and DataLoaders.
# Only the training loader shuffles; the test loader keeps a fixed order.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Model Definition

In [None]:
# Logistic Regression is equivalent to a single linear layer in a neural network
# Input features = 64 (8x8 image flattened)
# Output features = 10 (number of classes, digits 0-9)
class PyTorchLogisticRegression(nn.Module):
    """Multinomial logistic regression expressed as one ``nn.Linear`` layer.

    The module returns raw logits; no softmax is applied here.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output classes (one logit per class).
    """

    def __init__(self, input_dim: int, output_dim: int) -> None:
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits produced by the linear layer for ``x``."""
        # No activation function here, as CrossEntropyLoss applies LogSoftmax internally
        return self.linear(x)

## Instantiate Model, Loss, Optimizer

In [None]:
# Derive the layer sizes from the data rather than hard-coding them.
input_dim = X_train.shape[1]  # Should be 64
output_dim = len(digit_dataset.target_names)  # Should be 10

model = PyTorchLogisticRegression(input_dim, output_dim).to(device)
print("\nModel Architecture:")
print(model)

# CrossEntropyLoss combines LogSoftmax and NLLLoss - suitable for multi-class classification
criterion = nn.CrossEntropyLoss()

# Optimizer (Adam is a popular choice)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# You could also use SGD:
# optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

## Training Loop

In [None]:
print(f"\nStarting Training for {EPOCHS} epochs...")

# Total number of training samples, used to turn the summed loss into a
# true per-sample mean for each epoch.
num_train_samples = len(train_loader.dataset)

for epoch in range(EPOCHS):
    model.train()  # Set model to training mode
    running_loss = 0.0

    for features, labels in train_loader:
        # Move tensors to the configured device
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()  # Clear previous gradients
        loss.backward()        # Compute gradients
        optimizer.step()       # Update weights

        # The criterion returns the mean loss of the batch. Weight it by the
        # batch size so a shorter final batch is not over-represented in the
        # epoch average.
        running_loss += loss.item() * features.size(0)

    # Average loss per training sample for the epoch
    epoch_loss = running_loss / num_train_samples
    if (epoch + 1) % 5 == 0 or epoch == 0:  # Print every 5 epochs or the first epoch
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {epoch_loss:.4f}")

print("Training finished.")

## Evaluation

In [None]:
print("\nEvaluating model on the test set...")
model.eval()  # Set model to evaluation mode

# Predictions and true labels accumulated across all test batches; the
# detailed-metrics cell consumes these.
all_preds = []
all_labels = []

# Disable gradient calculations during evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)

        outputs = model(features)

        # The output is (batch_size, num_classes); the predicted class is the
        # index of the largest logit in each row.
        predicted = torch.argmax(outputs, dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Store predictions and labels for detailed metrics
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = 100 * correct / total
print(f"\nTest Accuracy: {accuracy:.2f}%")

## Detailed Metrics (Confusion Matrix, Classification Report)

In [None]:
print("\n--- Detailed Test Set Metrics ---")

# Confusion matrix of true labels against predicted labels; `cm` is reused by
# the plotting cell.
print("\nConfusion Matrix:")
cm = confusion_matrix(all_labels, all_preds)
print(cm)

# Per-class report, with the class names converted to strings for display.
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=[str(i) for i in digit_dataset.target_names]))

## Confusion Matrix Plotting

In [None]:
# Define the plotting function (from scikit-learn examples)
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a short banner and draw the confusion matrix on the current figure.

    Args:
        cm: Square NumPy array of counts, indexed as ``cm[true, predicted]``.
        classes: Sequence of tick labels, one per row/column of ``cm``.
        normalize: When True, each row is divided by its sum before plotting
            and cells are shown with two decimals. Rows that sum to zero are
            shown as 0.00 instead of NaN.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the image.
    """
    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row has samples; empty rows stay at zero
        # instead of producing NaN and a divide-by-zero warning.
        cm = np.divide(
            cm.astype('float'),
            row_sums,
            out=np.zeros(cm.shape, dtype=float),
            where=row_sums != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate every cell; switch to white text on dark cells for contrast.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()

In [None]:
print("\nPlotting Confusion Matrix...")
class_names = [str(i) for i in digit_dataset.target_names]

# Plot non-normalized confusion matrix
plt.figure(figsize=(8, 6))
plot_confusion_matrix(cm, classes=class_names,
                      title='PyTorch Confusion Matrix (Digits), without normalization')
plt.show()  # Show plot immediately after creation

# Plot normalized confusion matrix
plt.figure(figsize=(8, 6))
plot_confusion_matrix(cm, classes=class_names, normalize=True,
                      title='PyTorch Normalized Confusion Matrix (Digits)')
plt.show()  # Show plot immediately after creation

print("\nScript finished.")