In [None]:
# Required packages
!pip install torch
!pip install seaborn

In [None]:
import os
from torch.utils import cpp_extension


# Not sure if this is really needed atm,
# but I'm keeping it here just in case.
def print_compiledir_content():
    """Print the entries of the default torch C++ extension build root.

    The directory is the one returned by
    ``cpp_extension.get_default_build_root()``. When it exists, a header
    line is printed followed by one entry name per line; otherwise a
    single notice is printed instead.
    """
    build_root = cpp_extension.get_default_build_root()

    # Guard clause: nothing to list when the build root is absent.
    if not os.path.exists(build_root):
        print("Compiled directory does not exist.")
        return

    print("Contents of the compiled directory:")
    for entry_name in os.listdir(build_root):
        print(entry_name)


# List the current contents of the default extension build directory.
print_compiledir_content()


In [133]:
# I don't think we need any of this
# either, but I'll also keep this here
# just for the time being.
MODEL_PATH = "./model/"
DATA_PATH = "./data/"

# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(MODEL_PATH, exist_ok=True)
os.makedirs(DATA_PATH, exist_ok=True)


In [134]:
import torch.nn as nn


class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear layer with a single output unit,
    followed by a sigmoid."""

    def __init__(self, input_size):
        """Create the linear layer and the sigmoid activation.

        Args:
            input_size (int): Number of input features of the linear layer.
        """
        super().__init__()
        self.linear = nn.Linear(input_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with the sigmoid."""
        return self.sigmoid(self.linear(x))


In [135]:
import pandas
import torch
import torch.optim as optim
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ReduceLROnPlateau


def train_target_model(input_size, x_train_tensor, y_train_tensor,
                       num_epochs=1000):
    """
        train_target_model

        Trains a logistic regression model w/ a
        binary cross-entropy (log loss) loss
        function and an Adam optimizer. Every epoch is
        a single full-batch step over the given tensors,
        and a ReduceLROnPlateau scheduler (patience=5,
        factor=0.1) lowers the learning rate when the
        training loss stops improving.

        Args:
            input_size (int): Num of features in the input data
            x_train_tensor (torch.Tensor): Input training tensor
            y_train_tensor (torch.Tensor): Target training tensor
            num_epochs (int, optional): Num of training epochs (def. is 1000)

        Returns:
            torch.nn.Module: Trained logistic regression model
    """
    model = LogisticRegressionModel(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # `verbose` is intentionally not passed: it is deprecated in the
    # PyTorch LR schedulers and newer releases may reject it. Learning-rate
    # reductions are reported explicitly in the loop below instead.
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.1)

    # Nothing inside the loop switches the model to eval mode, so setting
    # training mode once up front is sufficient.
    model.train()
    for epoch in range(num_epochs):
        outputs = model(x_train_tensor)
        loss = criterion(outputs, y_train_tensor)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Feed the scheduler a plain float and report when it lowers the
        # learning rate (the optimizer has a single parameter group).
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(loss.item())
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'reducing learning rate to {lr_after:.4e}')

        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

    return model


In [136]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    The matrix is computed with ``sklearn.metrics.confusion_matrix`` and
    rendered with seaborn on the current matplotlib axes, with every cell
    annotated by its count. The figure is shown immediately.

    Args:
        y_true: True labels, passed straight to ``confusion_matrix``.
        y_pred: Predicted labels, passed straight to ``confusion_matrix``.
    """
    matrix = confusion_matrix(y_true, y_pred)
    sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues')

    # Title and axis labels are independent of each other.
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.show()

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight

# Load the scikit-learn breast cancer dataset and show its description.
bcancer = datasets.load_breast_cancer()
print(bcancer.DESCR)

# Features and target are combined in one frame so they are split together.
x, y = bcancer.data, bcancer.target
dataframe = pandas.DataFrame(x, columns=bcancer.feature_names).assign(Diagnosis=y)

# 80/20 train/test split with a fixed random state.
train_df, test_df = train_test_split(dataframe, test_size=0.2, random_state=1)

x_train = train_df.drop(columns="Diagnosis").to_numpy()
y_train = train_df["Diagnosis"].to_numpy()
x_test = test_df.drop(columns="Diagnosis").to_numpy()
y_test = test_df["Diagnosis"].to_numpy()

# The scaler is fitted on the training features only and then reused
# unchanged for the test features.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Float32 tensors; targets become column vectors of shape (N, 1) to match
# the single-unit model output.
x_train_tensor = torch.tensor(x_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
x_test_tensor = torch.tensor(x_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Seed torch's RNG so the model's initial weights, and therefore the
# reported metrics, are the same on every run (the split above already
# uses random_state=1).
torch.manual_seed(1)
model = train_target_model(x_train.shape[1], x_train_tensor, y_train_tensor)

# Inference only: eval mode, no gradient tracking.
model.eval()
with torch.no_grad():
    y_pred_prob = model(x_test_tensor)
    # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
    y_pred = (y_pred_prob >= 0.5).float()

print("-" * 50)
# Flatten the (N, 1) tensors into 1-D NumPy arrays before scoring.
y_pred = y_pred.numpy().flatten()
y_true = y_test_tensor.numpy().flatten()
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
# All four lines use plain %-formatting; the first one previously carried a
# stray f-string prefix with no placeholders.
print("Accuracy:\t%0.4f\t(TP + TN)/(TP+TN+FP+FN)" % accuracy)
print("Precision:\t%0.4f\tTP/(TP+FP)" % precision)
print("Recall:\t\t%0.4f\tTP/(TP+FN)" % recall)    # this needs two tabs to be aligned w/ others
print("F1 Score:\t%0.4f\t2*(Precision * Recall)/(Precision + Recall)" % f1)

plot_confusion_matrix(y_true, y_pred)