In [89]:
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch import nn
import pandas as pd

In [90]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [91]:
df = pd.read_csv("cancer.csv")
df.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL_CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,0,1,1,0,0,1,0,1,1,1,1,1,1,1
1,M,74,1,0,0,0,1,1,1,0,0,0,1,1,1,1
2,F,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0
3,M,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0
4,F,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0


In [92]:
# create label encoder to make strings into categorized numericals
le = LabelEncoder()

# list of columns needed to be encoded
le_cols = ["GENDER"]

# encode every listed column
for column in df.columns:
    if column in le_cols:
        le.fit(df[column])
        df[column] = le.transform(df[column])


df.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL_CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,1,69,0,1,1,0,0,1,0,1,1,1,1,1,1,1
1,1,74,1,0,0,0,1,1,1,0,0,0,1,1,1,1
2,0,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0
3,1,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0
4,0,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0


In [93]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(["LUNG_CANCER"], axis=1), df["LUNG_CANCER"], test_size=0.2)

X_train = torch.tensor(X_train.values).type(torch.float32).to(device)
X_test = torch.tensor(X_test.values).type(torch.float32).to(device)
y_train = torch.tensor(y_train.values).type(torch.float32).to(device)
y_test = torch.tensor(y_test.values).type(torch.float32).to(device)

In [94]:
class CancerClassifier(nn.Module):
    def __init__(self, no_of_features, no_of_neurons):
        super().__init__()

        self.layer_stack = nn.Sequential(
            nn.Linear(no_of_features, no_of_neurons),
            nn.ReLU(),
            nn.Linear(no_of_neurons, 1)
        )
    def forward(self, x):
        return self.layer_stack(x)

model = CancerClassifier(df.shape[1]-1, 16)
model.to(device)

CancerClassifier(
  (layer_stack): Sequential(
    (0): Linear(in_features=15, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [95]:
y_logits = model(X_train)
y_probs = torch.sigmoid(y_logits)
y_probs[:5]

tensor([[0.2110],
        [0.1910],
        [0.1865],
        [0.1458],
        [0.1548]], device='cuda:0', grad_fn=<SliceBackward0>)

In [96]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001, weight_decay=1e-5)

In [97]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

In [98]:
torch.manual_seed(42)

epochs = 1000

# train test loop
for epoch in range(epochs):

    # TRAINING
    model.train()

    # forward pass
    y_logits = model(X_train).squeeze()
    y_pred = torch.sigmoid(y_logits).round()

    # metrics
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_train, y_pred)

    # opt zero grad
    optimizer.zero_grad()

    # loss backwards
    loss.backward()

    # opt step
    optimizer.step()

    # TESTING
    model.eval()

    with torch.inference_mode():

        # forward pass
        test_logits = model(X_test).squeeze()
        test_pred = torch.sigmoid(test_logits).round()

        # test metrics
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_test, test_pred)

    # print results
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train Loss: {loss:.4f} | Train Accuracy: {acc:.2f}% | Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")

Epoch: 0 | Train Loss: 1.4955 | Train Accuracy: 12.96% | Test Loss: 0.7789 | Test Accuracy: 14.52%
Epoch: 100 | Train Loss: 0.1792 | Train Accuracy: 91.90% | Test Loss: 0.1577 | Test Accuracy: 95.16%
Epoch: 200 | Train Loss: 0.1469 | Train Accuracy: 94.33% | Test Loss: 0.1801 | Test Accuracy: 95.16%
Epoch: 300 | Train Loss: 0.1377 | Train Accuracy: 93.93% | Test Loss: 0.1920 | Test Accuracy: 95.16%
Epoch: 400 | Train Loss: 0.1350 | Train Accuracy: 94.33% | Test Loss: 0.1931 | Test Accuracy: 95.16%
Epoch: 500 | Train Loss: 0.1338 | Train Accuracy: 94.33% | Test Loss: 0.1977 | Test Accuracy: 95.16%
Epoch: 600 | Train Loss: 0.1333 | Train Accuracy: 94.33% | Test Loss: 0.2005 | Test Accuracy: 95.16%
Epoch: 700 | Train Loss: 0.1332 | Train Accuracy: 94.33% | Test Loss: 0.2019 | Test Accuracy: 95.16%
Epoch: 800 | Train Loss: 0.1329 | Train Accuracy: 94.33% | Test Loss: 0.2013 | Test Accuracy: 95.16%
Epoch: 900 | Train Loss: 0.1327 | Train Accuracy: 94.33% | Test Loss: 0.1988 | Test Accuracy:

In [99]:
model.eval()

with torch.inference_mode():
    eval_logits = model(X_test[0:5])

eval_prob = torch.sigmoid(eval_logits)
eval_pred = eval_prob.round()

eval_pred, y_test[0:5]

(tensor([[1.],
         [1.],
         [1.],
         [1.],
         [0.]], device='cuda:0'),
 tensor([1., 1., 1., 1., 0.], device='cuda:0'))