In [112]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd

In [113]:
# Data preprocessing

# Columns removed from both the training and the test split before modelling.
droppedColumns = ["id", "age", "height(cm)"]

# Raw frames are kept around: `test["id"]` is needed later for the predictions file.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

cleanedTrain = train.drop(columns = droppedColumns)
cleanedTest = test.drop(columns = droppedColumns)

In [114]:
class Model(nn.Module): # model being used for prediction
    """Fully connected network with two ReLU hidden layers.

    Args:
        inFeats: Number of input features per sample.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        outFeats: Number of output units.

    The final layer has no activation, so `forward` returns raw scores;
    the notebook pairs this with `nn.BCEWithLogitsLoss` and applies
    `torch.sigmoid` separately when probabilities are needed.
    """

    def __init__(self, inFeats = 20, h1 = 10, h2 = 15, outFeats = 1):
        super().__init__()
        # Layers are registered in input -> output order.
        self.fc1 = nn.Linear(in_features = inFeats, out_features = h1)
        self.fc2 = nn.Linear(in_features = h1, out_features = h2)
        self.out = nn.Linear(in_features = h2, out_features = outFeats)

    def forward(self, x):
        """Run `x` through both hidden layers and return the un-activated output."""
        firstHidden = F.relu(self.fc1(x))
        secondHidden = F.relu(self.fc2(firstHidden))
        return self.out(secondHidden)

In [115]:
# Fix torch's RNG seed before building the model so that the randomly
# initialised layer weights are the same on every run.
seed = 251
torch.manual_seed(seed)

model = Model()

In [116]:
# Split the cleaned training frame into feature columns and the "smoking" target.
featureFrame = cleanedTrain.drop(columns = "smoking")
targetSeries = cleanedTrain["smoking"] # target

# Convert the underlying NumPy arrays into float tensors for the model and loss.
X = torch.FloatTensor(featureFrame.values)
y = torch.FloatTensor(targetSeries.values)

In [117]:
# Binary cross-entropy computed directly on the model's raw outputs
# (the sigmoid is folded into the loss).
learningRate = 0.01
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = learningRate)

In [118]:
# Full-batch training: every epoch runs one forward/backward pass over all of X.
epochs = 100

# Make sure the model is in training mode; a previously executed evaluation
# cell leaves it in eval mode if this cell is re-run.
model.train()

for i in range(epochs):
    # Call the module itself (not .forward) so any registered hooks are run.
    yPred = model(X)

    # squeeze(-1) removes only the trailing output dimension, so the batch
    # dimension survives even when X holds a single row; a bare squeeze()
    # would collapse it and no longer match the shape of y.
    loss = criterion(yPred.squeeze(-1), y)

    # Report progress every 10th epoch.
    if i % 10 == 0:
        print(f"Epoch: {i}, Loss: {loss.item()}")

    # Standard update step: clear old gradients, backpropagate, apply the update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch: 0, Loss: 1.5730324983596802
Epoch: 10, Loss: 0.6249719858169556
Epoch: 20, Loss: 0.5644246339797974
Epoch: 30, Loss: 0.5461413264274597
Epoch: 40, Loss: 0.5219748616218567
Epoch: 50, Loss: 0.505581796169281
Epoch: 60, Loss: 0.49228808283805847
Epoch: 70, Loss: 0.48403728008270264
Epoch: 80, Loss: 0.48121145367622375
Epoch: 90, Loss: 0.47355911135673523


In [119]:
# Score the model on the training data: print each predicted probability next
# to the true label and count how many thresholded predictions match.
correct = 0
with torch.no_grad():
    model.eval()

    # One batched forward pass instead of one call per row.
    logits = model(X).squeeze(-1)
    probs = torch.sigmoid(logits)  # Sigmoid used to get vals between 0 and 1 (probabilities)

    # The model emits a single logit per sample, so argmax over that output is
    # always 0 and would only ever "predict" class 0. The predicted class is
    # obtained by thresholding the probability at 0.5 instead.
    predicted = (probs >= 0.5).float()

    for i, (prob, actual) in enumerate(zip(probs.tolist(), y.tolist())):
        print(f"{i + 1}.) Predicted Probability: {prob:.4f} \t Actual: {actual}")

    #correct or not
    correct = int((predicted == y).sum().item())

print(f"{correct} were correct")

1.) Predicted Probability: 0.4227 	 Actual: 0.0
2.) Predicted Probability: 0.0334 	 Actual: 0.0
3.) Predicted Probability: 0.0066 	 Actual: 0.0
4.) Predicted Probability: 0.0073 	 Actual: 0.0
5.) Predicted Probability: 0.1059 	 Actual: 0.0
6.) Predicted Probability: 0.1120 	 Actual: 0.0
7.) Predicted Probability: 0.6011 	 Actual: 1.0
8.) Predicted Probability: 0.6410 	 Actual: 0.0
9.) Predicted Probability: 0.0457 	 Actual: 0.0
10.) Predicted Probability: 0.0096 	 Actual: 0.0
11.) Predicted Probability: 0.0170 	 Actual: 0.0
12.) Predicted Probability: 0.3439 	 Actual: 0.0
13.) Predicted Probability: 0.0950 	 Actual: 1.0
14.) Predicted Probability: 0.4088 	 Actual: 0.0
15.) Predicted Probability: 0.8291 	 Actual: 0.0
16.) Predicted Probability: 0.0210 	 Actual: 0.0
17.) Predicted Probability: 0.0740 	 Actual: 0.0
18.) Predicted Probability: 0.0168 	 Actual: 0.0
19.) Predicted Probability: 0.6121 	 Actual: 1.0
20.) Predicted Probability: 0.0126 	 Actual: 0.0
21.) Predicted Probability: 0

<h1>Evaluating Model</h1>

In [120]:
# Build the feature tensor for the test split.
# NOTE(review): assumes cleanedTest has exactly the feature columns the model
# was trained on (same count and order) - confirm against the training cell.
X = cleanedTest
X = X.values
X = torch.FloatTensor(X)

with torch.no_grad():
    model.eval()
    # One batched forward pass instead of one model call per row:
    # raw logits -> sigmoid -> flat list of Python floats.
    probs = torch.sigmoid(model(X)).squeeze(-1).tolist()

# Probabilities formatted to four decimals, in the same row order as `test`.
results = []
for i, prob in enumerate(probs):
    print(f"{i + 1}.) Predicted Probability: {prob:.4f}")

    results.append(f"{prob:.4f}")

# Pair every prediction with the id of the test row it was computed from
# and write the result to disk.
df = pd.DataFrame({
    "id": test["id"],
    "smoking": results
})

df.to_csv("preds.csv", index=False)

1.) Predicted Probability: 0.0419
2.) Predicted Probability: 0.0734
3.) Predicted Probability: 0.6954
4.) Predicted Probability: 0.6159
5.) Predicted Probability: 0.1717
6.) Predicted Probability: 0.6699
7.) Predicted Probability: 0.2511
8.) Predicted Probability: 0.5590
9.) Predicted Probability: 0.5688
10.) Predicted Probability: 0.6091
11.) Predicted Probability: 0.6822
12.) Predicted Probability: 0.6798
13.) Predicted Probability: 0.6272
14.) Predicted Probability: 0.4372
15.) Predicted Probability: 0.7905
16.) Predicted Probability: 0.7901
17.) Predicted Probability: 0.1812
18.) Predicted Probability: 0.3875
19.) Predicted Probability: 0.6400
20.) Predicted Probability: 0.0419
21.) Predicted Probability: 0.3710
22.) Predicted Probability: 0.7828
23.) Predicted Probability: 0.0527
24.) Predicted Probability: 0.1230
25.) Predicted Probability: 0.6921
26.) Predicted Probability: 0.1203
27.) Predicted Probability: 0.3185
28.) Predicted Probability: 0.5165
29.) Predicted Probability: 0