In [6]:
import pandas as pd

In [7]:
train = pd.read_csv('train_preprocessed.csv')
test = pd.read_csv('test_preprocessed.csv')

In [8]:
num_features = [ 'mainland_nights', 'island_nights', 'ratio', 'total_nights', 'male_count', 'female_count', 'total_travellers']

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

train[num_features] = scaler.fit_transform(train[num_features])
test[num_features] = scaler.transform(test[num_features])

## Neural Networks

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

In [11]:
test = test.drop(columns='visitor_nation')

In [12]:
trip_id = test['trip_ID']
test_data = test.drop(columns='trip_ID')

categories = train['category']
train_data = train.drop(columns='category')

assert (train_data.columns == test_data.columns).all()

In [13]:
X = torch.tensor(train_data.values, dtype=torch.float32)
y = torch.tensor(categories.values, dtype=torch.long)

X_test = torch.tensor(test_data.values, dtype=torch.float32)

In [None]:
# from imblearn.over_sampling import SMOTE

# smote = SMOTE(random_state=42)
# X, y = smote.fit_resample(X, y)

# X = torch.tensor(X, dtype=torch.float32)
# y = torch.tensor(y, dtype=torch.long)

In [15]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(27, 128)
        self.fc2 = nn.Linear(128, 64) 
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 3)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1) 
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc5(x)
        x = self.log_softmax(x)
        return x  

model = NeuralNetwork()

# Define a loss function and optimizer
criterion = nn.NLLLoss()  
optimizer = optim.SGD(model.parameters(), lr=0.01)

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
num_epochs = 100
batch_size = 128  # Batch size for training
num_batches = len(X) // batch_size

for epoch in range(num_epochs):
    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size
        X_batch = X[start:end]

        y_batch = y[start:end]

        log_probs = model(X_batch)  
        loss = criterion(log_probs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 0.1241
Epoch [2/100], Loss: 0.1586
Epoch [3/100], Loss: 0.1861
Epoch [4/100], Loss: 0.2479
Epoch [5/100], Loss: 0.2741
Epoch [6/100], Loss: 0.3633
Epoch [7/100], Loss: 0.4437
Epoch [8/100], Loss: 0.5122
Epoch [9/100], Loss: 0.6133
Epoch [10/100], Loss: 0.6406
Epoch [11/100], Loss: 0.6304
Epoch [12/100], Loss: 0.7213
Epoch [13/100], Loss: 0.7009
Epoch [14/100], Loss: 0.7543
Epoch [15/100], Loss: 0.8071
Epoch [16/100], Loss: 0.7981
Epoch [17/100], Loss: 0.7691
Epoch [18/100], Loss: 0.7280
Epoch [19/100], Loss: 0.6992
Epoch [20/100], Loss: 0.6808
Epoch [21/100], Loss: 0.6543
Epoch [22/100], Loss: 0.6069
Epoch [23/100], Loss: 0.5329
Epoch [24/100], Loss: 0.5107
Epoch [25/100], Loss: 0.5053
Epoch [26/100], Loss: 0.5314
Epoch [27/100], Loss: 0.5109
Epoch [28/100], Loss: 0.4540
Epoch [29/100], Loss: 0.4429
Epoch [30/100], Loss: 0.4520
Epoch [31/100], Loss: 0.4020
Epoch [32/100], Loss: 0.3897
Epoch [33/100], Loss: 0.3305
Epoch [34/100], Loss: 0.3255
Epoch [35/100], Loss: 0

In [17]:
model.eval()

with torch.no_grad():
    log_probs = model(X_test)  
    predictions = torch.argmax(log_probs, dim=1)

In [18]:
predictions.shape

torch.Size([5852])

In [19]:
df_predictions = pd.concat([trip_id, pd.DataFrame(predictions.numpy(), columns=["category"])], axis=1)

In [20]:
df_predictions.shape

(5852, 2)

In [21]:
df_predictions.to_csv("submission.csv", index=False)

In [22]:
model = NeuralNetwork()
yhat = model(X_batch)

In [23]:
from torchviz import make_dot

make_dot(yhat, params=dict(list(model.named_parameters()))).render("model_visualization", format="png")

'model_visualization.png'