In [1]:
import torch
from torch import Tensor, nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split

from data_prep import get_vectors

In [4]:
# return train_X, y, features, test_X, test_df.PassengerId
X, y, features, X_valid, validation_ids = get_vectors()
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=11)

In [7]:
train_dataset = TensorDataset(Tensor(X_train), Tensor(y_train))
train_loader = DataLoader(train_dataset, batch_size=32)

In [16]:
test_dataset = TensorDataset(Tensor(X_test), Tensor(y_test))
test_loader = DataLoader(test_dataset, batch_size=X_test.shape[0])

In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [10]:
print(device)

cpu


In [11]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(12, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
            nn.ReLU()
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [12]:
model = NeuralNetwork().to(device)

In [13]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [14]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs.float())
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

Finished Training


In [27]:
dataiter = iter(test_loader)
passengers, survived = dataiter.next()
outputs = model(passengers)
preds = torch.max(outputs,1)[1].numpy()

In [28]:
preds

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0])

In [29]:
y_test

array([1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0])

In [31]:
# sklearn.metrics.accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None)[source]¶
from sklearn.metrics import accuracy_score

In [32]:
accuracy_score(y_test, preds)

0.8340807174887892

### there's a baseline for a neural network!
could try different epochs, batch sizes, only certain features

### ↓ OLD 

In [None]:
from data_prep import get_vectors

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
import torch

In [None]:

from torch import nn
import torch.nn.functional as F

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=11)

In [None]:
X, y, features = get_vectors('data/train.csv')
X_valid, passenger_ids = get_vectors('data/test.csv', test=True)

In [None]:
conda install pytorch

In [None]:
x_t = torch.from_numpy(X_train)

In [None]:
X_train_tensor = torch.Tensor(X_train)

In [None]:
X_train_tensor.shape

In [None]:
from sklearn.datasets import make_classification
X,y = make_classification()

# Load necessary Pytorch packages
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor

# Create dataset from several tensors with matching first dimension
# Samples will be drawn from the first dimension (rows)
dataset = TensorDataset( Tensor(X), Tensor(y) )

# Create a data loader from the dataset
# Type of sampling and batch size are specified at this step
loader = DataLoader(dataset, batch_size= 3)

# Quick test
next(iter(loader))

In [None]:
type(X_train_tensor), type(x_t)

In [None]:
X_train_tensor.shape, x_t.shape

In [None]:
y_train_tensor = torch.Tensor(y_train)

In [None]:
y_train_tensor.shape

In [None]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10))

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
from torch import optim

In [None]:
model = nn.Sequential(nn.Linear(12, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 2),
                      nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    running_loss = 0
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    print(loss.item())
#     for images, labels in trainloader:
#         # Flatten MNIST images into a 784 long vector
#         images = images.view(images.shape[0], -1)
    
#         # TODO: Training pass
#         optimizer.zero_grad()
        
#         output = model(images)
#         loss = criterion(output, labels)
#         loss.backward()
#         optimizer.step()
        
#         running_loss += loss.item()
#     else:
#         print(f"Training loss: {running_loss/len(trainloader)}")

In [None]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(12, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
model = NeuralNetwork().to('cpu')

In [None]:
print(model)

In [None]:
X = torch.rand(1, 28, 28, device='cpu')

In [None]:
logits = model(x_t.float())

In [None]:
type(x_t)

In [None]:
pred_probab = nn.Softmax(dim=1)(logits)

In [None]:
pred_probab

In [None]:
from sklearn.preprocessing import normalize

In [None]:
import numpy as np

In [None]:
XX = np.array([
    [240, 34, 72],
    [170, 51, 73],
    [170, 41, 74]
])

In [None]:
n_x = normalize(XX)

In [None]:
n_x

In [None]:
n_x_1 = normalize(XX, axis=0)

In [None]:
n_x_1

In [None]:
n_x_2 = normalize(XX, axis=1)

In [None]:
n_x_2

In [None]:
XX.mean(axis=0)

In [None]:
XX.std(axis=0)

In [None]:
(240-193.333)/32.99

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
StandardScaler(XX)

In [None]:
ss = StandardScaler()

In [None]:
xx_ss = ss.fit_transform(XX)

In [None]:
ss.transform(np.array([[145,14,54]]))

In [None]:
xx_ss