In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import joblib

def load_raw_titanc():
  # Load the Titanic dataset
  titanic_url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
  return pd.read_csv(titanic_url)

def clean_titanic(titanic_data, fill_missing=True):
  df = titanic_data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
  # Fill missing values
  if fill_missing:
    df['Age'].fillna(df['Age'].median(), inplace=True)
    df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
    df['Fare'].fillna(df['Fare'].median(), inplace=True)
  else:
    df = df.dropna()
  # Encode categorical features
  df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
  df['Embarked'] = df['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})
  return df

def load_titanic(cleaned=True, Xy=True):
  '''
  Returns the titanic dataset
  '''
  # Get data
  df = load_raw_titanc()
  # Clean data
  if cleaned:
    # Drop useless features
    df = clean_titanic(df)
  # Split Data
  if Xy:
    return df.drop('Survived', axis=1), df['Survived'] # X, y
  return df

def get_data_loaders(X_train, X_test, y_train, y_test, batch_size=32):
  # Convert to Tensors
  X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
  y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
  X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
  y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

  # Create Datasets
  train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
  test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

  # Create DataLoaders
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  return train_loader, test_loader

def train_loop(train_loader, model, optimizer, criterion, device):
  model.train()
  epoch_train_loss = 0.0
  for batch, labels in train_loader:
    batch, labels = batch.to(device), labels.to(device)
    optimizer.zero_grad()
    # Forward Pass
    outputs = model(batch)
    loss = criterion(outputs, labels)
    # Update Parameters
    loss.backward()
    optimizer.step()
    # Save loss
    epoch_train_loss += loss.item() * batch.size(0)
  return epoch_train_loss / len(train_loader.dataset)

def test_loop(test_loader, model, criterion, device):
  model.eval()
  epoch_test_loss = 0.0
  with torch.no_grad():
    for batch, labels in test_loader:
      batch, labels = batch.to(device), labels.to(device)
      # Calculate predictions
      outputs = model(batch)
      # Save loss
      loss = criterion(outputs, labels)
      epoch_test_loss += loss.item() * batch.size(0)
  return epoch_test_loss / len(test_loader.dataset)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


cuda


In [4]:
import wandb
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/carlo/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcarlofinnegan[0m ([33mtraintest[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:

# Define hyperparameters
n_estimators = 30
max_depth = 2

# Create a new run in a project
run = wandb.init(
    project="Simple Scikit-Learn Run",
    notes="commit message for the run",
    config={
        "n_estimators": n_estimators,
        "max_depth": max_depth
    }
)

# Get Data
X, y = load_titanic()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Define Random Forest Classifier
clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
clf.fit(X_train, y_train)

# Log metrics
wandb.log({"accuracy": clf.score(X_test, y_test)})

# Finish the run
wandb.finish()
     

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we

0,1
accuracy,▁

0,1
accuracy,0.7486


In [10]:
# Define hyperparameters
num_epochs = 50
learning_rate = 0.05
run_count = 2

# Start a run
run = wandb.init(
    project="Simple PyTorch Run",
    name=f"MyRun{run_count}",
    config={
        "num_epochs": num_epochs,
        "learning_rate": learning_rate
    }
)

# Get Data
X, y = load_titanic()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
train_loader, test_loader = get_data_loaders(X_train, X_test, y_train, y_test)

# Define Simple PyTorch Model
model = nn.Sequential(
    nn.Linear(7, 32), # 7 input features -> 32 hidden nodes
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1),
    nn.Sigmoid()
).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  train_loss = train_loop(train_loader, model, optimizer, criterion, device)
  test_loss = test_loop(test_loader, model, criterion, device)

  print(f"[Epoch {epoch+1}/{num_epochs}] TrainLoss: {train_loss}; TestLoss: {test_loss}")
  wandb.log({
    "train_loss": train_loss,
    "test_loss": test_loss
  })

wandb.finish()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we

[Epoch 1/50] TrainLoss: 0.6645588586839397; TestLoss: 0.623424286948902
[Epoch 2/50] TrainLoss: 0.6387273354476757; TestLoss: 0.5460224724348697
[Epoch 3/50] TrainLoss: 0.557707008351101; TestLoss: 0.5392009193004843
[Epoch 4/50] TrainLoss: 0.5330431541700041; TestLoss: 0.46666483622689486
[Epoch 5/50] TrainLoss: 0.5158350166310085; TestLoss: 0.46390859990812544
[Epoch 6/50] TrainLoss: 0.5080417180329226; TestLoss: 0.5156256243503293
[Epoch 7/50] TrainLoss: 0.48811886283788786; TestLoss: 0.4730841877074215
[Epoch 8/50] TrainLoss: 0.4806925527165445; TestLoss: 0.6098337083555466
[Epoch 9/50] TrainLoss: 0.5134174388446165; TestLoss: 0.5426587002903389
[Epoch 10/50] TrainLoss: 0.4923927228102523; TestLoss: 0.45189157394723517
[Epoch 11/50] TrainLoss: 0.48437382967284554; TestLoss: 0.4446116416147967
[Epoch 12/50] TrainLoss: 0.4763236819358354; TestLoss: 0.4962342929240712
[Epoch 13/50] TrainLoss: 0.48720734373907026; TestLoss: 0.42846654696837483
[Epoch 14/50] TrainLoss: 0.486017543110954

0,1
test_loss,█▅▅▂▂▃█▅▂▂▁▁▂▄▂▁▂▂▅▂▃▂▄▃▇▂▂▂▁▂▂▄▂▂▂▂▁▁▃▃
train_loss,█▅▄▃▃▂▃▃▂▂▂▂▂▃▂▂▁▁▂▂▃▃▂▂▃▂▂▁▂▂▂▂▂▁▂▂▃▂▂▁

0,1
test_loss,0.48741
train_loss,0.4527
