In [1]:
# Setup plotting
import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# deprecated the old names (newer releases reject them). Use the new name when
# the installed Matplotlib provides it and fall back to the legacy name otherwise.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('animation', html='html5')

# Setup feedback system
# NOTE(review): the star import is kept because the exercise checker appears to
# rely on the names it injects into the notebook namespace -- confirm before narrowing it.
from learntools.core import binder
binder.bind(globals())
from learntools.deep_learning_intro.ex6 import *

  plt.style.use('seaborn-whitegrid')
  if layer.__class__.__name__ is 'Dense']
  if layer.__class__.__name__ is 'Dense']


In [3]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

# Load the raw bookings table.
hotel = pd.read_csv('../input/dl-course-data/hotel.csv')

# Work on a copy so `hotel` keeps the raw data, and split the label column
# off the feature frame (`pop` removes it from X and returns it).
X = hotel.copy()
y = X.pop('is_canceled')

# Replace month names with their calendar number (January -> 1 ... December -> 12).
X['arrival_date_month'] = X['arrival_date_month'].map({
    month: number
    for number, month in enumerate(
        ['January', 'February', 'March', 'April', 'May', 'June',
         'July', 'August', 'September', 'October', 'November', 'December'],
        start=1,
    )
})

# Columns routed through the numeric pipeline (impute, then standardise).
features_num = [
    "lead_time",
    "arrival_date_week_number",
    "arrival_date_day_of_month",
    "stays_in_weekend_nights",
    "stays_in_week_nights",
    "adults",
    "children",
    "babies",
    "is_repeated_guest",
    "previous_cancellations",
    "previous_bookings_not_canceled",
    "required_car_parking_spaces",
    "total_of_special_requests",
    "adr",
]

# Columns routed through the categorical pipeline (impute, then one-hot encode).
# arrival_date_month is listed here, so it is one-hot encoded like the others.
features_cat = [
    "hotel",
    "arrival_date_month",
    "meal",
    "market_segment",
    "distribution_channel",
    "reserved_room_type",
    "deposit_type",
    "customer_type",
]

# Numeric pipeline: there are a few missing values, so fill them with the
# imputer's default constant before scaling.
transformer_num = make_pipeline(
    SimpleImputer(strategy="constant"),
    StandardScaler(),
)

# Categorical pipeline: missing entries become the explicit category "NA";
# handle_unknown='ignore' keeps transform() from failing on categories that
# were not present when the encoder was fitted.
transformer_cat = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="NA"),
    OneHotEncoder(handle_unknown='ignore'),
)

# Apply each pipeline to its own column group; columns in neither list are
# not passed to either pipeline.
preprocessor = make_column_transformer(
    (transformer_num, features_num),
    (transformer_cat, features_cat),
)

# Stratify on the label so classes are evenly represented across both splits.
# A fixed random_state makes the split -- and every metric computed from it --
# reproducible across reruns of the notebook.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, stratify=y, train_size=0.75, random_state=0,
)

# Fit the preprocessing on the training rows only, then reuse the fitted
# transform on the validation rows so no validation statistics leak in.
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

# Number of columns produced by the preprocessor, wrapped in a list.
input_shape = [X_train.shape[1]]

# Define Model

In [8]:
import torch
import torch.nn as nn

class BinaryClassifier(nn.Module):
    """Feed-forward network with two hidden layers for binary classification.

    Pipeline applied by ``forward``: batch norm on the raw input, then two
    blocks of ``Linear -> ReLU -> BatchNorm -> Dropout`` (256 units,
    dropout p=0.3), then a single-unit linear layer squashed by a sigmoid.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()

        hidden_units = 256
        drop_probability = 0.3

        # Submodules are created in the same order forward() applies them.
        self.input_bn = nn.BatchNorm1d(input_size)

        # First hidden block.
        self.layer1 = nn.Linear(input_size, hidden_units)
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(hidden_units)
        self.dropout1 = nn.Dropout(drop_probability)

        # Second hidden block.
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm1d(hidden_units)
        self.dropout2 = nn.Dropout(drop_probability)

        # Output head: one unit, mapped to a value between 0 and 1.
        self.output_layer = nn.Linear(hidden_units, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Input batch; this file passes float tensors of shape
                (batch, input_size).

        Returns:
            The sigmoid of the single output unit, one value per row.
        """
        normalized = self.input_bn(x)

        hidden = self.layer1(normalized)
        hidden = self.relu1(hidden)
        hidden = self.bn1(hidden)
        hidden = self.dropout1(hidden)

        hidden = self.layer2(hidden)
        hidden = self.relu2(hidden)
        hidden = self.bn2(hidden)
        hidden = self.dropout2(hidden)

        raw_score = self.output_layer(hidden)
        return self.sigmoid(raw_score)

# Size the network's input layer from the preprocessed training matrix:
# one input per column.
input_size = X_train.shape[1]

# Seed PyTorch's RNG so the weight initialisation is repeatable across reruns.
torch.manual_seed(0)

# Build the model once (a second instantiation would only discard the first).
model = BinaryClassifier(input_size)

In [9]:
import torch.optim as optim

# Loss: binary cross-entropy computed on probabilities, which is what the
# model's final sigmoid produces.
criterion = nn.BCELoss()

# Optimizer: Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [10]:
# Feature matrices -> float32 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)

# Labels -> float32 tensors with a trailing axis of length 1, so they line up
# with the single output unit of the model.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)[:, None]
y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32)[:, None]

# Full-batch training: each epoch is one gradient step computed on the whole
# training tensor.
epochs = 100
for epoch in range(epochs):
    # Training mode, so dropout and batch norm behave as they should while fitting.
    model.train()

    # Drop gradients left over from the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass and loss.
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Report the training loss on every 10th epoch.
    if not (epoch + 1) % 10:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.4756
Epoch [20/100], Loss: 0.4455
Epoch [30/100], Loss: 0.4281
Epoch [40/100], Loss: 0.4195
Epoch [50/100], Loss: 0.4120
Epoch [60/100], Loss: 0.4064
Epoch [70/100], Loss: 0.4011
Epoch [80/100], Loss: 0.3975
Epoch [90/100], Loss: 0.3938
Epoch [100/100], Loss: 0.3899


In [11]:
# Score the trained model on the validation rows.
model.eval()  # evaluation mode for dropout / batch norm

# Gradients are not needed for scoring.
with torch.no_grad():
    y_pred_tensor = model(X_valid_tensor)

# Threshold the predicted probabilities at 0.5 to get hard 0/1 predictions.
y_pred_class = y_pred_tensor.gt(0.5).float()

# Accuracy = matching predictions / number of validation rows.
total = y_valid_tensor.shape[0]
correct = y_pred_class.eq(y_valid_tensor).sum().item()
accuracy = correct / total

print(f'Accuracy on the validation set: {accuracy * 100:.2f}%')

Accuracy on the validation set: 82.12%
