In [None]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn 
import torch.optim as optim

from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import time

In [94]:
df = pd.read_csv("final.csv")
df["EVENT_START_DATETIME"] = pd.to_datetime(df["EVENT_START_DATETIME"])
event_start = pd.to_datetime(df["EVENT_START_DATETIME"]).astype(np.int64) // 10**9
scrape_time = pd.to_datetime(df["scrape_time"]).astype(np.int64) // 10**9
df["time_to_concert"] =  event_start - scrape_time

localized_times = df.apply(lambda row: row['EVENT_START_DATETIME'].tz_convert(row['VENUE_TIMEZONE']), axis=1)

def get_day_of_week(dt):
    return dt.weekday()
df["day_of_week"] = localized_times.map(get_day_of_week)

def get_hour(dt):
    return dt.hour
df["hour_of_day"] = localized_times.map(get_hour)
to_keep = [
    "CLASSIFICATION_GENRE",
    "CLASSIFICATION_SUB_GENRE",
    "MIN_PRICE", 
    "MAX_PRICE",
    "HOT_EVENT",
    "CAPACITY",
    "population",
    "time_to_concert",
    "artist_popularity",
    "TRANSACTABLE",
    "day_of_week",
    "hour_of_day"
]
df = df[to_keep]
df["CLASSIFICATION_SUB_GENRE"] = df['CLASSIFICATION_SUB_GENRE'].fillna(df['CLASSIFICATION_GENRE'])
df.dropna(inplace=True)
categorical_cols = [col_name for col_name in df.columns if df[col_name].dtype == "object"]
X = pd.get_dummies(df.drop(["MIN_PRICE", "MAX_PRICE"], axis=1), columns=categorical_cols).to_numpy()
min_y = df['MIN_PRICE'].to_numpy()
max_y = df["MAX_PRICE"].to_numpy()

In [95]:
X_train, X_test, y_min_train, y_min_test = train_test_split(X, min_y, test_size=0.2, random_state=42)

In [96]:
if torch.backends.mps.is_available(): # my current system is Mac Silicon so I will be using the GPU associated with that
    device = torch.device("mps")
    print("Using MPS device")
else:
    device = torch.device("cpu") # revert the device back to the cpu if mps is not avilable
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
    print("Falling back to CPU device")

Using MPS device


In [97]:
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

y_min_train = np.array(y_min_train, dtype = np.float32)
y_min_test = np.array(y_min_test, dtype = np.float32)

In [98]:
X_train_tensor = torch.from_numpy(X_train).squeeze() 
y_min_train_tensor = torch.from_numpy(y_min_train).squeeze()
X_test_tensor = torch.from_numpy(X_test).squeeze()
y_min_test_tensor = torch.from_numpy(y_min_test).squeeze() 

In [99]:
class CustomDataset(Dataset):
    """
    Class used to format data into Dataset that is compatible with data loader

    Attributes
    ----------
    Dataset: abstract class that provides structure for PyTorch Dataset
    """

    def __init__(self, X, y):
        """
        Initializes the CustomDataset with X data and y data

        Parameters
        ----------
        X: torch tensor
            Feature Data
        y : torch tensor
            Target Variable
        """
        self.X  = X
        self.y = y

    def __len__(self):
        """
        Obtains the length of the CustomDataset
        """
        return len(self.X)
    
    def __getitem__(self, idx):
        """
        Allows  the CustomDataset to be indexed
        """
        return self.X[idx], self.y[idx]

In [100]:
train_data = CustomDataset(X_train_tensor, y_min_train_tensor)
test_data = CustomDataset(X_test_tensor, y_min_test_tensor) 

In [101]:
train_loader = DataLoader(dataset = train_data,
                           batch_size = 10, shuffle = True, num_workers = 0)
test_loader = DataLoader(dataset = test_data, batch_size = 10) 

In [102]:
class fully_connected_nn(nn.Module):
    """
    Class to define our fully connected Neural Network

    Attributes
    ----------
    nn.Module: parent class that contains methods for creating neural network
    """
    def __init__(self):
        """
        Initializes the neural network with all necessary layers
        """
        super().__init__()

        self.layer1 = nn.Linear(52, 64) # inputs the 33 features and feeds it through a layer with width = 64
        self.layer2 = nn.Linear(64,64) # layer with width 66
        self.layer3 = nn.Linear(64, 1) # output a single value for prediction

      

    def forward(self, x):
        """
        defines the process when data is passed through the neural network

        Parameters
        ----------
        x: torch tensor
            Defines the data to pass through the tensor
        """

        x = x.view(-1, 52) # make sure the data is correctly formatted, 52 features

        out = self.layer1(x) # first layer 
        out = torch.relu(out) # ReLU function for non linearity

        out = self.layer2(out) # second layer
        out = torch.relu(out) # ReLU function for non linearity

        out = self.layer3(out) # third layer
           

        return out # return prediction

In [103]:
model_nn = fully_connected_nn()
model_nn = model_nn.to(device)

In [104]:
optimizer = optim.Adam(model_nn.parameters(), lr = 0.1)
criterion = nn.MSELoss()

## Min NN Predcition

In [105]:
start = time.time()  # Start timer
num_epochs = 101  # Initialize the number of epochs

for epoch in range(num_epochs):  # Iterate through epochs
    running_loss = 0  # Initialize training loss
    running_loss_test = 0  # Initialize testing loss

    model_nn.train()  # Put model into training mode

    for batch_data, batch_labels in train_loader:  # Get data batches
        optimizer.zero_grad()  # Clear gradients

        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)  # Move to GPU if needed
        outputs = model_nn(batch_data).squeeze()  # Forward pass
        loss = criterion(outputs, batch_labels)  # Compute MSE loss

        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()  # Update training loss


    # Evaluation phase
    model_nn.eval()  # Put model into evaluation mode
    all_preds_test = []  # Store test predictions
    all_labels_test = []

    with torch.no_grad():
        for batch_data_test, batch_labels_test in test_loader:
            batch_data_test, batch_labels_test = batch_data_test.to(device), batch_labels_test.to(device)
            test_outputs = model_nn(batch_data_test).squeeze()  # Forward pass
            test_loss = criterion(test_outputs, batch_labels_test.float().squeeze())  # Compute test loss

            running_loss_test += test_loss.item()  # Update test loss
            all_preds_test.extend(test_outputs.cpu().numpy().flatten())
            all_labels_test.extend(batch_labels_test.cpu().numpy().flatten())


    if (epoch) % 10 == 0:  # Print every 10 epochs
        print(f"Epoch [{epoch}/{num_epochs-1}], Time: {round(time.time() - start, 2)} sec")
        print(f"Train Loss: {running_loss/len(train_loader):.4f}, Test Loss: {running_loss_test/len(test_loader):.4f}")

print("Training complete.")  # Confirm training completion

Epoch [0/100], Time: 1.37 sec
Train Loss: 401817074530.6902, Test Loss: 2207.7432
Epoch [10/100], Time: 12.4 sec
Train Loss: 1327.9517, Test Loss: 1208.7710
Epoch [20/100], Time: 23.75 sec
Train Loss: 1326.2743, Test Loss: 1206.1139
Epoch [30/100], Time: 35.42 sec
Train Loss: 1319.6931, Test Loss: 1197.6754
Epoch [40/100], Time: 46.72 sec
Train Loss: 1306.6580, Test Loss: 1182.3901
Epoch [50/100], Time: 58.05 sec
Train Loss: 1296.9039, Test Loss: 1163.1656
Epoch [60/100], Time: 69.29 sec
Train Loss: 1223.9571, Test Loss: 1083.9795
Epoch [70/100], Time: 80.5 sec
Train Loss: 1077.0696, Test Loss: 942.6922
Epoch [80/100], Time: 91.89 sec
Train Loss: 857.0898, Test Loss: 720.2616
Epoch [90/100], Time: 103.04 sec
Train Loss: 584.5089, Test Loss: 446.8935
Epoch [100/100], Time: 114.52 sec
Train Loss: 346.0352, Test Loss: 242.6574
Training complete.


### Max NN Prediction

In [106]:
X_train, X_test, y_max_train, y_max_test = train_test_split(X, max_y, test_size=0.2, random_state=42)

X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

y_max_train = np.array(y_max_train, dtype = np.float32)
y_max_test = np.array(y_max_test, dtype = np.float32)

In [107]:
X_train_tensor = torch.from_numpy(X_train).squeeze() 
y_max_train_tensor = torch.from_numpy(y_max_train).squeeze()
X_test_tensor = torch.from_numpy(X_test).squeeze()
y_max_test_tensor = torch.from_numpy(y_max_test).squeeze() 

In [108]:
train_data = CustomDataset(X_train_tensor, y_max_train_tensor)
test_data = CustomDataset(X_test_tensor, y_max_test_tensor) 

In [109]:
train_loader = DataLoader(dataset = train_data,
                           batch_size = 10, shuffle = True, num_workers = 0)
test_loader = DataLoader(dataset = test_data, batch_size = 10) 

In [None]:
start = time.time()  # Start timer
num_epochs = 101  # Initialize the number of epochs

for epoch in range(num_epochs):  # Iterate through epochs
    running_loss = 0  # Initialize training loss
    running_loss_test = 0  # Initialize testing loss

    model_nn.train()  # Put model into training mode

    for batch_data, batch_labels in train_loader:  # Get data batches
        optimizer.zero_grad()  # Clear gradients

        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)  # Move to GPU if needed
        outputs = model_nn(batch_data).squeeze()  # Forward pass
        loss = criterion(outputs, batch_labels)  # Compute MSE loss

        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()  # Update training loss


    # Evaluation phase
    model_nn.eval()  # Put model into evaluation mode
    all_preds_test = []  # Store test predictions
    all_labels_test = []

    with torch.no_grad():
        for batch_data_test, batch_labels_test in test_loader:
            batch_data_test, batch_labels_test = batch_data_test.to(device), batch_labels_test.to(device)
            test_outputs = model_nn(batch_data_test).squeeze()  # Forward pass
            test_loss = criterion(test_outputs, batch_labels_test.float().squeeze())  # Compute test loss

            running_loss_test += test_loss.item()  # Update test loss
            all_preds_test.extend(test_outputs.cpu().numpy().flatten())
            all_labels_test.extend(batch_labels_test.cpu().numpy().flatten())


    if (epoch) % 10 == 0:  # Print every 10 epochs
        print(f"Epoch [{epoch}/{num_epochs-1}], Time: {round(time.time() - start, 2)} sec")
        print(f"Train Loss: {running_loss/len(train_loader):.4f}, Test Loss: {running_loss_test/len(test_loader):.4f}")

print("Training complete.")  # Confirm training completion

Epoch [0/100], Time: 1.37 sec
Train Loss: 16438.5735, Test Loss: 9065.1093
Epoch [10/100], Time: 13.64 sec
Train Loss: 15016.8084, Test Loss: 8134.9933
Epoch [20/100], Time: 25.63 sec
Train Loss: 14911.1695, Test Loss: 8158.5663
Epoch [30/100], Time: 37.83 sec
Train Loss: 14916.2252, Test Loss: 8172.2402
Epoch [40/100], Time: 51.36 sec
Train Loss: 14912.8714, Test Loss: 8171.1292
Epoch [50/100], Time: 64.87 sec
Train Loss: 14914.7381, Test Loss: 8173.8925
Epoch [60/100], Time: 77.23 sec
Train Loss: 14910.7257, Test Loss: 8170.4131
Epoch [70/100], Time: 89.4 sec
Train Loss: 14917.6117, Test Loss: 8171.8787
Epoch [80/100], Time: 103.78 sec
Train Loss: 14907.5453, Test Loss: 8172.4904
Epoch [90/100], Time: 116.25 sec
Train Loss: 14912.0808, Test Loss: 8171.2051
Epoch [100/100], Time: 128.75 sec
Train Loss: 14908.9411, Test Loss: 8169.8372
Training complete.
