In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np

In [2]:
def get_data_loader(file, features, features_to_encode):
    df = pd.read_csv(file)
    df["Episode_Length_minutes"] = pd.to_numeric(df["Episode_Length_minutes"], errors="coerce")
    df["Number_of_Ads"] = pd.to_numeric(df["Number_of_Ads"], errors="coerce")
    df = df.dropna(subset=["Episode_Length_minutes"])
    df = df.dropna(subset=["Number_of_Ads"])
    def one_hot(df, feature):
        encoded = pd.get_dummies(df[[feature]])
        result = pd.concat([df, encoded], axis=1)
        result = result.drop([feature], axis=1)
        return(result) 
    y = df[["Listening_Time_minutes"]]
    df = df[features]
    for to_encode in features_to_encode:
        df = one_hot(df, to_encode)
    x = df
    x_tensor = torch.from_numpy(x.to_numpy().astype(np.float32))
    y_tensor = torch.from_numpy(y.to_numpy().astype(np.float32))
    dataset = TensorDataset(x_tensor, y_tensor)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
    return dataloader

features=["Episode_Length_minutes", "Number_of_Ads", "Episode_Sentiment"]
features_to_encode = ["Episode_Sentiment"]
train_dataloader = get_data_loader("train.csv", 
    features=features,
    features_to_encode=features_to_encode )

In [3]:
class PodcastPredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(5, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024,  2048),
            nn.ReLU(),
            nn.Linear(2048, 4096),
            nn.ReLU(),
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1)
        )
    def forward(self, x):
        return self.net(x)

In [4]:
def rmse_loss(y_hat, y):
    return torch.sqrt(F.mse_loss(y_hat, y))

In [9]:
lr = 1e-4
lam = 0
num_epochs = 5

In [10]:
model = PodcastPredictor()
optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lam)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    i = 0
    for xb, yb in train_dataloader:
        y_hat = model(xb)
        loss = rmse_loss(y_hat, yb)
        optim.zero_grad()
        loss.backward()
        optim.step()
        total_loss += loss.item() * xb.size(0)
        if i % 1000 == 0:
            print(i)
        i += 1
    print(f"Epoch {epoch+1}, RMSE: {total_loss / len(train_dataloader.dataset):.4f}")


0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
Epoch 1, RMSE: 10.9282
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
Epoch 2, RMSE: 10.7488
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
Epoch 3, RMSE: 10.7059
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
Epoch 4, RMSE: 10.6783
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
Epoch 5, RMSE: 10.6660


In [11]:
torch.save(model, "model.pth")