In [1]:
pip install torch



In [None]:
!wget -q --show-progress -O subs.csv https://www.dropbox.com/scl/fi/x4k7vhf6pze0z5w333573/Subscribers.csv?rlkey=lkntddg23yktg1hdgbgt8nlcg&dl=0



In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the CSV written by the download cell into a DataFrame.
data = pd.read_csv('subs.csv')

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts) before modelling.
data.info()

In [None]:
# Split the frame into the 'Subscribers' label column and the remaining
# columns, which become the feature matrix as a NumPy array.
y = data['Subscribers']
x = data.drop(columns='Subscribers').to_numpy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — confirm that is intended
# given the class imbalance inspected further down.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=32,
)

In [None]:
# Estimate the scaling statistics from the training rows only, then apply that
# same fitted transform to both splits.
scaler = StandardScaler().fit(x_train)

x_train_sd = scaler.transform(x_train)
x_test_sd = scaler.transform(x_test)

In [None]:
# Count how many training rows fall into each label value (class balance check).
y_train.value_counts()

In [None]:
# Scratch arithmetic: sum of two counts, presumably the per-class totals shown
# by value_counts() in the previous cell — confirm against that output.
198837 + 41163

In [None]:
# Scratch arithmetic: total / (2 * count) for the 198837-row group; this looks
# like the "balanced" weight n_samples / (n_classes * class_count) with two
# classes — TODO confirm.
240000 /(2*198837)

In [None]:
# Scratch arithmetic: the same total / (2 * count) formula for the 41163-row group.
240000 /(2*41163)

In [None]:
import torch

In [None]:
# "Balanced" class weights, n_samples / (n_classes * class_count), ordered by
# ascending label value. They are derived from the training labels instead of
# being typed in by hand, so they stay correct if the data or the split changes
# (for counts of 198837 / 41163 this gives roughly [0.6035, 2.9152]).
class_counts = y_train.value_counts().sort_index()
cw = (len(y_train) / (len(class_counts) * class_counts)).to_numpy(dtype=np.float64)

In [None]:
# Copy the class weights into a float32 tensor so they can be passed to the loss.
cw_tensor = torch.tensor(cw, dtype=torch.float32)

In [None]:
from torch.utils.data import Dataset, DataLoader # DataLoaders helps in chunking of data i.e. random chunks of data

In [None]:
# A map-style Dataset subclass must implement __len__ and __getitem__.

class TabularData(Dataset):
  """Map-style dataset that pairs each feature row with its target value.

  Args:
    x: array-like of numeric features, one row per sample.
    y: array-like of numeric targets (pandas Series, NumPy array or list)
      holding one entry per row of `x`.

  Raises:
    ValueError: if `x` and `y` do not contain the same number of samples.
  """

  def __init__(self, x,y):
    self.x = torch.tensor(x, dtype=torch.float32)
    # np.asarray accepts a Series (its index is ignored), an ndarray or a
    # plain list, so callers are not forced to pass a pandas object.
    self.y = torch.tensor(np.asarray(y), dtype=torch.float32)

    if len(self.x) != len(self.y):
      raise ValueError(
          f'x and y must have the same number of samples, '
          f'got {len(self.x)} and {len(self.y)}')

  def __len__(self):
    """Number of samples in the dataset."""
    return len(self.x)

  def __getitem__(self, idx):
    """Return the `(features, target)` pair stored at position `idx`."""
    return self.x[idx], self.y[idx]

# Wrap the standardised train/test features and their labels as datasets.
train_ds = TabularData(x_train_sd, y_train)
test_ds = TabularData(x_test_sd, y_test)

In [None]:
# Training batches (10000 rows each) are reshuffled every epoch; the test
# loader keeps a fixed order with batches of 64.
train_loader = DataLoader(train_ds, batch_size=10000, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class DeepFFN(nn.Module):
  """Feed-forward network with three hidden blocks and a single-logit head.

  Every hidden block applies Linear -> ReLU -> BatchNorm1d -> Dropout(0.2);
  the block widths are 30, 20 and 10. The head is a plain Linear layer, so
  the network returns raw logits rather than probabilities.
  """

  def __init__(self, input_dim):
    # The layer sizes are fixed here; the number of layers and units per
    # layer could be made configurable in a later revision.
    super().__init__()

    # Hidden block 1
    self.fc1 = nn.Linear(input_dim, 30)
    self.bn1 = nn.BatchNorm1d(30)
    self.dp1 = nn.Dropout(0.2)

    # Hidden block 2
    self.fc2 = nn.Linear(30, 20)
    self.bn2 = nn.BatchNorm1d(20)
    self.dp2 = nn.Dropout(0.2)

    # Hidden block 3
    self.fc3 = nn.Linear(20, 10)
    self.bn3 = nn.BatchNorm1d(10)
    self.dp3 = nn.Dropout(0.2)

    # Output head: one logit per sample
    self.out = nn.Linear(10, 1)

    self._init_wts()

  def _init_wts(self):
    """Kaiming-normal initialise every Linear weight and zero every Linear bias."""
    linear_layers = [layer for layer in self.modules() if isinstance(layer, nn.Linear)]
    for layer in linear_layers:
      nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
      nn.init.zeros_(layer.bias)

  def forward(self, x):
    """Run `x` through the three hidden blocks and return the head's logits."""
    hidden_blocks = (
        (self.fc1, self.bn1, self.dp1),
        (self.fc2, self.bn2, self.dp2),
        (self.fc3, self.bn3, self.dp3),
    )

    h = x
    for linear, norm, dropout in hidden_blocks:
      h = dropout(norm(F.relu(linear(h))))

    return self.out(h)


In [None]:
# Check whether PyTorch can see a usable CUDA device in this runtime.
torch.cuda.is_available()

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook runs end to end on machines without CUDA instead of needing a
# manual edit of this cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Size the input layer from the feature matrix rather than a literal, so the
# model keeps matching the data if columns are added or removed.
model = DeepFFN(input_dim=x_train_sd.shape[1]).to(device)

# BCEWithLogitsLoss keeps negatives at weight 1, so pos_weight must be the
# positive-class weight *relative to* the negative one (cw[1] / cw[0], i.e.
# the negative/positive count ratio). Passing cw[1] on its own would
# under-weight positives compared with the balanced weights computed earlier.
pos_weight = (cw_tensor[1] / cw_tensor[0]).reshape(1).to(device)
loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
l1_lambda = 0.01  # multiplier for the L1 penalty added to the loss during training

In [None]:
import os

In [None]:
def save_checkpoint(model,epoch,loss):
  """Persist the model's state_dict under ./model_outputs and print the path.

  The file name carries the epoch (zero-padded to two digits) and the loss
  rounded to four decimals. The output directory is created when missing.
  """
  target=f'./model_outputs/saved-{epoch:02d}-{loss:.4f}.pt'
  os.makedirs('./model_outputs', exist_ok=True)

  weights=model.state_dict()
  torch.save(weights, target)

  print(f'Model saved at {target}')


In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
class AucTracker:
  """Scores a model on a fixed evaluation set with ROC AUC, once per call.

  Args:
    x: array-like of evaluation features; stored as a float32 tensor on the
      notebook-level `device`.
    y: true labels for `x`, passed unchanged to `roc_auc_score`.

  Attributes:
    auc_scores: AUC values in the order `compute` was called.
  """
  def __init__(self,x,y):
    self.x=torch.tensor(x, dtype=torch.float32).to(device)
    self.y=y
    self.auc_scores=[]

  def compute(self,model,epoch):
    """Evaluate `model` on the stored set, record the AUC, print and return it.

    The model is switched to eval mode for the forward pass and then put back
    into whatever mode it was in before, so calling this between epochs does
    not leave dropout and batch-norm in inference mode for later training.
    """
    was_training=model.training
    model.eval()
    try:
      with torch.no_grad():
        # reshape(-1) always yields a 1-D vector, even for a single sample.
        logits=model(self.x).reshape(-1)
        # torch.sigmoid is numerically stable for large-magnitude logits.
        probs=torch.sigmoid(logits).cpu().numpy()
    finally:
      model.train(was_training)

    auc=roc_auc_score(self.y, probs)
    self.auc_scores.append(auc)
    print(f'AUC on test set for epoch {epoch}:{auc:.4f}')
    return auc

auc_tracker=AucTracker(x_test_sd,y_test)

In [None]:
class EarlyStopping:
  """Signals when a monitored loss has stopped improving.

  Args:
    patience: number of consecutive non-improving `step` calls tolerated
      before `should_stop` becomes True.
    min_delta: minimum amount by which a loss must undercut the best loss to
      count as an improvement. The default 0 accepts any strict decrease.

  Attributes:
    best_loss: lowest loss accepted as an improvement so far.
    counter: number of consecutive `step` calls without improvement.
    should_stop: True once `counter` has reached `patience`.
  """
  def __init__(self, patience=5, min_delta=0):
    self.patience=patience
    self.min_delta=min_delta
    self.counter=0
    self.best_loss=float('inf')
    self.should_stop=False

  def step(self,loss):
    """Record one loss value and update `counter` / `should_stop`."""
    if loss<self.best_loss-self.min_delta:
      self.best_loss=loss
      self.counter=0
    else:
      self.counter+=1
      if self.counter>=self.patience:
        self.should_stop=True

In [None]:
# Stop once the monitored loss has failed to improve on 5 consecutive checks.
early_stopper=EarlyStopping(patience=5)

In [None]:
def train_model(model,train_loader,loss_fn,optimizer,l1_lambda,n_epochs,auc_tracker,early_stopper):
  """Train `model` with an L1-penalised loss, per-epoch AUC and early stopping.

  Args:
    model: network to optimise; each batch is moved to the device the model's
      parameters live on.
    train_loader: iterable of `(features, targets)` batches; every target
      batch gets a trailing axis added before the loss is computed.
    loss_fn: callable `loss_fn(outputs, targets)` returning a scalar tensor.
    optimizer: optimizer that updates the model's parameters.
    l1_lambda: multiplier for the sum of absolute values of all parameters,
      which is added to the data loss.
    n_epochs: maximum number of passes over `train_loader`.
    auc_tracker: object whose `compute(model, epoch)` is called after each epoch.
    early_stopper: object with `step(loss)` and a `should_stop` flag; it is
      given the epoch's average training loss (data loss plus L1 penalty).
  """
  # Follow the model's own device instead of relying on a notebook global.
  first_param=next(model.parameters(), None)
  run_device=first_param.device if first_param is not None else torch.device('cpu')

  step=0
  for epoch in range(n_epochs):
    # Re-enter training mode at the start of every epoch: the evaluation that
    # runs at the end of an epoch may leave the model in eval mode, which would
    # silently disable dropout and freeze batch-norm statistics from the
    # second epoch onwards.
    model.train()
    epoch_loss=0.0

    for xb,yb in train_loader:
      xb,yb=xb.to(run_device),yb.to(run_device).unsqueeze(1)

      optimizer.zero_grad()  # drop the gradients left over from the previous batch
      outputs=model(xb)

      loss=loss_fn(outputs,yb)
      l1_loss=sum(param.abs().sum() for param in model.parameters())
      total_loss=loss+l1_lambda*l1_loss

      total_loss.backward()  # compute gradients
      optimizer.step()       # update the weights using those gradients
      epoch_loss+=total_loss.item()

      step+=1
      # Write a checkpoint every 50 optimisation steps.
      if step%50==0:
        save_checkpoint(model,epoch+1,total_loss.item())

    avg_loss=epoch_loss/len(train_loader)
    print(f'Epoch:{epoch+1}, Avg Loss:{avg_loss}')
    auc_tracker.compute(model,epoch+1)

    early_stopper.step(avg_loss)

    if early_stopper.should_stop:
      print('Early Stopping Triggered')
      break


In [None]:
# Train for at most 200 epochs; the early stopper may end the run sooner.
train_model(model, train_loader,loss_fn,optimizer,
            l1_lambda,n_epochs=200,
            auc_tracker=auc_tracker,
            early_stopper=early_stopper)