In [1]:
pip install torch



In [2]:
!wget -q --show-progress -O subs.csv https://www.dropbox.com/scl/fi/x4k7vhf6pze0z5w333573/Subscribers.csv?rlkey=lkntddg23yktg1hdgbgt8nlcg&dl=0



In [3]:
import pandas as pd
import numpy as np

In [4]:
data = pd.read_csv('subs.csv')

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 51 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   V1           300000 non-null  float64
 1   V2           300000 non-null  float64
 2   V3           300000 non-null  float64
 3   V4           300000 non-null  float64
 4   V5           300000 non-null  float64
 5   V6           300000 non-null  float64
 6   V7           300000 non-null  float64
 7   V8           300000 non-null  float64
 8   V9           300000 non-null  float64
 9   V10          300000 non-null  float64
 10  V11          300000 non-null  float64
 11  V12          300000 non-null  float64
 12  V13          300000 non-null  float64
 13  V14          300000 non-null  float64
 14  V15          300000 non-null  float64
 15  V16          300000 non-null  float64
 16  V17          300000 non-null  float64
 17  V18          300000 non-null  float64
 18  V19          300000 non-

In [6]:
x = data.drop(columns=['Subscribers']).values
y = data['Subscribers']

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size =0.2, random_state=32)

In [9]:
scaler = StandardScaler()

x_train_sd = scaler.fit_transform(x_train)
x_test_sd = scaler.transform(x_test)

In [10]:
y_train.value_counts()

Unnamed: 0_level_0,count
Subscribers,Unnamed: 1_level_1
0,198837
1,41163


In [11]:
198837 + 41163

240000

In [12]:
240000 /(2*198837)

0.6035094072028848

In [13]:
240000 /(2*41163)

2.915239414036878

In [14]:
import torch

In [15]:
cw = np.array([0.603, 2.915])

In [16]:
cw_tensor = torch.tensor(cw, dtype=torch.float32)

In [17]:
from torch.utils.data import Dataset, DataLoader # DataLoaders helps in chunking of data i.e. random chunks of data

In [18]:
# len , getitems

class TabularData (Dataset):
  def __init__(self, x,y):
    self.x = torch.tensor(x, dtype=torch.float32)
    self.y = torch.tensor(y.values, dtype=torch.float32)

  def __len__(self):
    return len(self.x)

  def __getitem__(self, idx):
    return self.x[idx], self.y[idx]

train_ds = TabularData(x_train_sd, y_train)
test_ds = TabularData(x_test_sd, y_test)

In [19]:
train_loader = DataLoader(train_ds, batch_size=10000, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)

In [20]:
import torch.nn as nn
import torch.nn.functional as F

In [21]:
class DeepFFN(nn.Module):
  def __init__(self, input_dim):
    # we could customise this further
    # by providing the ability to have a customr number of layers and nodes
    super(DeepFFN, self).__init__()
    self.fc1 = nn.Linear(input_dim, 30)
    self.bn1 = nn.BatchNorm1d(30)
    self.dp1 = nn.Dropout(0.2)

    self.fc2 = nn.Linear(30, 20)
    self.bn2 = nn.BatchNorm1d(20)
    self.dp2 = nn.Dropout(0.2)

    self.fc3 = nn.Linear(20, 10)
    self.bn3 = nn.BatchNorm1d(10)
    self.dp3 = nn.Dropout(0.2)

    self.out = nn.Linear(10,1)
    self._init_wts()

  def _init_wts(self):
    for m in self.modules():
      if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        nn.init.zeros_(m.bias)



  def forward(self, x):
    x = self.dp1(self.bn1(F.relu(self.fc1(x))))
    x = self.dp2(self.bn2(F.relu(self.fc2(x))))
    x = self.dp3(self.bn3(F.relu(self.fc3(x))))

    return self.out(x)


In [22]:
torch.cuda.is_available()

True

In [23]:
device=torch.device('cuda') #torch.device('cpu') if result occurs false

In [24]:
model=DeepFFN(input_dim=50).to(device)
loss_fn=nn.BCEWithLogitsLoss(pos_weight=cw_tensor[1].to(device))
optimizer=torch.optim.Adam(model.parameters(), lr=0.001)
l1_lambda=0.01

In [25]:
import os

In [26]:
def save_checkpoint(model,epoch,loss):
  path=f'./model_outputs/saved-{epoch:02d}-{loss:.4f}.pt'
  os.makedirs('./model_outputs', exist_ok=True)
  torch.save(model.state_dict(), path)
  print(f'Model saved at {path}')


In [27]:
from sklearn.metrics import roc_auc_score

In [28]:
class AucTracker:
  def __init__(self,x,y):
    self.x=torch.tensor(x, dtype=torch.float32).to(device)
    self.y=y
    self.auc_scores=[]
   # self.y=torch.tensor(y.values, dtype=torch.float32).to(device)
    #self.reset()

  def compute(self,model,epoch):
    model.eval()

    with torch.no_grad():
      logits=model(self.x).squeeze().cpu().numpy()
      probs=1/(1+np.exp(-logits))
      auc=roc_auc_score(self.y, probs)
      self.auc_scores.append((auc))
      print(f'AUC on test set for epoch {epoch}:{auc:.4f}')
     # print(f'Epoch:{epoch}, AUC:{auc}')
auc_tracker=AucTracker(x_test_sd,y_test)

In [29]:
class EarlyStopping:
  def __init__(self, patience=5, min_delta=0):
    self.patience=patience
   # self.min_delta=min_delta
    self.counter=0
    self.best_loss=float('inf')
    self.should_stop=False
  #  self.early_stop=False
  def step(self,loss):
    if loss<self.best_loss:
      self.best_loss=loss
      self.counter=0
    else:
      self.counter+=1
      if self.counter>=self.patience:
        self.should_stop=True

In [30]:
early_stopper=EarlyStopping(patience=5)

In [31]:
def train_model(model,train_loader,loss_fn,optimizer,l1_lambda,n_epochs,auc_tracker,early_stopper):
  model.train()
  step=0
  for epoch in range(n_epochs):
    epoch_loss=0.0
    for batch_idx,(xb,yb) in enumerate(train_loader):
      xb,yb=xb.to(device),yb.to(device).unsqueeze(1)
      optimizer.zero_grad() #this is only resetting or forgetting pervious batch
                              #grads
      outputs=model(xb)

      loss=loss_fn(outputs,yb)
      l1_loss=sum(param.abs().sum() for param in model.parameters())
      total_loss=loss+l1_lambda*l1_loss
      total_loss.backward()  #this calculates gradients
      optimizer.step() # this updates the wts using current grads
      epoch_loss+=total_loss.item()
      #

      step+=1

      if step%50==0:
        pass
        #do something
        save_checkpoint(model,epoch+1,total_loss.item())

    avg_loss=epoch_loss/len(train_loader)
    print(f'Epoch:{epoch+1}, Avg Loss:{avg_loss}')
    auc_tracker.compute(model,epoch+1)

    early_stopper.step(avg_loss)

    if early_stopper.should_stop:
      print('Early Stopping Triggered')
      break


In [32]:
train_model(model, train_loader,loss_fn,optimizer,
            l1_lambda,n_epochs=200,
            auc_tracker=auc_tracker,
            early_stopper=early_stopper)

Epoch:1, Avg Loss:5.660636444886525
AUC on test set for epoch 1:0.5296


  probs=1/(1+np.exp(-logits))


Epoch:2, Avg Loss:5.053987483183543
AUC on test set for epoch 2:0.5590


  probs=1/(1+np.exp(-logits))


Model saved at ./model_outputs/saved-03-4.7939.pt
Epoch:3, Avg Loss:4.597788731257121
AUC on test set for epoch 3:0.5995


  probs=1/(1+np.exp(-logits))


Epoch:4, Avg Loss:4.194533407688141
AUC on test set for epoch 4:0.6245


  probs=1/(1+np.exp(-logits))


Model saved at ./model_outputs/saved-05-3.9547.pt
Epoch:5, Avg Loss:3.8266600171724954
AUC on test set for epoch 5:0.6381
Epoch:6, Avg Loss:3.492070496082306
AUC on test set for epoch 6:0.6449
Model saved at ./model_outputs/saved-07-3.2603.pt
Epoch:7, Avg Loss:3.196041206518809
AUC on test set for epoch 7:0.6492
Epoch:8, Avg Loss:2.9347426891326904
AUC on test set for epoch 8:0.6534
Model saved at ./model_outputs/saved-09-2.7610.pt
Epoch:9, Avg Loss:2.7023157874743142
AUC on test set for epoch 9:0.6571
Epoch:10, Avg Loss:2.495922734340032
AUC on test set for epoch 10:0.6612
Model saved at ./model_outputs/saved-11-2.3384.pt
Epoch:11, Avg Loss:2.3147254586219788
AUC on test set for epoch 11:0.6656
Epoch:12, Avg Loss:2.1580251355965934
AUC on test set for epoch 12:0.6700
Model saved at ./model_outputs/saved-13-2.0213.pt
Epoch:13, Avg Loss:2.023047889272372
AUC on test set for epoch 13:0.6736
Epoch:14, Avg Loss:1.9056583046913147
AUC on test set for epoch 14:0.6779
Model saved at ./model_o