In [34]:
import os

import lightning as L
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from torch.utils.data import DataLoader, random_split, TensorDataset
from torchvision.datasets import MNIST

device = "cuda" if torch.cuda.is_available() else "cpu"

In [35]:
class Generator(nn.Module):
    def __init__(self, product_dims, customer_dims):
        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(product_dims, 64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(64, 64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(64, 64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(64, customer_dims),
        )

    def forward(self, product_vector):
        return self.model(product_vector)

In [36]:
class Discriminator(nn.Module):
    def __init__(self, customer_dims):
        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(customer_dims, 64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(64, 64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(64, 32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(32, 32),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )

    def forward(self, customer_vector):
        return self.model(customer_vector)

In [5]:
customers = pd.read_csv('data/customers.csv')

In [6]:
customers.head()

Unnamed: 0,ID,TYPE,GENDER,DOB,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,POST_CODE,CITY,COUNTRY,POST_SUBSCRIBED,SMS_SUBSCRIBED,EMAIL_SUBSCRIBED,PHONE_SUBSCRIBED,SOCIAL_SUBSCRIBED,ANONYMISE
0,3e0698d7a4,New,Male,1988-04-16,Zelda,Schene,Zelda.SCHENE@yahoo.com,799947361,BH16,Liverpool,UK,True,True,True,True,True,True
1,3e06a74d39,Lookers,Male,2003-04-13,Betty,Asherman,Betty.ASHERMAN@yahoo.com,799947361,M1,Stockholm,Sweden,True,True,True,True,True,True
2,3e06b7960c,Loyal,Male,1972-04-20,Gusta,Leibowitz,Gusta.LEIBOWITZ@yahoo.com,799947361,BH16,Seattle,US,True,True,True,True,True,True
3,3e06ed2cfb,Need-based,Male,2002-04-13,Mona,Roca,Mona.ROCA@yahoo.com,799947361,BH16,Austin,US,True,True,True,True,True,True
4,3e07060e00,Loyal,Female,1998-04-14,Lesley,Bieck,Lesley.BIECK@yahoo.com,799947361,M1,Gothenburg,Sweden,True,True,True,True,True,True


In [7]:
pd.get_dummies(customers['GENDER'])[['Female', 'Male']]
pd.get_dummies(customers['COUNTRY'])
pd.get_dummies(customers['TYPE'])
((datetime.today() - pd.to_datetime(customers['DOB']))/timedelta(days=365)).round()

0         35.0
1         20.0
2         51.0
3         21.0
4         25.0
          ... 
536779    32.0
536780    25.0
536781    28.0
536782    40.0
536783    31.0
Name: DOB, Length: 536784, dtype: float64

In [23]:
final = pd.concat([
    pd.get_dummies(customers['GENDER'])[['Female', 'Male']],
    pd.get_dummies(customers['COUNTRY']),
    pd.get_dummies(customers['TYPE'])
], axis=1)
final['age'] = ((datetime.today() - pd.to_datetime(customers['DOB']))/timedelta(days=365)).round()/100

In [24]:
final

Unnamed: 0,Female,Male,Sweden,UK,US,Bargain hunter,Impulse,Lookers,Loyal,Need-based,New,age
0,0,1,0,1,0,0,0,0,0,0,1,0.35
1,0,1,1,0,0,0,0,1,0,0,0,0.20
2,0,1,0,0,1,0,0,0,1,0,0,0.51
3,0,1,0,0,1,0,0,0,0,1,0,0.21
4,1,0,1,0,0,0,0,0,1,0,0,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...
536779,0,1,1,0,0,0,0,0,1,0,0,0.32
536780,0,1,0,1,0,0,1,0,0,0,0,0.25
536781,1,0,1,0,0,0,0,0,0,1,0,0.28
536782,1,0,0,1,0,0,0,0,0,0,1,0.40


In [86]:
train_size = 0.8
batch_size = 1024
lr = 3e-4
num_epochs = 50
customer_dims = len(final.columns)
product_dims = 10
fixed_noise = torch.randn((batch_size, product_dims)).to(device)
step = 0

disc = Discriminator(customer_dims).to(device)
gen = Generator(product_dims, customer_dims).to(device)
criterion = nn.BCELoss()
opt_disc = optim.Adam(disc.parameters(), lr=lr)
opt_gen = optim.Adam(gen.parameters(), lr=lr)

train_inds = random.sample(list(final.index.values), int(train_size*len(final.index)))
input_data = torch.tensor(final.iloc[train_inds].values.astype(np.float32))
labels = torch.ones_like(input_data)
train_dataset = TensorDataset(
    input_data,
    labels
)
loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

In [87]:
n_total_steps = len(loader)
for epoch in range(num_epochs):
    for batch_idx, (real, _) in enumerate(loader):
        real = real.to(device)
        batch_size = real.shape[0]
        
        # Train Discriminator: max log(D(real)) + log(1 - D(G(z)))
        noise = torch.randn(batch_size, product_dims)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake)/2
        disc.zero_grad()
        lossD.backward(retain_graph=True)
        opt_disc.step()
        
        # Train Generator min log(1 - D(G(z))) -> max log(D(G(z)))
        output = disc(fake).view(-1)
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()
        
        if ((batch_idx + 1) % 50 == 0) or batch_idx == 0:
            print(
                f"Epoch [{epoch}/{num_epochs}] \ "
                f"Step [{batch_idx + 1}/{n_total_steps}] \ "
                f"Loss D: {lossD:.4f}, Loss G: {lossG:.4f}"
            )
            
            if batch_idx == 0:
                with torch.no_grad():
                    fake = gen(fixed_noise)
                    print(fake)

Epoch [0/50] \ Step [1/420] \ Loss D: 0.6926, Loss G: 0.6559
tensor([[-0.0140, -0.1188, -0.1028,  ..., -0.1018,  0.0619, -0.1043],
        [-0.0398, -0.1191, -0.1014,  ..., -0.1101,  0.0680, -0.0799],
        [-0.0441, -0.1907, -0.1022,  ..., -0.1105,  0.0747, -0.1173],
        ...,
        [-0.0541, -0.0834, -0.0916,  ..., -0.1065,  0.0589, -0.1102],
        [-0.0211, -0.1038, -0.1204,  ..., -0.0861,  0.0662, -0.0829],
        [-0.0426, -0.1001, -0.0777,  ..., -0.0813,  0.0676, -0.1026]])
Epoch [0/50] \ Step [50/420] \ Loss D: 0.6557, Loss G: 0.6475
Epoch [0/50] \ Step [100/420] \ Loss D: 0.6512, Loss G: 1.0890
Epoch [0/50] \ Step [150/420] \ Loss D: 0.6959, Loss G: 0.7920
Epoch [0/50] \ Step [200/420] \ Loss D: 0.6040, Loss G: 0.7421
Epoch [0/50] \ Step [250/420] \ Loss D: 0.6879, Loss G: 0.6926
Epoch [0/50] \ Step [300/420] \ Loss D: 0.5507, Loss G: 0.9104
Epoch [0/50] \ Step [350/420] \ Loss D: 0.7408, Loss G: 0.9033
Epoch [0/50] \ Step [400/420] \ Loss D: 0.6627, Loss G: 0.8838
Ep

Epoch [8/50] \ Step [50/420] \ Loss D: 0.4444, Loss G: 1.2153
Epoch [8/50] \ Step [100/420] \ Loss D: 0.4012, Loss G: 1.4424
Epoch [8/50] \ Step [150/420] \ Loss D: 0.4895, Loss G: 1.1718
Epoch [8/50] \ Step [200/420] \ Loss D: 0.3200, Loss G: 1.6712
Epoch [8/50] \ Step [250/420] \ Loss D: 0.5106, Loss G: 1.4671
Epoch [8/50] \ Step [300/420] \ Loss D: 0.2838, Loss G: 1.8628
Epoch [8/50] \ Step [350/420] \ Loss D: 0.3787, Loss G: 1.8018
Epoch [8/50] \ Step [400/420] \ Loss D: 0.4419, Loss G: 1.8933
Epoch [9/50] \ Step [1/420] \ Loss D: 0.4468, Loss G: 1.7037
tensor([[ 0.6953,  0.4231,  0.3929,  ...,  0.6976,  0.7842,  0.1792],
        [ 0.9001,  0.2915, -0.0113,  ..., -0.1842, -0.7206,  0.3837],
        [ 0.5465,  0.6289,  0.5885,  ...,  0.7743,  0.8091,  0.2316],
        ...,
        [ 0.7126,  0.4431,  0.0478,  ..., -0.1528, -0.4877,  0.3609],
        [ 0.2446,  0.5760,  0.0725,  ...,  0.2431,  0.5348,  0.2187],
        [ 0.6187,  0.5323,  0.8134,  ...,  0.8190,  0.7097,  0.1860]])
Ep

Epoch [16/50] \ Step [50/420] \ Loss D: 0.2833, Loss G: 2.3021
Epoch [16/50] \ Step [100/420] \ Loss D: 0.2857, Loss G: 2.1063
Epoch [16/50] \ Step [150/420] \ Loss D: 0.1960, Loss G: 2.4509
Epoch [16/50] \ Step [200/420] \ Loss D: 0.2206, Loss G: 2.4046
Epoch [16/50] \ Step [250/420] \ Loss D: 0.1968, Loss G: 2.2608
Epoch [16/50] \ Step [300/420] \ Loss D: 0.1999, Loss G: 2.3999
Epoch [16/50] \ Step [350/420] \ Loss D: 0.1957, Loss G: 2.1766
Epoch [16/50] \ Step [400/420] \ Loss D: 0.2009, Loss G: 2.3286
Epoch [17/50] \ Step [1/420] \ Loss D: 0.3376, Loss G: 2.1815
tensor([[ 9.4644e-01,  1.8167e-03,  9.8826e-01,  ...,  9.3261e-01,
         -3.1334e-02,  3.3736e-01],
        [-1.6750e-02,  9.5042e-01,  1.8995e-02,  ...,  9.3469e-01,
         -2.5579e-02,  3.1730e-01],
        [ 9.5347e-01, -3.2225e-02,  9.4208e-02,  ...,  8.9380e-01,
         -3.9513e-02,  3.2439e-01],
        ...,
        [-3.8916e-02,  9.1021e-01,  9.7693e-01,  ...,  8.6436e-01,
         -2.5106e-02,  3.0091e-01],
  

KeyboardInterrupt: 

In [27]:
"""
test_dataset = TensorDataset(
    torch.tensor(X[~X.index.isin(train_inds)].values.astype(np.float32)),
    torch.tensor(y[~y.index.isin(train_inds)].values.astype(np.float32))
)
"""


# test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

In [17]:
customers

Unnamed: 0,ID,TYPE,GENDER,DOB,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,POST_CODE,CITY,COUNTRY,POST_SUBSCRIBED,SMS_SUBSCRIBED,EMAIL_SUBSCRIBED,PHONE_SUBSCRIBED,SOCIAL_SUBSCRIBED,ANONYMISE
0,3e0698d7a4,New,Male,1988-04-16,Zelda,Schene,Zelda.SCHENE@yahoo.com,799947361,BH16,Liverpool,UK,True,True,True,True,True,True
1,3e06a74d39,Lookers,Male,2003-04-13,Betty,Asherman,Betty.ASHERMAN@yahoo.com,799947361,M1,Stockholm,Sweden,True,True,True,True,True,True
2,3e06b7960c,Loyal,Male,1972-04-20,Gusta,Leibowitz,Gusta.LEIBOWITZ@yahoo.com,799947361,BH16,Seattle,US,True,True,True,True,True,True
3,3e06ed2cfb,Need-based,Male,2002-04-13,Mona,Roca,Mona.ROCA@yahoo.com,799947361,BH16,Austin,US,True,True,True,True,True,True
4,3e07060e00,Loyal,Female,1998-04-14,Lesley,Bieck,Lesley.BIECK@yahoo.com,799947361,M1,Gothenburg,Sweden,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536779,83975980ac,Loyal,Male,1991-04-16,Glenn,Rorman,Glenn.RORMAN@yahoo.com,799947361,ME20,Kiruna,Sweden,True,True,True,True,True,True
536780,839766e877,Impulse,Male,1998-04-14,Shanell,Hartkorn,Shanell.HARTKORN@yahoo.com,799947361,ME20,Glasgow,UK,True,True,True,True,True,True
536781,8397796cf8,Need-based,Female,1995-04-15,Delma,Lambo,Delma.LAMBO@yahoo.com,799947361,L15,Kiruna,Sweden,True,True,True,True,True,True
536782,83978b079a,New,Female,1983-04-18,Christiana,Dakin,Christiana.DAKIN@yahoo.com,799947361,M1,London,UK,True,True,True,True,True,True
