In [None]:
!pip install nbimporter

In [None]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# import torch.nn.init as init
from torch.autograd import Variable
from dataLoader import loadData
from tensorflow import keras

In [2]:
# # Downloading Movielens-1m
# !curl -O http://files.grouplens.org/datasets/movielens/ml-1m.zip
# #     http://www.grouplens.org/system/files/ml-1m.zip
# !unzip ml-1m.zip
# !cd ml-1m/

In [3]:
# import zipfile
# with zipfile.ZipFile('ml-1m.zip', 'r') as zip_ref:
#     zip_ref.extractall('./')

In [4]:
seed = 47
np.random.seed(seed)

In [5]:
# load data
tr, vr = loadData('./ml-1m/ratings.dat', delimiter='::', seed=seed, transpose=False, valfrac=0.1)

reading data...
data read in 5.6060099601745605 seconds
loaded dense data matrix


In [6]:
np.save('./tr_movielens_1m', tr)
np.save('./vr_movielens_1m', vr)

In [7]:
tr

array([[5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [8]:
vr

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [9]:
def get_sparsity(mat):
    sparsity = float(len(mat.nonzero()[0]))
    sparsity /= (mat.shape[0] * mat.shape[1])
    sparsity *= 100
    return sparsity

In [10]:
get_sparsity(tr)

4.021525859265269

In [11]:
get_sparsity(vr)

0.44683670296601535

In [12]:
train = tr.copy()

In [13]:
if torch.cuda.is_available() == True:
    device = "cuda"
else:
    device = "cpu"

# Training Denoising Autoencoder

In [14]:
############## Pytorch model doesn't converge - to do - check #################

from keras.optimizers import Adam
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras import regularizers

def autoEncoder(X):
    '''
    Autoencoder for Collaborative Filter Model
    '''

    # Input
    input_layer = Input(shape=(X.shape[1],), name='UserScore')
    
    # Encoder
    # -----------------------------
    enc = Dense(512, activation='selu', name='EncLayer1', kernel_regularizer=regularizers.l2(0.000001))(input_layer)

    # Latent Space
    # -----------------------------
    lat_space = Dense(512, activation='selu', name='LatentSpace', kernel_regularizer=regularizers.l2(0.000001))(enc)
    lat_space = Dropout(0.5, name='Dropout')(lat_space) # Dropout

    # Decoder
    # -----------------------------
    dec = Dense(512, activation='selu', name='DecLayer1', kernel_regularizer=regularizers.l2(0.000001))(lat_space)

    # Output
    output_layer = Dense(X.shape[1], activation='linear', name='UserScorePred', kernel_regularizer=regularizers.l2(0.000001))(dec)

    # this model maps an input to its reconstruction
    model = Model(input_layer, output_layer)    
    
    return model

Using TensorFlow backend.


In [15]:
zero_mask = (train == 0)
positive_feedback_mask = (train > 3)
negative_feedback_mask = ((train < 4) * (1 - zero_mask))

In [16]:
assert (positive_feedback_mask + negative_feedback_mask != zero_mask).all()
assert (positive_feedback_mask + negative_feedback_mask == 1 - zero_mask).all()

In [17]:
get_sparsity(zero_mask), get_sparsity(positive_feedback_mask), get_sparsity(negative_feedback_mask)

(95.97847414073473, 2.3124349989099473, 1.7090908603553212)

In [18]:
100 - get_sparsity(zero_mask), get_sparsity(positive_feedback_mask), get_sparsity(negative_feedback_mask), get_sparsity(positive_feedback_mask) + get_sparsity(negative_feedback_mask)

(4.021525859265267, 2.3124349989099473, 1.7090908603553212, 4.021525859265268)

In [19]:
P = 0.4
mask_arr_neg = (np.random.rand(negative_feedback_mask.shape[0], negative_feedback_mask.shape[1]) > P)
y_neg = negative_feedback_mask
X_neg = negative_feedback_mask*mask_arr_neg # corrupting 

In [20]:
mask_arr_pos = (np.random.rand(positive_feedback_mask.shape[0], positive_feedback_mask.shape[1]) > P)
y_pos = positive_feedback_mask
X_pos = positive_feedback_mask*mask_arr_pos # corrupting 

In [21]:
get_sparsity(X_neg), get_sparsity(y_neg), get_sparsity(X_pos), get_sparsity(y_pos)

(1.026195215919772, 1.7090908603553212, 1.3870741200058612, 2.3124349989099473)

In [23]:
model_neg = autoEncoder(X_neg)
model_neg.compile(optimizer = Adam(lr=0.0001), loss='mse')

model_pos = autoEncoder(X_pos)
model_pos.compile(optimizer = Adam(lr=0.0001), loss='mse')

In [24]:
model_neg = keras.models.load_model('./model_neg')
model_pos = keras.models.load_model('./model_pos')

In [25]:
model_neg.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
UserScore (InputLayer)       [(None, 3706)]            0         
_________________________________________________________________
EncLayer1 (Dense)            (None, 512)               1897984   
_________________________________________________________________
LatentSpace (Dense)          (None, 512)               262656    
_________________________________________________________________
Dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
DecLayer1 (Dense)            (None, 512)               262656    
_________________________________________________________________
UserScorePred (Dense)        (None, 3706)              1901178   
Total params: 4,324,474
Trainable params: 4,324,474
Non-trainable params: 0
_________________________________________________

In [26]:
# predicted_tr = np.load('predicted_tr.npy')
# augmented_train = np.load('augmented_train.npy')

In [27]:
hist_neg = model_neg.fit(x=X_neg, y=y_neg,
                  epochs=300,
                  batch_size=128,
                  shuffle=True,
# augmented_train = np.load('augmented_train.npy')
                  validation_split=0.0)

Train on 6040 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
 640/6040 [==>...........................] - ETA: 2s - loss: 0.010 - ETA: 1s - loss: 0.010 - ETA: 1s - loss: 0.0109

KeyboardInterrupt: 

In [28]:
hist_pos = model_pos.fit(x=X_pos, y=y_pos,
                  epochs=300,
                  batch_size=128,
                  shuffle=True,
                  validation_split=0.0)

Train on 6040 samples
Epoch 1/300
Epoch 2/300

KeyboardInterrupt: 

In [29]:
import matplotlib.pyplot as plt 

def plot_hist(hist):
    # summarize history for loss
    fig, ax = plt.subplots()  # create figure & 1 axis

    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.plot(hist.history['loss'])
    #plt.plot(hist.history['val_loss'])

In [30]:
plot_hist(hist_neg)

NameError: name 'hist_neg' is not defined

In [None]:
plot_hist(hist_pos)

In [None]:
model_neg.save('./model_neg')
model_pos.save('./model_pos')

In [31]:
np.set_printoptions(precision=3)
predicted_neg = model_neg.predict(X_neg)
predicted_pos = model_pos.predict(X_pos)

In [32]:
(predicted_neg > 0.4).sum(), (y_neg == 1).sum() # predicted vs real

(170800, 382567)

In [33]:
(predicted_neg>0.5).sum(), (predicted_pos>0.5).sum() # trained on

(116139, 216900)

In [34]:
print((y_neg * (predicted_neg>0.4)).sum()/(predicted_neg>0.4).sum()) # accuracy on actual 
print((y_pos * (predicted_pos>0.4)).sum()/(predicted_pos>0.4).sum())

0.9506147540983606
0.9162219462901074


In [35]:
print((y_pos * (predicted_neg>0.4)).sum()/(y_pos>0.4).sum()) # just to see that it's a low number
print((y_neg * (predicted_pos>0.4)).sum()/(y_neg>0.4).sum())

0.004997865233443002
0.017272791432611803


In [36]:
(y_neg * (predicted_neg>0.5)).sum()/((predicted_neg>0.5).sum())

0.9846046547671324

In [37]:
(y_pos * (predicted_pos>0.5)).sum()/((predicted_pos>0.5).sum())

0.9687551867219917

In [38]:
((predicted_neg>0.5)  * (X_neg<0.5)).sum() # predicted values which were not in the train matrix

39945

In [39]:
((predicted_pos>0.5)  * (X_pos<0.5)).sum() # predicted values which were not in the train matrix

71113

In [40]:
((y_neg * (((predicted_neg>0.5)  * (X_neg<0.5)))) == 1).sum()/(((predicted_neg>0.5)  * (X_neg<0.5))).sum()

0.9552384528726999

In [41]:
((y_pos * (((predicted_pos>0.5)  * (X_pos<0.5)))) == 1).sum()/(((predicted_pos>0.5)  * (X_pos<0.5))).sum()

0.9047009688805141

In [42]:
to_add_negative = model_neg.predict(y_neg)
to_add_positive = model_neg.predict(y_pos)

In [43]:
(to_add_negative>0.5).sum(), (to_add_positive>0.5).sum()

(208239, 223363)

In [44]:
(y_neg* (to_add_negative>0.8)).sum()/(((to_add_negative>0.8)).sum()), (y_pos* (to_add_positive>0.8)).sum()/(((to_add_positive>0.8)).sum())

(0.9859022292187896, 0.9160156677389002)

In [45]:
# to keep the balance
threshold_neg = 0.2
threshold_pos = 0.3 

In [46]:
((to_add_negative > threshold_neg) * (tr==0)).sum(), ((to_add_positive > threshold_pos) * (tr==0)).sum() # new values # new values

(348459, 224127)

In [47]:
p_probs_neg = [(tr == 1).sum()/((tr > 0) & (tr < 4)).sum(), (tr == 2).sum()/(((tr > 0) & (tr < 4))).sum(), (tr == 3).sum()/((tr > 0) & (tr < 4)).sum()]
p_probs_neg

[0.13207882540835983, 0.25313474502505445, 0.6147864295665857]

In [48]:
p_probs_pos = [(tr == 4).sum()/((tr > 3) & (tr <= 5)).sum(), (tr == 5).sum()/((tr > 3) & (tr <= 5)).sum()]
p_probs_pos

[0.6064147320143503, 0.39358526798564974]

In [49]:
augmented_train = tr + (to_add_negative > threshold_neg) * (tr == 0) * np.random.choice(np.arange(1, 4), tr.shape, p=p_probs_neg) + (to_add_positive > threshold_pos) * (tr == 0) * np.random.choice(np.arange(4, 6), tr.shape, p=p_probs_pos)

In [50]:
get_sparsity(tr), get_sparsity(augmented_train) # reduced sparsity

(4.021525859265269, 6.3707367326297435)

In [51]:
(np.isin(tr, augmented_train)).all()

True

In [52]:
print(5, (5 == tr).sum())
print(4, (4 == tr).sum())
print(3, (3 == tr).sum())
print(2, (2 == tr).sum())
print(1, (1 == tr).sum())
print(0, (0 == tr).sum())

5 203728
4 313893
3 235197
2 96841
1 50529
0 21484052


In [53]:
print(5, (5 == augmented_train).sum())
print(4, (4 == augmented_train).sum())
print(3, (3 == augmented_train).sum())
print(2, (2 == augmented_train).sum())
print(1, (1 == augmented_train).sum())
print(0, (0 == augmented_train).sum())

5 277468
4 421283
3 420578
2 173280
1 90435
0 20958199


In [54]:
((tr == 0) * (augmented_train > 0)).sum() # new values

525853

In [55]:
# np.save('predicted_tr', predicted_tr)
np.save('augmented_train', augmented_train)

In [109]:
features_length = train.shape[1]
class NetD(torch.nn.Module):
    def __init__(self, feat_size):
        super(NetD, self).__init__()
        self.feat_size = feat_size
#         self.use_cuda = True
#         self.feat_size = feat_size
        # top
#         print(self.feat_size*2)
        self.t1 = torch.nn.Linear(self.feat_size, 1024)
        # bottom
        self.b1 = torch.nn.Linear(self.feat_size, 1024)
        # combined
        self.fc = torch.nn.Linear(2 * 1024, self.feat_size)
    def forward(self, xr, xf):
        # get filt
        
        filt = 1 - (xr * (xf >= 0.5).float()) - ((1-xr) * (xf < 0.5).float())
        # random swap
        idr = torch.multinomial(torch.Tensor([0.5,0.5]), xr.size(0), replacement=True)
        idrx = idr.float().unsqueeze(1).expand_as(xr)
#         if self.use_cuda: 
        idrx = idrx.cuda()
        idrx = Variable(idrx)
        xt = xr * idrx + xf * (1 - idrx)
        xb = xr * (1 - idrx) + xf * idrx
        # top : real
        xt = F.relu(self.t1(xt))
        # bottom : fake
        xb = F.relu(self.b1(xb))
        # combined
        x = torch.cat((xt, xb), 1)
        x = F.tanh(self.fc(x))
        # apply filter, aggregate
        x = filt * x
        x = x.mean(dim = 1).squeeze()
        # use sign, because of swapping
        sgn = idr * 2 - 1
        sgn = sgn.cuda()
        sgn = Variable(sgn.float())
        x = sgn * x
        return x


class NetG(nn.Module):
    
    def __init__(self, feat_size):

        super(NetG, self).__init__()
        self.feat_size = feat_size
        self.netGen = torch.nn.Sequential( 
                                torch.nn.Linear(nz + self.feat_size, 1024), 
#                                 torch.nn.BatchNorm1d(1024),
                                torch.nn.ReLU(), 
#                                 nn.Dropout(0.5),
#                                 torch.nn.Linear(1024, 1024),
# #                                 torch.nn.BatchNorm1d(1024),
#                                 torch.nn.ReLU(), 
#                                 nn.Dropout(0.6),
                                torch.nn.Linear(1024, features_length), 
                                torch.nn.Sigmoid()
#                                 torch.nn.BatchNorm1d(features_length),
#                                 nn.Dropout(0.7),
#                                 torch.nn.Sigmoid()
                                )

        
    def forward(self, e_mask, x):
        x = self.netGen(x)
        x = x * e_mask
        return x
#         return F.dropout(x, 0.7)
#         return 5 * self.netGen(x)
#         return torch.sigmoid(x) 
#         return x*5 # to get values in range [0,5]

In [110]:
def get_random_batch(mat, batch_size=64):
    '''
    returns random rows of size batch_size
    '''
    rand_rows = np.random.randint(mat.shape[0], size=batch_size)
#     print(mat.shape, rand_rows)
#     print(mat[rand_rows].shape)
    return mat[rand_rows]

In [111]:
train = torch.autograd.Variable(torch.Tensor(train))
augmented_train = torch.autograd.Variable(torch.Tensor(augmented_train))

In [112]:
get_sparsity(train.cpu().numpy()), get_sparsity(augmented_train.cpu().numpy())

(4.021525859265269, 6.3707367326297435)

In [113]:
# xx = get_random_batch(train)
# xy = get_random_batch(train)

In [114]:
# d_my(xx, xy)

In [115]:
# torch.sum(torch.abs(torch.abs(xx != 0).float()*xy - xy), 1)

In [116]:
# xx > xy

In [None]:
# # def d_my(x_r, x_g): # custom loss -todo
# #     return torch.sum(torch.abs((x_r != 0).float() * x_g - x_r), 1)/x_r.shape[1]

# def d_my(x_r, x_g): # custom loss -todo
#     return torch.sum(torch.abs(x_g - x_r), 1)/x_r.shape[1]

In [147]:
def batch_generator(corrupted, original, batch_size=64):
    rand_rows = np.random.randint(corrupted.shape[0], size=batch_size)
    return torch.Tensor(corrupted[rand_rows]).cuda().float(), torch.Tensor(original[rand_rows]).cuda().float()
#     return torch.from_numpy(corrupted[rand_rows]).float(), torch.from_numpy(original[rand_rows]).float()

In [148]:
get_sparsity(X_neg), get_sparsity(y_neg)

(1.026195215919772, 1.7090908603553212)

In [149]:
a,b = batch_generator(X_neg, y_neg)

get_sparsity(a.numpy()), get_sparsity(b.numpy()), a.shape, b.shape

TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [249]:
def train_GAN(netD, netG, negative, steps_per_epoch = 1000, epochs = 1000):
    d_iter = 5
    g_iter = 1
    gen_iterations = 0
    eval_losses = []
    for epoch in range(epochs):
        for c in range(steps_per_epoch):
            data_iter = 100
            i = 0
            while i < 100:
                ############################
                # (1) Update D network
                ###########################
                for p in netD.parameters(): # reset requires_grad
                    p.requires_grad = True # they are set to False below in netG update
    #             d_iter = d_iter
                j = 0
                while j < d_iter*5:
                    j += 1
                    # load real data
                    i += 1
                    if negative:
                        condition, X = batch_generator(X_neg, y_neg)
    #                 X, _ = data_iter.next()
    #                 X = X.view(X.size(0), -1)
    #                 X = (X >= 0.5).float()
                    if cuda: 
                        X = X.cuda()
                        condition = condition.cuda()
    #                 print(condition.shape, X_neg.shape, y_neg.shape)
                    real = Variable(X)

                    # generate fake data
                    noise = torch.randn(batch_size, nz)
                    if cuda: 
                        noise = noise.cuda()
                    noisev = Variable(noise, volatile = True) # totally freeze netG
                    concated = torch.cat((noisev, condition), 1)
    #                 print(condition.shape, condition.shape, X.shape, noisev.shape, )
                    e_mask = (real == 1).float()
                    fake = Variable(netG(e_mask, concated).data)

                    # compute gradient, take step
                    netD.zero_grad()
    #                 concated_real = torch.cat((real, condition), 1)
    #                 print(concated_real)
                    out = netD(real, fake)
                    outputD = torch.mean(out) + lamba * out.norm()
                    stdD = torch.std(out)
                    outputD.backward(mone)
                    optimizerD.step()
#                     print('AAAAAAAAA mse:=WWWWWWWWWWWWWWWWWWWWWW')
            ############################
            # (2) Update G network
            ###########################

    #         g_iter = g_iter
            j = 0
            while j < g_iter*5:
                j += 1
                for p in netD.parameters():
                    p.requires_grad = False # to avoid computation
                netG.zero_grad()
                # load real data

                i += 1
        #         X, _ = data_iter.next()
        #         X = X.view(X.size(0), -1)
        #         X = (X >= 0.5).float()
                if negative:
                    condition, X = batch_generator(X_neg, y_neg)
    #             if cuda: 
                X = X.cuda()
                condition = condition.cuda()
                real = Variable(X)

                # update generator
                noise = torch.randn(batch_size, nz)
    #             if args.cuda: 
                noise = noise.cuda()
                noisev = Variable(noise)
    #             print(condition.shape, X_neg.shape, y_neg.shape, noisev.shape)
                concated_ = torch.cat((noisev, condition), 1)
    #                 print(condition.shape, condition.shape, X.shape, noisev.shape, )
    #             e_mask = Variable((real == 1)).float()
    #             e_mask.requires_grad = True
    #             fake = Variable(netG(concated, X).data)
                e_mask_ = real.clone()
    #             concated_.requires_grad = True
    #             e_mask_.requires_grad = True
                real.requires_grad = True
                fake.requires_grad = True
                fake = Variable(netG(e_mask_, concated_).data)
    #             print((fake > 0).float().sum() == real.float().sum())
    #             print(fake)
    #             print(real)
    #             fake = netG(concated)
    #             concated_real = torch.cat((real, condition), 1)
                out = netD(real, fake)
                outputG = torch.mean(out) + lamba * out.norm()
                stdG = torch.std(out)
                outputG.backward(one)
                optimizerG.step()
                gen_iterations += 1
#             print('AAAAAA')
            eval_loss = F.mse_loss(fake, real, reduction='mean')
            eval_losses.append(eval_loss)
            print('mse:', eval_loss)
            print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f '% (epoch, epochs, i, 100, gen_iterations, outputD.item(), outputG.item()))
    return eval_losses

In [250]:
get_sparsity(X_neg), get_sparsity(y_neg)

(1.026195215919772, 1.7090908603553212)

In [251]:
# lrD = 5e-4
# lrG = 5e-4
# batch_size = 128
# cuda = True
# epochs = 1000 #change
# seed = 1
# nz = 16
# d_iter = 5
# g_iter = 1
# lamba = 2e-4

lrD = 5e-4
lrG = 5e-4
batch_size = 64
cuda = True
epochs = 1000
device = 5
seed = 1
nz = 10
lamba = 1e-2 

In [252]:
negative_feedback_mask.shape[1]

3706

In [253]:
# networks
netD_neg = NetD(negative_feedback_mask.shape[1]).cuda()
netG_neg = NetG(negative_feedback_mask.shape[1]).cuda()
print(netD_neg)
print(netG_neg)
optimizerG = optim.RMSprop(netG_neg.parameters(), lr=lrG)
optimizerD = optim.RMSprop(netD_neg.parameters(), lr=lrD)
one = torch.FloatTensor([1]).cuda()
mone = (-1 * one).cuda()

NetD(
  (t1): Linear(in_features=3706, out_features=1024, bias=True)
  (b1): Linear(in_features=3706, out_features=1024, bias=True)
  (fc): Linear(in_features=2048, out_features=3706, bias=True)
)
NetG(
  (netGen): Sequential(
    (0): Linear(in_features=3716, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=3706, bias=True)
    (3): Sigmoid()
  )
)


In [254]:
netD_neg.train()
netG_neg.train()
eval_losses_neg = train_GAN(netD_neg, netG_neg, negative=True)



mse: tensor(0.0042, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][5] Loss_D: 0.007600 Loss_G: 0.007457 
mse: tensor(0.0050, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][10] Loss_D: 0.012600 Loss_G: 0.009962 
mse: tensor(0.0039, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][15] Loss_D: 0.006205 Loss_G: 0.007759 
mse: tensor(0.0039, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][20] Loss_D: 0.007272 Loss_G: 0.008353 
mse: tensor(0.0044, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][25] Loss_D: 0.009498 Loss_G: 0.009186 
mse: tensor(0.0042, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][30] Loss_D: 0.009597 Loss_G: 0.009201 
mse: tensor(0.0048, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][35] Loss_D: 0.010787 Loss_G: 0.010193 
mse: tensor(0.0040, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][40] Loss_D: 0.008516 Loss_G: 0.009059 
mse: tensor(0.0042, device='cuda:0', grad_fn=<Mea

mse: tensor(0.0055, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][345] Loss_D: 0.009768 Loss_G: 0.012047 
mse: tensor(0.0044, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][350] Loss_D: 0.008149 Loss_G: 0.009802 
mse: tensor(0.0038, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][355] Loss_D: 0.011110 Loss_G: 0.008427 
mse: tensor(0.0052, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][360] Loss_D: 0.010924 Loss_G: 0.011408 
mse: tensor(0.0052, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][365] Loss_D: 0.008433 Loss_G: 0.011672 
mse: tensor(0.0033, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][370] Loss_D: 0.007623 Loss_G: 0.007092 
mse: tensor(0.0030, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][375] Loss_D: 0.012545 Loss_G: 0.006490 
mse: tensor(0.0056, device='cuda:0', grad_fn=<MeanBackward0>)
[0/1000][105/100][380] Loss_D: 0.011646 Loss_G: 0.012982 
mse: tensor(0.0044, device='cuda:0', gra

KeyboardInterrupt: 

In [256]:
torch.save(netD_neg.state_dict(), './netD_neg-1m')
torch.save(netG_neg.state_dict(), './netG_neg-1m')

In [257]:
netD_neg.eval()

NetD(
  (t1): Linear(in_features=3706, out_features=1024, bias=True)
  (b1): Linear(in_features=3706, out_features=1024, bias=True)
  (fc): Linear(in_features=2048, out_features=3706, bias=True)
)

In [258]:
condition, X = batch_generator(X_neg, y_neg, batch_size=1)

X = X.cuda()
condition = condition.cuda()
# real = Variable(X)

noise = torch.randn(1, nz).cuda()
# noise = noise.cuda()
# noisev = Variable(noise)

concated = torch.cat((noise, condition), 1)
fake = netG_neg(X, concated)

In [280]:
(fake >= 0.4).sum(), ((fake >= 0.4) * (condition==0)).sum(), condition.sum(), X.sum()

(tensor(21, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(13., device='cuda:0'),
 tensor(21., device='cuda:0'))

In [281]:
(fake >= 0.5).sum(), ((fake >= 0.5) * (condition==0)).sum(), condition.sum(), X.sum()

(tensor(12, device='cuda:0'),
 tensor(4, device='cuda:0'),
 tensor(13., device='cuda:0'),
 tensor(21., device='cuda:0'))

In [175]:
(fake > 0.51).sum()

tensor(29, device='cuda:0')

In [176]:
X.shape

torch.Size([1, 3706])

In [187]:
(fake > 0.48).sum(), ((fake > 0.48) * (condition==0)).sum(), condition.sum()

(tensor(160, device='cuda:0'),
 tensor(55, device='cuda:0'),
 tensor(107., device='cuda:0'))

In [188]:
np.isin(condition.cpu().numpy(), X.cpu().numpy()).all()

True

In [189]:
((fake > 0.48).float() * (condition==0).float() * X).sum()/(X * (1 - fake)).sum()

tensor(0.6708, device='cuda:0', grad_fn=<DivBackward0>)

In [190]:
((fake > 0.48).float() * (condition==0).float()*X).sum()/(X).sum()

tensor(0.3354, device='cuda:0')

In [191]:
((fake > 0.48).float() * (condition==0).float() * X).sum()/(X * (1 - fake)).sum()

tensor(0.6708, device='cuda:0', grad_fn=<DivBackward0>)

In [None]:
import matplotlib.pyplot as plt

plt.plot(eval_losses_tr)
plt.show()

In [None]:
# noise = torch.randn(train.shape[0], nz).to(device)
# noisev = Variable(noise)
# fake = netG_tr(noisev)

In [None]:
fake.shape

In [None]:
fake = np.around(fake.detach().cpu().numpy())

In [None]:
np.unique(fake)

In [None]:
fake = fake * (fake <= 5).astype(int)

In [None]:
np.unique(fake)

In [None]:
get_sparsity(fake)

In [None]:
print(5, (5 == fake.round()).sum(), (5 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(4, (4 == fake.round()).sum(), (4 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(3, (3 == fake.round()).sum(), (3 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(2, (2 == fake.round()).sum(), (2 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(1, (1 == fake.round()).sum(), (1 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(0, (0 == fake.round()).sum(), (0 == (tr + vr)[:fake.shape[0], :].round()).sum())

As we see there is a significant bias towards higher ratings

In [None]:
# networks
netD_augm = NetD().to(device)
netG_augm = NetG().to(device)
print(netD_augm)
print(netG_augm)
optimizerG = optim.RMSprop(netG_augm.parameters(), lr=lrG)
optimizerD = optim.RMSprop(netD_augm.parameters(), lr=lrD)
one = torch.FloatTensor([1]).to(device)
mone = (-1 * one).to(device)

In [None]:
noise = torch.randn(train.shape[0], nz).to(device)
noisev = Variable(noise)
fake = netG_augm(noisev)

In [None]:
fake = np.around(fake.detach().cpu().numpy())
np.unique(fake)

In [None]:
# without train
print(5, (5 == fake.round()).sum(), (5 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(4, (4 == fake.round()).sum(), (4 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(3, (3 == fake.round()).sum(), (3 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(2, (2 == fake.round()).sum(), (2 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(1, (1 == fake.round()).sum(), (1 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(0, (0 == fake.round()).sum(), (0 == (tr + vr)[:fake.shape[0], :].round()).sum())

In [None]:
# netD_augm.load_state_dict(torch.load('./netG_augm-1m'))
# netD_augm.load_state_dict(torch.load('./netD_augm-1m'))

In [None]:
netD_augm.train()
netG_augm.train()
eval_losses_aug = train_GAN(netD_augm, netG_augm, augmented_train)

In [None]:
import matplotlib.pyplot as plt

plt.plot(eval_losses_aug)
plt.show()

In [None]:
# netG_tr.eval()
netG_augm.eval()

In [None]:
noise = torch.randn(train.shape[0], nz).to(device)
noisev = Variable(noise)
fake = netG_augm(noisev)

In [None]:
fake = np.around(fake.detach().cpu().numpy())

In [None]:
np.unique(fake)

In [None]:
# fake = fake * (fake <= 5).astype(int)

fake = fake.clip(0,5)

In [None]:
print(5, (5 == fake.round()).sum(), (5 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(4, (4 == fake.round()).sum(), (4 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(3, (3 == fake.round()).sum(), (3 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(2, (2 == fake.round()).sum(), (2 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(1, (1 == fake.round()).sum(), (1 == (tr + vr)[:fake.shape[0], :].round()).sum())
print(0, (0 == fake.round()).sum(), (0 == (tr + vr)[:fake.shape[0], :].round()).sum())

In [None]:
noise = torch.randn(train.shape[0], nz).to(device)
noisev = Variable(noise)

fake_tr = netG_tr(noisev)
fake_aug = netG_augm(noisev)

In [None]:
torch.unique(fake_tr.round()), torch.unique(fake_aug.round())

In [None]:
fake_tr = fake_tr.clamp(0,5).detach().cpu().numpy().round()
fake_aug = fake_aug.clamp(0,5).detach().cpu().numpy().round()

In [None]:
print(5, (5 == fake_tr).sum(), (5 == (tr + vr)[:fake.shape[0], :]).sum())
print(4, (4 == fake_tr).sum(), (4 == (tr + vr)[:fake.shape[0], :]).sum())
print(3, (3 == fake_tr).sum(), (3 == (tr + vr)[:fake.shape[0], :]).sum())
print(2, (2 == fake_tr).sum(), (2 == (tr + vr)[:fake.shape[0], :]).sum())
print(1, (1 == fake_tr).sum(), (1 == (tr + vr)[:fake.shape[0], :]).sum())
print(0, (0 == fake_tr).sum(), (0 == (tr + vr)[:fake.shape[0], :]).sum())

In [None]:
print(5, (5 == fake_aug).sum(), (5 == (tr + vr)[:fake.shape[0], :]).sum())
print(4, (4 == fake_aug).sum(), (4 == (tr + vr)[:fake.shape[0], :]).sum())
print(3, (3 == fake_aug).sum(), (3 == (tr + vr)[:fake.shape[0], :]).sum())
print(2, (2 == fake_aug).sum(), (2 == (tr + vr)[:fake.shape[0], :]).sum())
print(1, (1 == fake_aug).sum(), (1 == (tr + vr)[:fake.shape[0], :]).sum())
print(0, (0 == fake_aug).sum(), (0 == (tr + vr)[:fake.shape[0], :]).sum())

In [None]:
get_sparsity(tr), get_sparsity(fake_tr), get_sparsity(fake_aug)

In [None]:
# tr_orig, vr_1 = loadData('./ml-1m/ratings.dat', delimiter='::', seed=seed,  transpose=False, valfrac=0.1)

In [None]:
import nbimporter 
import matrix_factorization

In [None]:
rand_ix = np.random.randint(0, fake.shape[0], 300)

In [None]:
# adding_fake_autoenc = fake_tr[rand_ix,:]
adding_fake_autoenc_lus_gan = fake_aug[rand_ix,:]

In [None]:
np.unique(fake_aug)

In [None]:
np.unique(adding_fake_autoenc_lus_gan[0,:])

In [None]:
(adding_fake_autoenc == 1).sum()

In [None]:
(adding_fake_autoenc_lus_gan == 1).sum()

In [None]:
np.unique(adding_fake_autoenc_lus_gan[0,:])

In [None]:
adding_fake[0,0:200]

In [None]:
adding_fake_autoenc_lus_gan[0,0:200]

In [None]:
tr_auto_enc = np.append(tr, adding_fake_autoenc, axis=0)
tr_auto_enc_plus_gan = np.append(tr, adding_fake_autoenc_lus_gan, axis=0)

In [None]:
iter_array = [1, 2, 5, 10, 25]
MF_SGD = matrix_factorization.ExplicitMF(tr, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([60], vr, learning_rate=0.001)

In [None]:
get_sparsity(augmented_train.cpu().numpy())

In [None]:
# iter_array = [1, 2, 5, 10, 25]
MF_SGD = matrix_factorization.ExplicitMF(augmented_train.cpu().numpy(), 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([60], vr, learning_rate=0.001)

In [None]:
# iter_array = [1, 2, 5, 10, 25]
MF_SGD = matrix_factorization.ExplicitMF(tr, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([50], vr, learning_rate=0.001)

In [None]:
# iter_array = [1, 2, 5, 10, 25]
MF_SGD = matrix_factorization.ExplicitMF(augmented_train.cpu().numpy(), 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([50], vr, learning_rate=0.001)

In [None]:
tr.shape, augmented_train.cpu().numpy().shape, tr_auto_enc.shape, tr_auto_enc_plus_gan.shape

In [None]:
get_sparsity(tr_auto_enc), get_sparsity(tr_auto_enc_plus_gan)

In [None]:
# iter_array = [1, 2, 5, 10, 25, 40]

MF_SGD = matrix_factorization.ExplicitMF(tr_auto_enc, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)

In [None]:
# iter_array = [1, 2, 5, 10, 25, 40]

MF_SGD = matrix_factorization.ExplicitMF(tr_auto_enc_plus_gan, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([60], vr, learning_rate=0.001)

In [None]:
get_sparsity(tr_auto_enc_plus_gan), tr_auto_enc_plus_gan.shape

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(tr_auto_enc_plus_gan, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([60], vr, learning_rate=0.001)

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(tr_auto_enc_plus_gan, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve([60], vr, learning_rate=0.001)

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(fake_aug, 40, learning='sgd', verbose=True)
iter_array = [1, 2, 5, 10, 25, 50, 60]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)

In [None]:
augmented_tr = augmented_train.cpu().numpy()

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(augmented_tr, 40, learning='sgd', verbose=True)
iter_array = [1, 2, 5, 10, 25, 50, 60]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(tr, 40, learning='sgd', verbose=True)
iter_array = [1, 2, 5, 10, 25, 50, 60]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(tr, 40, learning='als', verbose=True)
iter_array = [1, 2, 5, 10, 25, 50, 60]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)

In [None]:
MF_SGD = matrix_factorization.ExplicitMF(augmented_tr, 40, learning='als', verbose=True)
iter_array = [1, 2, 5, 10, 25, 50, 60]

# iter_array = [10]
# iter_array = [1, 2, 5, 10, 25]
MF_SGD.calculate_learning_curve(iter_array, vr, learning_rate=0.001)