# generate data

In [1]:
import scipy
from scipy.special import expit, logit
import numpy as np
import math
from torch import nn
from torch.nn import functional as F
import torch
import copy
from sklearn.metrics import roc_auc_score

In [2]:
def generate_data_uniform(p, n, min_b, mean_b, meanX, sdX, train_prop = 1.0):
    
    # generate uniform b's
    low = min_b
    high = (2*mean_b) - min_b
    b = np.random.uniform(low, high, size=(p,1))
    # normal X's
    X = torch.from_numpy(np.random.normal(meanX, sdX, size=(n, p))).float()
    
    # logistic Y's
    Y = torch.from_numpy(np.random.binomial(1, expit( np.matmul(X,b)))).float()
    
    # split into training and test
    X_train = X
    Y_train = Y
    X_test = None
    Y_test = None
    
    if train_prop < 1.0:
        xy = torch.cat((Y,X),axis=1)        
        train_cutoff = int(n * train_prop)
        train = xy[:train_cutoff,:]
        Y_train, X_train =  train[:,0], train[:,1:]
        
        test = xy[train_cutoff:,:]
        Y_test, X_test = test[:,0], test[:, 1:]
    
    
    return Y_train, X_train, Y_test, X_test

In [3]:
p = 50

# source params
n_source = 10000
min_b_source = 0.5
mean_b_source = 2.0
meanX_source = 0.0
sdX_source = 1.0
train_prop_source = 0.80

# target params
n_target = 100
min_b_target = 0.5
mean_b_target = 2.0
meanX_target = 0.0
sdX_target = 1.0
train_prop_target = 0.80

In [4]:
Y_source_train, X_source_train, Y_source_test, X_source_test = generate_data_uniform(p = p,
                                                n = n_source,
                                                 min_b = min_b_source,
                                                 mean_b = mean_b_source,
                                                 meanX =meanX_source,
                                                 sdX = sdX_source,
                                                 train_prop = train_prop_source)
                                                         
    
Y_target_train, X_target_train, Y_target_test, X_target_test = generate_data_uniform(p = p,
                                                                n = n_target,
                                                                 min_b = min_b_target,
                                                                 mean_b = mean_b_target,
                                                                 meanX =meanX_target,
                                                                 sdX = sdX_target,
                                                                 train_prop = train_prop_target)

# Pytorch NN and data setup

In [5]:
class BinaryClassification(nn.Module):
    def __init__(self, input_shape):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(input_shape, 10) 
        self.layer_out = nn.Linear(10, 1) 

        
    def forward(self, inputs):
        x = torch.relu(self.layer_1(inputs))
        x = torch.relu(self.layer_out(x))
        
        return x

# Train source model

In [6]:
learning_rate = 0.01
epochs = 1400
# Model , Optimizer, Loss
source_model = BinaryClassification(input_shape=X_source_train.shape[1])
optimizer = torch.optim.SGD(source_model.parameters(),lr=learning_rate)
loss_fn = nn.BCEWithLogitsLoss()

In [7]:
losses = []
accur = []
for i in range(epochs):

    #calculate output
    output = source_model(X_source_train)

    #calculate loss
    loss = loss_fn(output,Y_source_train.reshape(-1,1))

    #backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses.append(loss)
#     print(f"epoch {i}\tloss :{loss}")


In [8]:
source_roc_auc = roc_auc_score(Y_source_test, source_model(X_source_test).detach().numpy())
print(f"source auc:\t{source_roc_auc}")

source auc:	0.9826444826444825


# Train target model (no transfer learning)

In [9]:
learning_rate = 0.01
epochs = 1400
# Model , Optimizer, Loss
target_model = BinaryClassification(input_shape=X_target_train.shape[1])
optimizer = torch.optim.SGD(target_model.parameters(),lr=learning_rate)
loss_fn = nn.BCEWithLogitsLoss()

In [10]:
losses = []
accur = []
for i in range(epochs):

    #calculate output
    output = target_model(X_target_train)

    #calculate loss
    loss = loss_fn(output, Y_target_train.reshape(-1,1))

    #backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses.append(loss)
#     print(f"epoch {i}\tloss :{loss}")


In [11]:
target_roc_auc = roc_auc_score(Y_target_test, target_model(X_target_test).detach().numpy())
print(f"target auc:\t{target_roc_auc}")

target auc:	0.8131868131868132


# Target model with Transfer Learning

In [12]:
learning_rate = 0.01
epochs = 1400
# Model , Optimizer, Loss
target_model_tl = copy.deepcopy(source_model)


In [13]:
# freeze layers by so the weights do not update
for param in target_model_tl.parameters():
    param.requires_grad = False

In [14]:
# reassing last layer with requires_grad=true by default
target_model_tl.layer_out = nn.Linear(target_model_tl.layer_out.in_features, target_model_tl.layer_out.out_features)

In [15]:
optimizer = torch.optim.SGD(target_model_tl.parameters(),lr=learning_rate)
loss_fn = nn.BCEWithLogitsLoss()

In [16]:
losses = []
accur = []
for i in range(epochs):

    #calculate output
    output = target_model_tl(X_target_train)

    #calculate loss
    loss = loss_fn(output, Y_target_train.reshape(-1,1))

    #backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses.append(loss)
#     print(f"epoch {i}\tloss :{loss}")


In [17]:
target_tl_roc_auc = roc_auc_score(Y_target_test, target_model_tl(X_target_test).detach().numpy())
print(f"target TL auc:\t{target_tl_roc_auc}")

target TL auc:	0.8901098901098902
