In [1]:
import os
import time
import argparse
import module
import numpy as np
import matplotlib.pyplot as plt
from model2 import Model

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils as utils
import torch.nn.init as init

In [2]:
# Experiment configuration: selects the trial, gene, model, tissue and
# cross-validation fold used by the cells below.
trial_num = 2        # appears in the figure title and the output image directory
gene_num = 6         # 1-based position of the gene in the sorted directory listing
model_num = 4
tissue_num = 27
k_num = 3            # fold selector passed to module.k_fold_data (out of 5)
lasso_alpha = 0.3    # passed to Model as `alpha`

gene_data_name = 'UTMOST_top10_Liver'
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# listing is sorted to make `gene_num` refer to the same gene on every run/machine.
# NOTE(review): results produced earlier with the unsorted listing may map
# gene_num to a different gene -- confirm before comparing across runs.
gene_list = sorted(os.listdir('../../%s/'%gene_data_name))
if not 1 <= gene_num <= len(gene_list):
    # Without this check gene_num == 0 would silently wrap around to the last entry.
    raise IndexError('gene_num must be in [1, %d], got %d'%(len(gene_list), gene_num))
gene_name = gene_list[gene_num-1]

print('\n\n[Gene %d] Model %d ( tissue %d ) - %d/5 fold data'%(gene_num, model_num, tissue_num, k_num))
print('Option : lasso coeff : %.4f'%lasso_alpha)



[Gene 6] Model 4 ( tissue 27 ) - 3/5 fold data
Option : lasso coeff : 0.3000


In [3]:
# Select the compute device. The second GPU is preferred (as before), but a
# machine with a single GPU falls back to cuda:0 instead of failing on `.to(device)`.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

start_time = time.time()

# Seed every RNG used below so that shuffling and initialisation are repeatable.
np.random.seed(37)
torch.manual_seed(37)
torch.cuda.manual_seed_all(37)
torch.backends.cudnn.deterministic = True
# Autotuning may pick different kernels between runs; keep it off explicitly.
torch.backends.cudnn.benchmark = False

# Load the SNP / expression data for the configured gene and tissue, then split
# it with the project helper (5 folds, fold selected by k_num).
snp, gx = module.load_data(gene_data_name, gene_name, tissue_num, proc=True)
snp_train, snp_test, gx_train, gx_test = module.k_fold_data(snp, gx, 5, k_num)

# Convert each split to a float tensor on the selected device.
snp_train = torch.Tensor(snp_train).to(device)
snp_test = torch.Tensor(snp_test).to(device)
gx_train = torch.Tensor(gx_train).to(device)
gx_test = torch.Tensor(gx_test).to(device)

print('\nData shape @@@@@@')
print('Train data : ', snp_train.shape,' / ', gx_train.shape)
print('Test data : ', snp_test.shape, ' / ', gx_test.shape)
print('\n')


Data shape @@@@@@
Train data :  torch.Size([123, 27389])  /  torch.Size([123, 1])
Test data :  torch.Size([30, 27389])  /  torch.Size([30, 1])




In [None]:
# Train the model with Adam on shuffled mini-batches. Every 100 iterations the
# full train / test losses are logged and the fc1 weights of the iteration with
# the lowest test loss so far are kept in `weight`.
learning_rate = 0.00003
model = Model(snp_len=snp_train.size()[-1], alpha=lasso_alpha).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

tr_loss_list = []
te_loss_list = []

# Best-checkpoint bookkeeping. te_loss_buff starts at +inf (rather than using 0
# as a "not set yet" sentinel) so that the very first evaluation is recorded and
# `weight` is always defined once an evaluation has run.
tr_loss_buff = 0
te_loss_buff = float('inf')
min_iter = 0

mb_div = 2
# Integer mini-batch size; when len(snp_train) is not divisible by mb_div the
# trailing samples of each shuffled epoch are not visited in that epoch.
mb_idx = int(len(snp_train)/mb_div)
s = np.arange(len(snp_train))

for i in range(5001):
    # Re-permute inputs and targets with the same index array so pairs stay aligned.
    np.random.shuffle(s)
    snp_train = snp_train[s]
    gx_train = gx_train[s]

    for mb in range(mb_div):
        dsnp_train = snp_train[mb*mb_idx:(mb+1)*mb_idx]
        dgx_train = gx_train[mb*mb_idx:(mb+1)*mb_idx]

        optimizer.zero_grad()
        tr_loss, output = model(dsnp_train, dgx_train)
        tr_loss.backward()
        optimizer.step()

    if i%100 != 0:
        continue

    # Periodic evaluation: no gradients are needed, so skip building the graph.
    # NOTE(review): if Model contains dropout / batch-norm layers, this should
    # also switch to model.eval() here and back to model.train() afterwards.
    with torch.no_grad():
        tr_loss, output = model(snp_train, gx_train)
        te_loss, out_test = model(snp_test, gx_test)

    tr_loss_val = tr_loss.item()
    te_loss_val = te_loss.item()
    tr_loss_list.append(tr_loss_val)
    te_loss_list.append(te_loss_val)

    print('iteration :', '%d/5000'%i, ' -  train loss :', \
          np.round(tr_loss_val,3), '/  ', \
          'test loss :', np.round(te_loss_val, 3))

    if te_loss_buff>=te_loss_val:
        min_iter = i
        te_loss_buff = te_loss_val
        tr_loss_buff = tr_loss_val
        # .copy() is required: on CPU, .numpy() shares memory with the live
        # parameter, so without it later optimizer steps would overwrite the
        # saved "best" weights in place.
        weight = model.fc1.weight.detach().cpu().numpy().reshape(-1).copy()

iteration : 0/5000  -  train loss : 25.311 /   test loss : 25.94
iteration : 100/5000  -  train loss : 1.181 /   test loss : 1.701
iteration : 200/5000  -  train loss : 0.991 /   test loss : 1.393
iteration : 300/5000  -  train loss : 0.961 /   test loss : 1.298
iteration : 400/5000  -  train loss : 0.944 /   test loss : 1.234
iteration : 500/5000  -  train loss : 0.934 /   test loss : 1.192
iteration : 600/5000  -  train loss : 0.927 /   test loss : 1.165
iteration : 700/5000  -  train loss : 0.924 /   test loss : 1.15
iteration : 800/5000  -  train loss : 0.921 /   test loss : 1.14
iteration : 900/5000  -  train loss : 0.919 /   test loss : 1.134
iteration : 1000/5000  -  train loss : 0.918 /   test loss : 1.131
iteration : 1100/5000  -  train loss : 0.916 /   test loss : 1.129
iteration : 1200/5000  -  train loss : 0.916 /   test loss : 1.128
iteration : 1300/5000  -  train loss : 0.914 /   test loss : 1.127
iteration : 1400/5000  -  train loss : 0.913 /   test loss : 1.126
iteratio

In [None]:
# Report how many fc1 weights exceed +/-1e-4 (positive count, negative count),
# then plot every weight against its index and save the figure.
print(np.sum(weight>0.0001), np.sum(weight<-0.0001))

# savefig does not create missing directories; make sure the target exists.
img_dir = '../img/trial%d'%trial_num
os.makedirs(img_dir, exist_ok=True)

plt.title('[Trial%d]model%d_gene%d_weight.png'%(trial_num, model_num, gene_num), fontsize=14)
plt.plot(weight,'.')
plt.xlabel('SNP index', fontsize=13)
plt.ylabel('weight', fontsize=13)
plt.savefig('%s/weight_gene%d_model%d_k%d.png'%(img_dir, gene_num, model_num, k_num))
plt.show()