In [1]:
import os
import sys
# Put the parent of the working directory at the front of sys.path so the
# `src` package imported further down can be found (assumes the notebook is
# started from a direct sub-directory of the project -- confirm).
cwd = os.getcwd()
sys.path.insert(0, os.path.join(cwd, "../"))
# Absolute form of that same parent directory; later cells build data paths
# from it.
module_path = os.path.abspath('..')

import pickle
import torch
import importlib
import numpy as np
import pandas as pd
from collections import defaultdict
import copy

import src.bin.tensorify as tensorify
import src.utils.data_conversion_utils as conversions
import src.data_manager.student_life_var_binned_data_manager as data_manager
import src.bin.trainer as trainer

from sklearn.manifold import TSNE
from sklearn import metrics
import matplotlib
import matplotlib.pyplot as plt

from torch import nn
from copy import copy
from copy import deepcopy
from src import definitions
from src.bin import validations
from src.bin import statistics
from src.bin import plotting
from src.utils.read_utils import read_pickle
from src.utils import student_utils
from src.data_manager import sub_sampler
from src.data_manager import cross_val
from src.data_manager import helper as data_manager_helper
from tabulate import tabulate
from src.models import autoencoder
from src.models import multitask_learning
from src.models import user_dense_heads

from src.models.multitask_learning import multitask_autoencoder
import warnings
warnings.filterwarnings('ignore')


%matplotlib inline

Student ID couldn't be converted to Integer!


In [2]:
# Data directory under the project root, and the saved multitask-autoencoder
# checkpoint inside it.
data_path = "{}/data".format(module_path)
ae_model_path = "{}/best_ae_multi_task_model.tar".format(data_path)

# Echo both locations so a wrong working directory is spotted immediately.
print(module_path)
print(ae_model_path)

/Users/nsimsiri/Documents/code/ml/MultiRes/student_life
/Users/nsimsiri/Documents/code/ml/MultiRes/student_life/data/best_ae_multi_task_model.tar


In [None]:
# Students passed to the model constructor; later cells look up one personal
# head per entry.
student_list = [
    'student_7', 'student_46', 'student_33', 'student_22', 'student_1',
    'student_24', 'student_53', 'student_4', 'student_57', 'student_42',
    'student_2', 'student_49', 'student_10',
]

# Hyper-parameters handed positionally to MultiTaskAutoEncoderLearner below.
# The checkpoint is loaded into that model afterwards, so these presumably
# have to match the values used at training time -- confirm before editing.

# Input / autoencoder
num_features = 9
num_covariates = 4
autencoder_bottle_neck_feature_size = 128
autoencoder_num_layers = 1

# Shared trunk and per-student heads
shared_hidden_layer_size = 256
user_dense_layer_hidden_size = 64
num_classes = 3

# Dropout (disabled)
shared_layer_dropout_prob = 0.0
user_head_dropout_prob = 0.0

In [None]:
# Rebuild the network from the hyper-parameters defined above. The arguments
# are positional, so their order has to follow the constructor's signature.
model = multitask_autoencoder.MultiTaskAutoEncoderLearner(
    student_list,
    num_features,
    autencoder_bottle_neck_feature_size,
    autoencoder_num_layers,
    shared_hidden_layer_size,
    user_dense_layer_hidden_size,
    num_classes,
    num_covariates,
    shared_layer_dropout_prob,
    user_head_dropout_prob,
)



In [None]:
# Read the saved checkpoint, remapping every tensor onto the CPU so the
# notebook also runs on machines without a GPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
state_dict = torch.load(ae_model_path, map_location='cpu')
# Number of top-level entries in the loaded object (quick sanity check).
print(len(state_dict))

In [None]:
# Populate the model built above with the parameters read from the checkpoint.
model.load_state_dict(state_dict)


### Interpreting Heads

In [None]:
# Per-student heads of the loaded model, indexed by student id.
dense_layer_dict = model.user_heads.student_dense_layer
print(dense_layer_dict['student_1'])
print(len(dense_layer_dict))

# student id -> {state-dict key -> tensor}, keeping only the entries whose key
# contains 'weight' (bias terms and the like are skipped).
s2w = defaultdict(dict)
for student_id, head in dense_layer_dict.items():
    for param_name, tensor in head.state_dict().items():
        if 'weight' not in param_name:
            continue
        s2w[student_id][param_name] = tensor

print(s2w.keys())
# print(s2w['student_1'])

In [None]:
# t-SNE is stochastic: without a fixed seed every run of this cell produces a
# different embedding, so the figures below could not be reproduced.
TSNE_RANDOM_STATE = 0
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;
# switch the keyword if the installed version rejects it.
tsne = TSNE(n_components=2, perplexity=10, n_iter=300,
            random_state=TSNE_RANDOM_STATE)

# One entry per student, filled in the loop below.
w0_raw = []  # first weight matrix of the head, flattened to one row
w3_raw = []  # second weight matrix of the head, flattened to one row

# Rows 0, 1 and 2 of the second weight matrix (plotted later as the
# low / medium / high stress outputs), each kept as a (1, n) row.
s1 = []
s2 = []
s3 = []

w_cat = []   # both flattened matrices side by side

for student, weight_dict in s2w.items():
    w_keys = list(weight_dict.keys())
    first_weight = weight_dict[w_keys[0]].numpy()
    second_weight = weight_dict[w_keys[1]].numpy()

    first_flat = first_weight.reshape((1, -1))
    second_flat = second_weight.reshape((1, -1))
    w0_raw.append(first_flat)
    w3_raw.append(second_flat)

    s1.append(second_weight[0].reshape(1, -1))
    s2.append(second_weight[1].reshape(1, -1))
    s3.append(second_weight[2].reshape(1, -1))

    w_cat.append(np.concatenate([first_flat, second_flat], axis=1))

# Stack the per-student rows into (n_students, n_values) matrices.
w0 = np.concatenate(w0_raw, axis=0)
w3 = np.concatenate(w3_raw, axis=0)

s1_cat = np.concatenate(s1, axis=0)
s2_cat = np.concatenate(s2, axis=0)
s3_cat = np.concatenate(s3, axis=0)

w_cat_cat = np.concatenate(w_cat, axis=0)

# print(w0.shape)
# print(w3.shape)
# print(s1_cat.shape, s2_cat.shape, s3_cat.shape)

# Each matrix is embedded independently; the projections do not share axes.
tsne_0 = tsne.fit_transform(w0)
tsne_3 = tsne.fit_transform(w3)

tsne_s1 = tsne.fit_transform(s1_cat)
tsne_s2 = tsne.fit_transform(s2_cat)
tsne_s3 = tsne.fit_transform(s3_cat)

tsne_cat = tsne.fit_transform(w_cat_cat)


print(tsne_s1.shape)
print(tsne_0.shape)
print(tsne_3.shape)
print(tsne_cat.shape)

In [None]:
def plot_tsne_stress(s_arr):
    """Overlay several 2-D projections in one scatter plot, one colour each.

    Args:
        s_arr: sequence of 2-D arrays; column 0 is used as x and column 1 as
            y. Entry ``i`` is drawn with the i-th label/colour pair defined
            below, so at most three entries are supported.
    """
    groups = ['low', 'medium', 'high']
    colors = ['yellow', 'orange', 'red']

    fig, ax = plt.subplots(figsize=(4, 4))
    for i, tsne_xy in enumerate(s_arr):
        ax.scatter(tsne_xy[:, 0], tsne_xy[:, 1],
                   color=colors[i], alpha=0.7, label=groups[i])

    # t-SNE coordinates carry no meaning of their own, so hide the tick labels.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_title("t-SNE stress classifier's weights")
    ax.legend()
    plt.show()
    
# Draw the projections of rows 0, 1 and 2 of every student's second weight
# matrix in one figure (labelled low / medium / high by the function).
plot_tsne_stress([tsne_s1, tsne_s2, tsne_s3])

In [None]:
def plot_tsne_basic(tsne_xy, title):
    """Scatter-plot a single 2-D projection in one colour.

    Args:
        tsne_xy: 2-D array; column 0 is used as x and column 1 as y.
        title: text shown above the axes.
    """
    # pyplot.get_cmap exists on old and new Matplotlib alike, whereas
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    cmap = plt.get_cmap('prism')

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.scatter(tsne_xy[:, 0], tsne_xy[:, 1],
               color=cmap(4), alpha=0.7, label='layer_1')

    # t-SNE coordinates carry no meaning of their own, so hide the tick labels.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_title(title)
    plt.show()
# One figure each for the projection of the heads' first weight matrix, of
# their second weight matrix, and of both matrices concatenated.
plot_tsne_basic(tsne_0, 't-SNE personal head, linear layer (128, 64)')
plot_tsne_basic(tsne_3, 't-SNE personal head, classification layer (64, 3)')
plot_tsne_basic(tsne_cat, 't-SNE personal head, all layers')
# cmap = matplotlib.cm.get_cmap('prism')
# print(cmap(5))

In [None]:
def PlotScatter(tsne_results, name, TrainDS, KE, MultiLabel = False, Legend=True):
    """Scatter-plot a 2-D projection coloured by group and save it to disk.

    Args:
        tsne_results: 2-D array with one row per entry of ``TrainDS.Kinases``;
            column 0 is used as x and column 1 as y.
        name: path the figure is written to.
        TrainDS: object exposing ``Kinases``. Each entry must have a
            ``UniprotID`` attribute, or be a sequence whose first element
            does when ``MultiLabel`` is true.
        KE: object exposing ``UniProtID_to_GroupVec``, a mapping from that id
            to a vector; the position of its maximum is the group index.
        MultiLabel: take only the first label of every entry.
        Legend: draw the legend outside the axes, top right.
    """
    Groups = []
    for kin in TrainDS.Kinases:
        # With several labels per entry only the first one is used; the
        # extra labels are few and do not change the picture.
        kinase = kin[0] if MultiLabel else kin
        Groups.append(np.argmax(KE.UniProtID_to_GroupVec[kinase.UniprotID]))
    Groups = np.array(Groups)

    Group_names = ['low', 'medium', 'high']
    fig, ax = plt.subplots(figsize=(8, 8))
    # pyplot.get_cmap exists on old and new Matplotlib alike, whereas
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    cmap = plt.get_cmap('prism')

    # Iterate over the names that actually exist. The previous fixed
    # range(10) indexed past the three names and raised IndexError.
    # NOTE: rows whose group index is >= len(Group_names) are not drawn.
    for i, group_name in enumerate(Group_names):
        in_group = Groups == i
        ax.scatter(tsne_results[in_group, 0], tsne_results[in_group, 1],
                   color=cmap(i/9), alpha=0.7, label=group_name)

    if Legend:
        ax.legend(bbox_to_anchor=(1, 1), loc=2,
                  ncol=1, borderaxespad=0.)

    ax.set_yticklabels([])
    ax.set_xticklabels([])
    # Save before showing so the file is written regardless of what the
    # active backend does to the figure once it has been displayed.
    fig.savefig(name, bbox_inches='tight')
    plt.show()

In [None]:
# Scratch check of how ndarray.resize grows an array.
a = np.zeros((3,3))
print(a)
b = np.random.random((2,3))
# ndarray.resize works in place and returns None, so printing its return
# value only ever showed "None". Resize first, then print the array; the
# added row is zero-filled. refcheck=False is safe because `b` was created
# just above and shares its memory with nothing else, and it avoids the
# spurious "array is referenced" ValueError interactive sessions can raise.
b.resize((3,3), refcheck=False)
print(b)