In [2]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import timeit
from mlmodel import *
import pickle
from sklearn.linear_model import Ridge, LinearRegression, LogisticRegression
import utils
from torchvision import transforms
import torchvision.models as models
from torch import nn
import json
from sklearn.preprocessing import MaxAbsScaler

In [2]:
!ls ../ZSTL_Data/hetrec2011-lastfm-2k/

[31martists.dat[m[m                       [31muser_artists.dat[m[m
[34mextracted_feature[m[m                 [31muser_friends.dat[m[m
[31mreadme.txt[m[m                        [31muser_taggedartists-timestamps.dat[m[m
[31mtags.dat[m[m                          [31muser_taggedartists.dat[m[m


In [22]:
path_user_artist = '../ZSTL_Data/hetrec2011-lastfm-2k/user_artists.dat'
path_artist = '../ZSTL_Data/hetrec2011-lastfm-2k/artists.dat'
path_user_artist_tag = '../ZSTL_Data/hetrec2011-lastfm-2k/user_taggedartists-timestamps.dat'
path_user_friends = '../ZSTL_Data/hetrec2011-lastfm-2k/user_friends.dat'

df_user_artist = pd.read_csv(path_user_artist, sep="\t")
print(df_user_artist, len(df_user_artist.artistID.unique()))
df_user_artist_tag = pd.read_csv(path_user_artist_tag, sep="\t")
print(df_user_artist_tag, len(df_user_artist_tag.userID.unique()),\
    len(df_user_artist_tag.artistID.unique()),len(df_user_artist_tag.tagID.unique()))

df_user_friends = pd.read_csv(path_user_friends, sep="\t")
print(df_user_friends, len(df_user_friends.userID.unique()), len(df_user_friends.friendID.unique()))

df_artist = pd.read_csv(path_artist, sep="\t")
print(df_artist, len(df_artist.id.unique()))
print(df_artist.loc[df_artist.id.eq(14103)])

userID  artistID  weight
0           2        51   13883
1           2        52   11690
2           2        53   11351
3           2        54   10300
4           2        55    8983
...       ...       ...     ...
92829    2100     18726     337
92830    2100     18727     297
92831    2100     18728     281
92832    2100     18729     280
92833    2100     18730     263

[92834 rows x 3 columns] 17632
        userID  artistID  tagID      timestamp
0            2        52     13  1238536800000
1            2        52     15  1238536800000
2            2        52     18  1238536800000
3            2        52     21  1238536800000
4            2        52     41  1238536800000
...        ...       ...    ...            ...
186474    2100     16437      4  1277935200000
186475    2100     16437    292  1272664800000
186476    2100     16437   2087  1277935200000
186477    2100     16437   2801  1272664800000
186478    2100     16437   3335  1277935200000

[186479 rows x 4 columns] 

In [23]:
num_user = len(df_user_artist.userID.unique())
num_artist = len(df_artist.id.unique())
num_tag = len(df_user_artist_tag.tagID.unique())
print(num_user, num_artist, num_tag)

1892 17632 9749


In [24]:
def genUserItem_table(df_user_artist, num_user, num_artist):
    artistID_to_X_row = {}
    userID_to_Y_row = {}

    y = np.zeros((num_user, num_artist))
    print('y shape ', y.shape)
    i = 0
    j = 0
    for r in df_user_artist.iterrows():
        #print(r[0])
        #print(r[1].to_numpy())
        user_artist_count = r[1].to_numpy()
        #print(user_artist_count)
        if user_artist_count[0] not in userID_to_Y_row.keys():
            userID_to_Y_row[user_artist_count[0]] = i
            cur_i = i
            i += 1
        else:
            cur_i = userID_to_Y_row[user_artist_count[0]]

        if user_artist_count[1] not in artistID_to_X_row.keys():
            artistID_to_X_row[user_artist_count[1]] = j
            cur_j = j
            j += 1
        else:
            cur_j = artistID_to_X_row[user_artist_count[1]]
        
        #print(cur_i, cur_j)
        y[cur_i, cur_j] = 1

    sparse = 1 - np.sum(y)/(num_user * num_artist)
    print('sparsity ', sparse, np.sum(y))
    print(y[y>1])
    print('i, j ', i ,j)
    return y, userID_to_Y_row, artistID_to_X_row

y, userID_to_Y_row, artistID_to_X_row = genUserItem_table(df_user_artist, num_user, num_artist)

y shape  (1892, 17632)
sparsity  0.9972171848800758 92834.0
[]
i, j  1892 17632


In [25]:
def gen_X_n_Attr(df_user_artist_tag, df_user_friends, userID_to_Y_row, artistID_to_X_row, num_user, num_artist, num_tag):
    X_tagRecord_byID = {}
    friend_to_a_col = {}
    tag_to_x_col = {}
    xcol = 0
    acol = 0
    num_tag_artist = len(df_user_artist_tag.artistID.unique())

    a = np.zeros((num_user, num_user))
    print('a shape ', a.shape)
    x = np.zeros((num_artist, num_tag))
    not_shown_artist = []

    #gen attr
    for r in df_user_friends.iterrows():
        #print('friend relation ', r[1].to_numpy())
        user_friend = r[1].to_numpy()
        cur_user = user_friend[0]
        friend = user_friend[1]
        if friend not in friend_to_a_col:
            friend_to_a_col[friend] = acol
            cur_acol = acol
            acol += 1          
        else:
            cur_acol = friend_to_a_col[friend]

        a[userID_to_Y_row[cur_user], cur_acol] = 1
        
    #a = ppp

    for r in df_user_artist_tag.iterrows():
        user_artist_tag = r[1].to_numpy()

        cur_user = user_artist_tag[0]
        cur_artist = user_artist_tag[1]
        cur_tag = user_artist_tag[2]

        if cur_user not in X_tagRecord_byID:
            X_tagRecord_byID[cur_user] = []
            X_tagRecord_byID[cur_user].append((cur_artist, cur_tag))
        else:
            X_tagRecord_byID[cur_user].append((cur_artist, cur_tag))

        if cur_tag not in tag_to_x_col:
            tag_to_x_col[cur_tag] = xcol
            xcol += 1

        if cur_artist in artistID_to_X_row:
            x[artistID_to_X_row[cur_artist], tag_to_x_col[cur_tag]] += 1

    #print('not shown artist ', len(not_shown_artist), not_shown_artist)
    return a, X_tagRecord_byID, tag_to_x_col, x

print('len ', len(artistID_to_X_row))
a, X_tagRecord_byID, tag_to_x_col, x = gen_X_n_Attr(df_user_artist_tag, df_user_friends, userID_to_Y_row, artistID_to_X_row, num_user, num_artist, num_tag)

len  17632
a shape  (1892, 1892)


In [26]:
num_tag = 0
for k in X_tagRecord_byID.keys():
    num_tag += len(X_tagRecord_byID[k])

avg_tag = num_tag/len(list(X_tagRecord_byID.keys()))
print('avg_tag ', avg_tag)

a_meanFriend = np.mean(np.sum(a, axis=1))
print('a_meanFriend ', a_meanFriend)

print('x ', np.sum(x), np.mean(np.sum(x, axis=1)) )
print(np.sum(np.sum(x, axis=0)==0))

avg_tag  98.56183932346723
a_meanFriend  13.44291754756871
x  184941.0 10.48894056261343
31


In [8]:
sum_y = np.sum(y, axis=1)
print('sum y ', np.max(sum_y), np.min(sum_y), np.mean(sum_y), np.median(sum_y))
# for i in sum_y:
#     print(i)

sum y  50.0 1.0 49.06659619450317 50.0


In [27]:
def genCompressedData(userID_to_Y_row, artistID_to_X_row, X_tagRecord_byID, y, a, destination, compressed_size=100):
    compressd_task_byID = {}
    i = 0
    for t_id in userID_to_Y_row.keys():
        #print('t ', t_id, userID_to_Y_row[t_id])
        
        cur_indx = userID_to_Y_row[t_id]
        cur_y = y[cur_indx, :]
        #print('y ', cur_y)
        pos_indx = np.where(cur_y == 1)
        neg_indx = np.where(cur_y == 0)
        #print('pos ', len(pos_indx[0]), pos_indx[0])
        #print('neg ', len(neg_indx[0]), neg_indx[0])
        neg_indx_selected = np.random.choice(neg_indx[0], size=100-len(pos_indx[0]), replace=False)
        #print('neg select ', len(neg_indx_selected), neg_indx_selected)
        data_indx_selected = np.concatenate([pos_indx[0], neg_indx_selected])
        #print('indx selected ', len(indx_selected), indx_selected )
        cur_tag_record = X_tagRecord_byID[t_id]

        # task_y = cur_y[data_indx_selected]
        # task_x = X_tagRecord_byID[t_id]
       
        #print('task_y ', task_y.shape)
        #print('task_x ', task_x.shape)

        # clf = LogisticRegression(fit_intercept = False, max_iter=1000,random_state=0).fit(task_x, task_y)
        # pred_y = clf.predict(task_x)
        # #print('pred_y ', pred_y.shape, 'task_y ', task_y.shape)
        # print('acc ', np.sum(pred_y==task_y)/task_y.shape[0])

        # param = clf.coef_

        #print('param ', param.shape)

        #compressd_task_byID[str(t_id)] = (a[cur_indx, :], param, task_x, np.atleast_2d(task_y).T)
        compressd_task_byID[t_id] = (cur_indx, data_indx_selected, cur_tag_record)


        # i+= 1
        # if i == 3:
        #     a = pppp

    # with open(destination+'sampled_task_0826.pickle', 'wb') as handle:
    #     pickle.dump(compressd_task_byID, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return 0 

destination = '../ZSTL_Data/hetrec2011-lastfm-2k/extracted_feature/'
genCompressedData(userID_to_Y_row, artistID_to_X_row, X_tagRecord_byID, y, a, destination)

0

In [10]:
def storeTestData( y, a, userID_to_Y_row, artistID_to_X_row, tag_to_x_col, destination):
    totData = {}
    totData['y'] = y
    totData['a'] = a
    print('a ', a)
    totData['userID_to_Y_row'] = userID_to_Y_row
    totData['artistID_to_X_row'] = artistID_to_X_row
    totData['tag_to_x_col'] = tag_to_x_col
    totData['num_artist'] = len(list(artistID_to_X_row.keys()))
    totData['num_tag'] = len(list(tag_to_x_col.keys()))

    with open(destination+'detailed_data_0826.pickle', 'wb') as handle:
        pickle.dump(totData, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return 0
storeTestData( y, a, userID_to_Y_row, artistID_to_X_row, tag_to_x_col, destination)

a  [[1. 1. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


0

In [3]:
path_data = '../ZSTL_Data/hetrec2011-lastfm-2k/extracted_feature/'
compressed_data = 'sampled_task_0826.pickle'
detailed_data = 'detailed_data_0826.pickle'

with open(path_data+compressed_data, 'rb') as f:
    compressed_dataset = pickle.load(f)

with open(path_data+detailed_data, 'rb') as f:
    detailed_dataset = pickle.load(f)

In [6]:
class Dataset_hetrec(torch.utils.data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, compressed_data, detailed_data, preset_x = None, train_task=False, phase='Normal', learner='LR'):
        'Initialization'
        self.compressed_data = compressed_data

        self.num_artist = detailed_data['num_artist']
        #print('num_artist ', self.num_artist, len(list(detailed_data['artistID_to_X_row'].keys())))
        self.num_tag = detailed_data['num_tag']
        #print('num_tag', self.num_tag)
        self.userID_to_Y_row = detailed_data['userID_to_Y_row']
        self.artistID_to_X_row = detailed_data['artistID_to_X_row']
        self.tag_to_x_col = detailed_data['tag_to_x_col']
        #print('tag_to_x_col ',len(list(self.tag_to_x_col.keys())))
        #print

        
        self.y = detailed_data['y']
        self.a = detailed_data['a']
        #print('a ', self.a)
        self.pahse = phase
        self.param = {}
        print('learner ', learner)
        if preset_x is None:
            print('gen x', learner)
            self.x = np.zeros((self.num_artist, self.num_tag))
            if learner=='LR':
                self.genX()
            elif learner=='NN':
                print('x nn')
                self.genX(ones_col=False)
        else:
            self.x = preset_x

        if train_task:
            print('gen w, wait ', learner)
            if learner=='LR':
                self.genW_LR()
            elif learner=='NN':
                self.genW_NN()

            print('gen w done')
        else:
            print('not gen w')
            for k in self.userID_to_Y_row.keys():
                self.param[k] = np.atleast_2d(np.array(0))

  def genX(self, ones_col=True):
        tag_len = 0
        not_inLst = set()
        for data in self.compressed_data:
            k = data[0]
            cur_tag_record = data[1][-1]
            tag_len += len(cur_tag_record)
            #print('cur_tag_record ', cur_tag_record)
            for item in cur_tag_record:
                #print(item)
                artist = item[0]
                tag = item[1]
                if artist in self.artistID_to_X_row.keys():
                    self.x[self.artistID_to_X_row[artist], self.tag_to_x_col[tag]] += 1
                else:
                    not_inLst.add(artist)
            
        #print('x shape ', self.x.shape)
        if ones_col==True:
            print('concat ones')
            ones = np.ones((self.x.shape[0], 1))
            self.x = np.concatenate([ones, self.x], axis=1)
        print('x shape ', self.x.shape)

        
        # print('avg listen ', np.sum(self.x))
        # print('avg tag ', tag_len/len(self.compressed_data), tag_len)
        # print('num not in ', len(not_inLst))

  def genW_LR(self,):
        
        for data in self.compressed_data:
            k = data[0]
            cur_compressed_data = data[1]
            cur_y = self.y[cur_compressed_data[0],:]

            task_y = cur_y[cur_compressed_data[1]]
            task_x = [np.expand_dims(self.x[i,:], axis=0) for i in cur_compressed_data[1]]
            task_x = np.concatenate(task_x, axis=0)

            clf = LogisticRegression(fit_intercept = False, max_iter=1000,random_state=0).fit(task_x, task_y)
            pred_y = clf.predict(task_x)

            print('acc ', np.sum(pred_y==task_y)/task_y.shape[0])
            self.param[k] = clf.coef_
        #print(self.param[k].shape)

  def genW_NN(self,):
        for data in self.compressed_data:
            k = data[0]
            cur_compressed_data = data[1]
            cur_y = self.y[cur_compressed_data[0],:]

            task_y = np.expand_dims(cur_y[cur_compressed_data[1]], axis=0).T
            task_x = [np.expand_dims(self.x[i,:], axis=0) for i in cur_compressed_data[1]]
            task_x = np.concatenate(task_x, axis=0)
            #print('num i/o ',task_x.size()[1], task_y.size()[1])
            model = utils.CSR_model(task_x.shape[1], task_y.shape[1])

            #print('task y', task_y.shape, 'task x',task_x.shape)
            self.param[k] = utils.CSR_train(task_x, task_y, 200, 1e-4, model)
            
            #a = ppppp
            # print('acc ', np.sum(pred_y==task_y)/task_y.shape[0])
            # self.param[k] = clf.coef_
        #print(self.param[k].shape)
        self.shape_record = {}
        for i, w in enumerate(self.param[k]):
            self.shape_record[i] = [w.shape]
        print('shape record ', self.shape_record)
  def __len__(self):
        'Denotes the total number of samples'
        return len(self.compressed_data)

  def vectorize(self, weights):
        #weights with original model parameter shape
        flatted_param = []

        for i, w in enumerate(weights):
            #print(w)
            if len(w) == 0:
                pass
            else:
                flatted = w.flatten()
                flatted_param = flatted_param + list(flatted)
        
        return flatted_param

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        k = self.compressed_data[index][0]
        item = self.compressed_data[index][1]
        data_indx = item[0]
        # Load data and get label
        #print('data_indx ', data_indx)
        a = np.array(self.a[data_indx, :])
        a = np.expand_dims(a, axis=0)
      
        w = self.param[k]
        w = np.expand_dims(self.vectorize(w), axis=0)

        if self.pahse == 'Normal':
            selected_data = item[1]
            x = [np.expand_dims(self.x[i,:], axis=0) for i in selected_data]
            x = np.concatenate(x, axis=0)
            cur_y = self.y[data_indx, :]
            y = np.expand_dims(cur_y[selected_data], axis=0).T
            return a, w, x, y
        elif self.pahse == 'mAP':
            y = np.expand_dims(self.y[data_indx, :], axis=1)
            return a, w, y



In [7]:
from torch.utils.data import DataLoader

train_batch_size = 200
task_id = list(compressed_dataset.keys())
tot_len = len(task_id)
train_size = int(0.8*tot_len)
test_size = tot_len  - train_size
print('train_size ', train_size, 'test_size ', test_size)

support_size = 100

train_indx_full = list(np.random.choice(task_id, size=train_size, replace=False))
print(len(train_indx_full))
test_indx = [x for x in task_id if x not in train_indx_full]
print(len(test_indx))

train_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in train_indx_full], detailed_dataset, train_task=False, learner='NN')
preset_x =  train_data.x
print('preset x ',preset_x.shape)

support_indx = list(np.random.choice(train_indx_full, size=support_size, replace=False))
train_indx = [x for x in train_indx_full if x not in support_indx]
print(len(support_indx), len(train_indx))

support_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in support_indx], detailed_dataset, preset_x =preset_x, train_task=True, learner='NN')

train_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in train_indx], detailed_dataset,preset_x =preset_x, train_task=False)
test_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in test_indx], detailed_dataset, preset_x = preset_x, train_task=False)

support_loader = DataLoader(support_data, batch_size=support_size, shuffle=False)
train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=test_size, shuffle=True)

support_a, support_w, support_x, support_y = next(iter(support_loader))
print(support_a.shape, support_w.shape, support_x.shape, support_y.shape)

train_a, train_w, train_x, train_y = next(iter(train_loader))
train_a, train_w, train_x, train_y = train_a.float(), train_w.float(), train_x.float(), train_y.float()
print(train_a.shape, train_w.shape, train_x.shape, train_y.shape)
#print(train_a)

test_a, test_w, test_x, test_y = next(iter(test_loader))
test_a, test_w, test_x, test_y = test_a.float(), test_w.float(), test_x.float(), test_y.float()
print(test_a.shape, test_w.shape, test_x.shape, test_y.shape)
#a = ppp


test_data_full = Dataset_hetrec([(d, compressed_dataset[d]) for d in test_indx], detailed_dataset, preset_x =preset_x, phase='mAP', train_task=False)
test_data_full_loader = DataLoader(test_data_full, batch_size=test_size, shuffle=True)

test_batch = next(iter(test_data_full_loader))
test_a, test_w, test_y = test_batch[0].float(), test_batch[1].float(), \
    test_batch[2].float()

test_x = preset_x
print(test_a.shape, test_w.shape, test_y.shape, test_x.shape)

train_size  1513 test_size  379
1513
379
learner  NN
gen x NN
x nn
x shape  (17632, 9749)
not gen w
preset x  (17632, 9749)
100 1413
learner  NN
gen w, wait  NN
====> Epoch: 99 Average loss: 0.3587; ACC 0.8500000238418579
====> Epoch: 199 Average loss: 0.2215; ACC 0.8600000143051147


====> Epoch: 99 Average loss: 0.3757; ACC 0.9200000166893005
====> Epoch: 199 Average loss: 0.1834; ACC 0.949999988079071


====> Epoch: 99 Average loss: 0.3379; ACC 0.9300000071525574
====> Epoch: 199 Average loss: 0.1788; ACC 0.9399999976158142


====> Epoch: 99 Average loss: 0.2602; ACC 1.0
====> Epoch: 199 Average loss: 0.0915; ACC 1.0


====> Epoch: 99 Average loss: 0.2931; ACC 1.0
====> Epoch: 199 Average loss: 0.1034; ACC 1.0


====> Epoch: 99 Average loss: 0.2632; ACC 0.9900000095367432
====> Epoch: 199 Average loss: 0.0975; ACC 0.9900000095367432


====> Epoch: 99 Average loss: 0.3050; ACC 0.9700000286102295
====> Epoch: 199 Average loss: 0.1319; ACC 0.9800000190734863


====> Epoch: 99 Average l

In [41]:
#for comparison, only train and test

train_batch_size = 150
task_id = list(compressed_dataset.keys())
tot_len = len(task_id)
train_size = int(0.8*tot_len)
test_size = tot_len  - train_size
print('train_size ', train_size, 'test_size ', test_size)

train_indx = list(np.random.choice(task_id, size=train_size, replace=False))
print(len(train_indx))
test_indx = [x for x in task_id if x not in train_indx]
print(len(test_indx))

train_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in train_indx], detailed_dataset)
train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)

train_a, train_w, train_x, train_y = next(iter(train_loader))
train_a, train_w, train_x, train_y = train_a.float(), train_w.float(), train_x.float(), train_y.float()
print(train_a.shape, train_w.shape, train_x.shape, train_y.shape)

preset_x = utils.toTensor(train_data.x)

test_data = Dataset_hetrec([(d, compressed_dataset[d]) for d in test_indx], detailed_dataset, preset_x = preset_x)
test_loader = DataLoader(test_data, batch_size=test_size, shuffle=True)
#print(train_a)

test_a, test_w, test_x, test_y = next(iter(test_loader))
test_a, test_w, test_x, test_y = test_a.float(), test_w.float(), test_x.float(), test_y.float()
print(test_a.shape, test_w.shape, test_x.shape, test_y.shape)

#a = ppp



test_data_full = Dataset_hetrec([(d, compressed_dataset[d]) for d in test_indx], detailed_dataset, preset_x =preset_x, phase='mAP')
test_data_full_loader = DataLoader(test_data_full, batch_size=test_size, shuffle=True)

test_batch = next(iter(test_data_full_loader))
test_a, test_w, test_y = test_batch[0].float(), test_batch[1].float(), \
    test_batch[2].float()

test_x = preset_x
print(test_a.shape, test_w.shape, test_y.shape, test_x.shape)

train_size  1513 test_size  379
gen x
x shape  (17632, 9750)
gen w, wait 
gen w done
torch.Size([150, 1, 1892]) torch.Size([150, 1, 9750]) torch.Size([150, 100, 9750]) torch.Size([150, 100, 1])
gen w, wait 
gen w done
torch.Size([379, 1, 1892]) torch.Size([379, 1, 9750]) torch.Size([379, 100, 9750]) torch.Size([379, 100, 1])
gen w, wait 
gen w done
torch.Size([379, 1, 1892]) torch.Size([379, 1, 9750]) torch.Size([379, 17632, 1]) torch.Size([17632, 9750])


In [12]:
from torch.utils.data import DataLoader
def genSplits_hectrec(compressed_data, detailed_data, train_size, test_size, support_size, train_batch_size=100):

    task_id = list(compressed_data.keys())
    tot_len = len(task_id)

    support_indx = list(np.random.choice(task_id, size=support_size, replace=False))
    print(len(support_indx))
    temp = [x for x in task_id if x not in support_indx]
    train_indx = list(np.random.choice(temp, size=train_size, replace=False))
    temp = [x for x in temp if x not in train_indx]
    print(len(train_indx))
    test_indx = temp
    print(len(test_indx))

    support_data = utils.Dataset_hetrec([compressed_data[d] for d in support_indx], detailed_data)
    train_data = utils.Dataset_hetrec([compressed_data[d] for d in train_indx], detailed_data)
    test_data = utils.Dataset_hetrec([compressed_data[d] for d in test_indx], detailed_data)

    support_loader = DataLoader(support_data, batch_size=support_size, shuffle=False)
    train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=test_size, shuffle=True)

    return support_loader, train_loader, test_loader


In [13]:

path_data = '../ZSTL_Data/hetrec2011-lastfm-2k/extracted_feature/'
compressed_data = 'sampled_task.pickle'
detailed_data = 'detailed_data.pickle'

with open(path_data+compressed_data, 'rb') as f:
    compressed_dataset = pickle.load(f)

with open(path_data+detailed_data, 'rb') as f:
    detailed_dataset = pickle.load(f)


In [15]:
total_len = len(compressed_dataset)
print('tot_len ', total_len, compressed_dataset[2])
support_size = 150
test_size = int(total_len*0.2)
train_size = int(total_len - support_size - test_size)
support_loader, train_loader, test_loader = genSplits(compressed_dataset, detailed_dataset, train_size, test_size, support_size)

tot_len  1892 (0, array([[-1.72784869,  0.50704983,  0.30139832, ...,  0.        ,
         0.        ,  0.        ]]), array([    0,     1,     2,     3,     4,     5,     6,     7,     8,
           9,    10,    11,    12,    13,    14,    15,    16,    17,
          18,    19,    20,    21,    22,    23,    24,    25,    26,
          27,    28,    29,    30,    31,    32,    33,    34,    35,
          36,    37,    38,    39,    40,    41,    42,    43,    44,
          45,    46,    47,    48,    49, 10556, 14767,  1654,  9267,
        7242,  3435,  5862,  8099,  3307,  3973,  9003,  7807, 17251,
       16162, 14519,  1073,  4214,  1393, 12544,  9680, 10355,  3158,
        2921, 12854,  3927,  6533, 13634, 10059,  3989,  7949, 14261,
        4394,  6692, 15157,  2093,  2457,  3482, 16200, 15700,  8771,
       15223, 17363,  7165,  7816,   926, 16911,   574,   409,  5483,
        7275]))


NameError: name 'genSplits' is not defined

In [None]:
print(type(support_loader))
support_a, support_w, support_x, support_y = next(iter(support_loader))
support_a, support_w, support_x, support_y = support_a.float(), support_w.float(), support_x.float(), support_y.float()
print(support_a.shape, support_w.shape, support_x.shape, support_y.shape)

NameError: name 'np' is not defined