In [1]:
import sys
sys.path.append("..")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import datetime
from datetime import datetime, timedelta
from tools import *
import os

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
import seaborn as sns
sns.set_style("ticks")
sns.set_context("notebook")
sns.color_palette("colorblind")
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 10
plt.rcParams['axes.labelsize'] = 35
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 45
plt.rcParams['ytick.labelsize'] = 25
plt.rcParams['xtick.labelsize'] = 25
from matplotlib import rc
rc("text", usetex=True)
plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}' + '\n'  + r'\usepackage{amssymb}'

In [5]:
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

<Figure size 432x288 with 0 Axes>

In [6]:
in_folder = "../data/input/sociopatterns/"
out_folder = "../data/output/sociopatterns/"

In [7]:
datasets = os.listdir(in_folder)

seeds = [697752728, 4190089612, 1176914559, 3077924848, 315917623, 2544020234, 1077758578, 4071300106, 534591752, 3553386411]
K_affs = [1,3,5,7,9,11]
folds = [0,1,2,3,4]
colors = ["red", "orange", "blue", "grey", "cyan"]

In [8]:
datasets

['workplace01', 'highschool', 'hospital', 'primary_school', 'workplace02']

In [9]:
df = pd.DataFrame()

fold=0
dataset="workplace01"
K_aff=5

params = np.load(in_folder + dataset + "/params.npz")
mu, Z = params["u"], params["Z"]
data = read_data(in_folder + dataset + "/", "data")
masks = np.load(in_folder + dataset +"/mask.npz")["mask"]
T = masks.shape[1]
mask = masks[fold]


'''
    determine best seeds for each run
'''
best_score_noexp = -1e10
seed_noexp = seeds[0]

best_score_exp = -1e10
seed_exp = seeds[0]

for seed in seeds: 
    training_noexp = f"training_NoExp_{seed}_{fold}_{K_aff}.npz"  
    score_noexp = np.load(out_folder + dataset + "/" + training_noexp)["losses"][-1]
    if score_noexp > best_score_noexp: 
        best_score_noexp = score_noexp
        seed_noexp = seed

    training_exp = f"training_Exp_{seed}_{fold}_{K_aff}.npz"
    score_exp = np.load(out_folder + dataset + "/" + training_exp)["losses"][-1]
    if score_exp > best_score_exp: 
        best_score_exp = score_exp
        seed_exp = seed        

training_exp = f"training_Exp_{seed_exp}_{fold}_{K_aff}.npz"
print(training_exp)
losses = np.load(out_folder + dataset + "/" + training_exp)
print(out_folder + dataset + "/" + training_exp)

plot_losses(losses["losses"])
plot_losses(losses["theta_errors"])
plot_losses(losses["exp_errors"])

FileNotFoundError: [Errno 2] No such file or directory: '../data/output/sociopatterns/workplace01/training_NoExp_697752728_0_5.npz'

In [None]:
params = np.load(in_folder + dataset + "/params.npz")
mu, Z = params["u"], params["Z"]
data = read_data(in_folder + dataset + "/", "data")
masks = np.load(in_folder + dataset +"/mask.npz")["mask"]
T = masks.shape[1]
mask = masks[fold]

'''
    evaluate and store NoExp results
'''            
file_noexp = f"params_NoExp_{seed_noexp}_{fold}_{K_aff}.npz"
params_noexp = np.load(out_folder + dataset + "/" + file_noexp)
u_noexp, v_noexp, w_noexp = params_noexp["u"], params_noexp["v"], params_noexp["w"]
lam_noexp = u_noexp @ w_noexp @ v_noexp.T
lam_noexp = np.array([lam_noexp]*T)
AUC_A = calculate_AUC(data>0, lam_noexp, mask=mask)
df = df.append({"dataset": dataset, "Exp": 0, "fold": fold, "K_aff": K_aff, "seed": seed_noexp, "AUC_A": AUC_A}, ignore_index=True)
print(f"NoExp, fold={fold}: ", f"K_aff={K_aff}", "AUC_A=", AUC_A)     


'''
    evaluate and store Exp results
'''
file_exp = f"params_Exp_{seed_exp}_{fold}_{K_aff}.npz"
params_exp = np.load(out_folder + dataset + "/" + file_exp)
u_exp, v_exp, w_exp, mu_exp, Q = params_exp["u"], params_exp["v"], params_exp["w"], params_exp["mu"], params_exp["Q"]
lam_exp = u_exp @ w_exp @ v_exp.T
lam_exp_0 = np.array([lam_exp]*T)
lam_exp_1 = lam_exp_0 * Q
AUC_A = calculate_AUC(data>0, lam_exp_1, mask=mask)
acc_mu = evaluate_memberships(mu, mu_exp, mu, mu_exp)[0]
AUC_Q = calculate_AUC(Z, Q)
df = df.append({"dataset": dataset, "Exp": 1, "fold": fold, "seed": seed_exp, "K_aff": K_aff, "AUC_A": AUC_A, "acc_mu": acc_mu, "AUC_Q": AUC_Q}, ignore_index=True)
print(f"Exp, fold={fold}, K_aff={K_aff}: ", "AUC_A: ", AUC_A, "acc_mu=", acc_mu, f"AUC_Q={AUC_Q}")



'''
    visualize performance for each dataset and fold 0
'''
print_bold(f"K_aff: {K_aff}")
fig, axs = plt.subplots(2,3,figsize=(22,14))
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=None)

im = axs[0][0].imshow(np.log(data.sum(axis=0)+1))
#axs[0][0].xaxis.tick_top() 
axs[0][0].set_title("\\textbf{A}")  

axs[0][1].imshow((np.log(lam_exp_0.sum(axis=0)+1)))
#axs[0][1].xaxis.tick_top()
axs[0][1].set_title("$\log(\lambda_{Exp}^0)$", pad=20)

axs[0][2].imshow(np.log(lam_exp_1.sum(axis=0)+1))
#axs[0][2].xaxis.tick_top()
axs[0][2].set_title("$\log(\lambda_{Exp}^1)$", pad=20)


axs[1][0].imshow(Z.sum(axis=0))
#axs[1][0].xaxis.tick_top()
axs[1][0].set_title("\\textbf{Z}")

axs[1][1].imshow(np.log(Q.sum(axis=0)+1))
#axs[1][1].xaxis.tick_top()
axs[1][1].set_title("$\log(Q)$", pad=20)

axs[1][2].imshow(np.log(np.log(mu_exp@mu_exp.T +1)+1))
#axs[1][2].xaxis.tick_top()
axs[1][2].set_title("$\log(\mu\cdot\mu^T)$", pad=20)

plt.setp(axs, yticks=plt.xticks()[0][1:-1])

cbar = fig.colorbar(im, ax=axs.ravel().tolist(), shrink=0.925)
#cbar.set_ticks(np.arange(0, 1.1, 0.5))
#cbar.set_ticklabels(['low', 'medium', 'high'])

#plt.savefig(f"figures/{dataset}.png", bbox_inches="tight", dpi=300)
plt.show()

In [None]:
df = pd.DataFrame()

for dataset in datasets: 
    if dataset != "workplace01": 
        continue
    print_bold(dataset)
    dataset_files = os.listdir(out_folder + dataset)
    params = np.load(in_folder + dataset + "/params.npz")
    u, Z = params["u"], params["Z"]
    K_exp = u.shape[1]
    data = read_data(in_folder + dataset + "/", "data")
    masks = np.load(in_folder + dataset +"/mask.npz")["mask"]
    T = masks.shape[1]
    
    for fold in folds: 
        mask = masks[fold]
        for K_aff in K_affs: 
            acc_u, acc_v = 0, 0
            '''
                determine best seeds for each run
            '''
            best_score_noexp = -1e10
            seed_noexp = seeds[0]

            best_score_exp = -1e10
            seed_exp = seeds[0]

            for seed in seeds: 
                training_noexp = f"training_NoExp_{seed}_{fold}_{K_aff}.npz"  
                score_noexp = np.load(out_folder + dataset + "/" + training_noexp)["losses"][-1]
                if score_noexp > best_score_noexp: 
                    best_score_noexp = score_noexp
                    seed_noexp = seed

                training_exp = f"training_Exp_{seed}_{fold}_{K_aff}.npz"
                if training_exp in dataset_files: 
                    score_exp = np.load(out_folder + dataset + "/" + training_exp)["losses"][-1]
                    if score_exp > best_score_exp: 
                        best_score_exp = score_exp
                        seed_exp = seed        


            '''
                evaluate and store NoExp results
            '''            
            file_noexp = f"params_NoExp_{seed_noexp}_{fold}_{K_aff}.npz"
            params_noexp = np.load(out_folder + dataset + "/" + file_noexp)
            u_noexp, v_noexp, w_noexp = params_noexp["u"], params_noexp["v"], params_noexp["w"]
            if K_exp == K_aff: 
                acc_u, acc_v, _, _ = evaluate_memberships(u, u_noexp, u, v_noexp)
            lam_noexp = u_noexp @ w_noexp @ v_noexp.T
            lam_noexp = np.array([lam_noexp]*T)
            AUC_A = calculate_AUC(data>0, lam_noexp, mask=mask)
            df = df.append({"dataset": dataset, "Exp": 0, "fold": fold, "K_aff": K_aff, "seed": seed_noexp, "acc_u": (acc_u+acc_v)/2, "AUC_A": AUC_A}, ignore_index=True)
            print(f"NoExp, fold={fold}: ", f"K_aff={K_aff}", "acc_u= ", (acc_u+acc_v)/2, "AUC_A=", AUC_A)     


            '''
                evaluate and store Exp results
            '''
            file_exp = f"params_Exp_{seed_exp}_{fold}_{K_aff}.npz"
            params_exp = np.load(out_folder + dataset + "/" + file_exp)
            u_exp, v_exp, w_exp, mu_exp, Q = params_exp["u"], params_exp["v"], params_exp["w"], params_exp["mu"], params_exp["Q"]
            lam_exp = u_exp @ w_exp @ v_exp.T
            lam_exp_0 = np.array([lam_exp]*T)
            lam_exp_1 = lam_exp_0 * Q
            AUC_A = calculate_AUC(data>0, lam_exp_1, mask= mask)
            acc_mu = evaluate_memberships(u, mu_exp, u, mu_exp)[0]
            if K_exp == K_aff: 
                acc_u, acc_v, _, _ = evaluate_memberships(u, u_exp, u, v_exp)
            AUC_Q = calculate_AUC(Z, Q)
            df = df.append({"dataset": dataset, "Exp": 1, "fold": fold, "seed": seed_exp, "K_aff": K_aff, "AUC_A": AUC_A, "acc_u": (acc_u+acc_v)/2, "acc_mu": acc_mu, "AUC_Q": AUC_Q}, ignore_index=True)
            print(f"Exp, fold={fold}, K_aff={K_aff}: ", f"AUC_A={AUC_A}", f"acc_u={(acc_u+acc_v)/2} ", "acc_mu=", acc_mu, f"AUC_Q={AUC_Q}")
       
    
    
            '''
                visualize performance for each dataset and fold 0
            '''
            if fold == 0:
                print_bold(f"K_aff: {K_aff}")
                fig, axs = plt.subplots(2,3,figsize=(22,14))
                plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=None)
                
                im = axs[0][0].imshow(np.log((data*mask).sum(axis=0)+1))
                #axs[0][0].xaxis.tick_top() 
                axs[0][0].set_title("\\textbf{A}")  

                axs[0][1].imshow((np.log((lam_noexp*mask).sum(axis=0)+1)))
                #axs[0][1].xaxis.tick_top()
                axs[0][1].set_title("$\log(\lambda_{NoExp})$", pad=20)

                axs[0][2].imshow(np.log((lam_exp_1*mask).sum(axis=0)+1))
                #axs[0][2].xaxis.tick_top()
                axs[0][2].set_title("$\log(\lambda_{Exp})$", pad=20)


                axs[1][0].imshow(Z.sum(axis=0))
                #axs[1][0].xaxis.tick_top()
                axs[1][0].set_title("\\textbf{Z}")

                axs[1][1].imshow(np.log(Q.sum(axis=0)+1))
                #axs[1][1].xaxis.tick_top()
                axs[1][1].set_title("$\log(Q)$", pad=20)

                axs[1][2].imshow(np.log(mu_exp@mu_exp.T +1))
                #axs[1][2].xaxis.tick_top()
                axs[1][2].set_title("$\log(\mu\cdot\mu^T)$", pad=20)

                plt.setp(axs, yticks=plt.xticks()[0][1:-1])

                cbar = fig.colorbar(im, ax=axs.ravel().tolist(), shrink=0.925)
                #cbar.set_ticks(np.arange(0, 1.1, 0.5))
                #cbar.set_ticklabels(['low', 'medium', 'high'])

                #plt.savefig(f"figures/{dataset}.png", bbox_inches="tight", dpi=300)
                plt.show()

        

## Analyzing AUC Q

In [None]:
dataset_titles = {"highschool": "Highschool", "primary_school": "Primary School", "hospital": "Hospital", "workplace01": "Workplace01", "workplace02": "Workplace02"}

In [None]:
Q_stats = pd.DataFrame()
for dataset in datasets: 
    vals = df.loc[(df["dataset"] == dataset) & (df["Exp"]==1)]["AUC_Q"].to_numpy()
    print(vals)
    mean, var = mean_confidence_interval(vals)
    Q_stats = Q_stats.append({"": dataset_titles[dataset], "Mean":np.round(mean, 4), "Variance":np.round(var, 4)}, ignore_index=True) 

## Scatter Plot for the link prediction

In [None]:
print(datasets)
labels = ["Workplace01", "Highschool", "Hospital", "PrimarySchool", "Workplace02"]
markers = ["*", "d", "v", "+", "p"]

In [None]:
fig, ax = plt.subplots(1,1,figsize=(10,10))
#plt.title("$AUC(A, \lambda_{NoExp})\ vs.\ AUC(A, \lambda_{Exp})$", pad=15)
#plt.title("Link Prediction")
ax.set_xlabel("NoExp")
ax.set_ylabel("Exp")



for i,dataset in enumerate(datasets): 
    if dataset != "workplace01":
        continue
    for fold in folds: 
        x = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 0)]["AUC_A"].to_numpy()[0]
        y = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 1)]["AUC_A"].to_numpy()[0]
        plt.scatter(x,y,color="green" if y>x else "red", marker=markers[i], s=100, label=labels[i] if fold==0 else "")
ax.plot(np.arange(0.3,0.95,0.01), np.arange(0.3,0.95,0.01), color="grey", ls=(0, (5, 10)),lw=2)
ax.margins(x=0,y=0)
plt.legend(fontsize="xx-large")
#plt.savefig("figures/link_prediction.png", bbox_inches="tight", dpi=300)
plt.show()

## Membership Predictions

In [None]:
fig, ax = plt.subplots(1,1,figsize=(10,10))
#plt.title("Membership Predictions", pad=15)
ax.set_xlabel("NoExp")
ax.set_ylabel("Exp")

ax.set_xlim([0.5,1])
ax.set_ylim([0.45,1])

for i,dataset in enumerate(datasets): 
    for fold in folds: 
        x = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 0)]["acc_u"].to_numpy()[0]
        y = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 1)]["acc_mu"].to_numpy()[0]
        plt.scatter(x,y,color="green" if y>x else "red", marker=markers[i], s=100, label=labels[i] if fold==0 else "")
ax.plot(np.arange(0.5, 1, 0.01), np.arange(0.5, 1,0.01), color="grey", ls=(0, (5, 10)),lw=2)
ax.margins(x=0,y=0)
plt.legend(fontsize="xx-large")
plt.savefig("figures/memberships.png", bbox_inches="tight", dpi=300)
plt.show()

In [None]:
df.loc[(df["Exp"] == 1)]["AUC_Q"].mean()

In [None]:
df.loc[(df["Exp"] == 1)]["AUC_A"].mean()

# Membership Predictions $u$

In [None]:
fig, ax = plt.subplots(1,1,figsize=(10,10))
plt.title("Membership Predictions", pad=15)
ax.set_xlabel("NoExp")
ax.set_ylabel("Exp")

for i,dataset in enumerate(datasets): 
    for fold in folds: 
        x = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 0)]["acc_u"].to_numpy()[0]
        y = df.loc[(df["dataset"] == dataset) & (df["fold"] == fold) & (df["Exp"] == 1)]["acc_u"].to_numpy()[0]
        plt.scatter(x,y,color="green" if y>x else "red", marker=markers[i], label=labels[i] if fold==0 else "")
ax.plot(np.arange(0.5, 1, 0.01), np.arange(0.5, 1,0.01), color="red", ls='-.',lw=0.5)
plt.legend()
#plt.savefig("figures/memberships_None.svg")
plt.show()