In [2]:
import pandas as pd
import numpy as np
import re
import glob
from matplotlib import pyplot as plt
import seaborn as sns
from math import comb, exp, factorial
from itertools import combinations, permutations
import json
from matplotlib.backends.backend_pdf import PdfPages
# plt.style.use('ggplot')
sns.set_theme()

In [3]:
# Every simulation CSV found one directory level below ../data.
filenames = glob.glob(r'../data/*/*.csv')
# Run-type name -> probability value printed as Pr_int in the plot titles.
prob = {"PR4": 0, "INTR_0_5_R1": 0.5, "INTR_0_9_R1":0.9}
# filename = "../data/PR4/sim_100_2_10_20201122_193805.csv"
# Captures (run directory, n, k, m) from a simulation file path; either / or \ separators match.
# NOTE: several later cells assign a different pattern to this same global name.
regex = r".+[\\/]{1,2}(.+)[\\/]{1,2}sim_(\d+)_(\d+)_(\d+)_.*\.csv"
def toInt(i):
    """Return ``int(i)``, or ``str(i)`` when ``int`` rejects the value with ValueError."""
    try:
        converted = int(i)
    except ValueError:
        # Non-numeric text (e.g. a run name) is passed through as a string.
        converted = str(i)
    return converted

def makeArray(text):
    """Parse a bracketed, comma-separated integer list such as "[1,2,3]".

    The first and last characters of ``text`` (the brackets) are dropped and
    the remainder is parsed into a 1-D integer numpy array.
    """
    # Builtin `int` instead of the `np.int` alias, which NumPy 1.24 removed.
    return np.fromstring(text[1: -1], dtype=int, sep=',')

def b(l: int,
      n: int,
      p: float) -> float:
    """Binomial probability mass: C(n, l) * p**l * (1 - p)**(n - l)."""
    hit_term = p ** l
    miss_term = (1 - p) ** (n - l)
    return comb(n, l) * hit_term * miss_term

def pr_p(K: int,
         M: int,
         By: np.ndarray,
         Bz: np.ndarray) -> float:
    """Product over cells i in [0, M) of ``1 - sum_{l < By[i]} b(l, sum(Bz), 1/M)``.

    Parameters:
        K: accepted for interface compatibility; not used in the computation.
        M: number of cells iterated over; also sets the per-trial probability 1/M.
        By: per-cell thresholds; cell i sums binomial terms for l in [0, By[i]).
        Bz: array whose element sum is used as the binomial trial count.

    Returns:
        The product rounded to 6 decimal places.
    """
    # Loop-invariant: the trial count is the same for every cell and every term,
    # so it is summed once instead of once per binomial term.
    trials = sum(Bz)
    product = 1
    for i in range(0, M):
        below_threshold = sum(b(l, trials, 1/M) for l in range(0, By[i]))
        product = product * (1 - below_threshold)
    return round(product, 6)

def pr_fp(pr_p, pr_dp):
    """Return ``(1 - pr_p) * pr_dp``; works on scalars or element-wise on array-likes."""
    complement = 1 - pr_p
    return complement * pr_dp

def Vz_Vy(y: np.ndarray, z: np.ndarray)-> bool:
    """True when every entry of ``y`` is strictly less than the matching entry of ``z``."""
    strictly_below = np.less(y, z)
    return strictly_below.all()

def Bz_By(b_y:np.ndarray, b_z: np.ndarray)-> bool:
    """True when every entry of ``b_z`` is greater than or equal to the matching entry of ``b_y``."""
    at_least = np.greater_equal(b_z, b_y)
    return at_least.all()

def tp_fun(y: np.ndarray, z: np.ndarray, b_y:np.ndarray, b_z: np.ndarray) -> int:
    """Return 1 when ``y < z`` holds element-wise AND ``b_z >= b_y`` holds element-wise, else 0."""
    if not np.less(y, z).all():
        return 0
    return 1 if np.greater_equal(b_z, b_y).all() else 0

def fp_fun(y: np.ndarray, z: np.ndarray, b_y:np.ndarray, b_z: np.ndarray) -> int:
    """Return 1 when ``y < z`` does NOT hold element-wise but ``b_z >= b_y`` does, else 0."""
    if np.less(y, z).all():
        return 0
    return 1 if np.greater_equal(b_z, b_y).all() else 0
        
def fn_fun(y: np.ndarray, z: np.ndarray, b_y:np.ndarray, b_z: np.ndarray) -> int:
    """Return 1 when ``y < z`` holds element-wise but ``b_z >= b_y`` does NOT, else 0."""
    if not np.less(y, z).all():
        return 0
    return 0 if np.greater_equal(b_z, b_y).all() else 1
    
def tn_fun(y: np.ndarray, z: np.ndarray, b_y:np.ndarray, b_z: np.ndarray) -> int:
    """Return 1 when neither ``y < z`` nor ``b_z >= b_y`` holds element-wise, else 0."""
    if np.less(y, z).all():
        return 0
    return 0 if np.greater_equal(b_z, b_y).all() else 1
    
def acc_fun(tp, tn, fp, fn):
    """Accuracy: (tp + tn) divided by the sum of all four outcome values."""
    correct = tp + tn
    everything = correct + fp + fn
    return correct / everything

def prec_fun(tp, fp):
    """Precision: tp divided by (tp + fp)."""
    predicted_positive = tp + fp
    return tp / predicted_positive

def fpr_fun(tn, fp) :
    """False-positive rate: fp divided by (tn + fp)."""
    actual_negative = tn + fp
    return fp / actual_negative

def reduce_yz(a, b):
    """Smallest NaN-ignoring minimum across the two array-likes ``a`` and ``b``."""
    low_a = np.nanmin(a)
    low_b = np.nanmin(b)
    return low_b if low_b < low_a else low_a


In [14]:
# print(filenames)
#DF
# For every raw simulation CSV: parse the clock columns, take the row with
# GSN == 10 * n as the reference event y, compute pr_p / pr_dp / pr_fp against
# it, and write the rows with 10n + 1 <= GSN <= n^2 + 10n to ../Results.

# Pattern for the raw sim_*.csv paths, kept local to this cell: several later
# cells rebind the module-level `regex` to the results-file pattern, which
# makes a re-run of this cell fail to match any filename.
sim_regex = r".+[\\/]{1,2}(.+)[\\/]{1,2}sim_(\d+)_(\d+)_(\d+)_.*\.csv"
for filename in filenames:
    (run, n, k, m) = [toInt(i) for i in re.findall(sim_regex, filename)[0]]

    print(run, n, k, m)
    df = pd.read_csv(filename,
                 dtype={'GSN': np.int32, 'i': np.int32, 'x': np.int32, 'VC': str, 'BC': str, 'Type': str},
                 sep=';', skipinitialspace=True)
    df['VC'] = df['VC'].apply(makeArray)
    df['BC'] = df['BC'].apply(makeArray)

    # Reference event y: the row whose GSN equals 10 * n.
    y = df.loc[df['GSN'] == 10 * n]
    if y.empty:
        raise ValueError(f"{filename}: no row with GSN == {10 * n} to use as the reference event")
    B_y = y['BC'].values[0]
    V_y = y['VC'].values[0]
    # Repeat the reference arrays on every row. Mapping over an existing column
    # avoids the former `np.NaN` placeholder columns (`np.NaN` was removed in NumPy 2.0).
    df['B_y'] = df['GSN'].apply(lambda _gsn: B_y)
    df['V_y'] = df['GSN'].apply(lambda _gsn: V_y)
#     df['B_y'] = B_y.tostring

    # GSN window that is evaluated and exported; rows outside it get pr_p = 0.0.
    first_gsn, last_gsn = 10 * n + 1, n**2 + 10*n
    df['pr_p'] = df[['GSN', 'BC']].apply(lambda x: (pr_p(k, m, B_y, x['BC']) if ( first_gsn <= x['GSN'] <= last_gsn ) else 0.0),
                                         axis=1)
    # pr_dp is 1 when the row's BC is element-wise >= the reference BC, else 0.
    df['pr_dp'] = df['BC'].apply(lambda x: 1 if np.all(np.greater_equal(x, B_y)) else 0)
    df['pr_fp'] = df[['pr_p', 'pr_dp']].apply(lambda x: pr_fp(x.pr_p, x.pr_dp), axis=1)

    tempdf = df.loc[(df['GSN'] >= first_gsn) & (df['GSN'] <= last_gsn)]
    tempdf.to_csv(f"../Results/{run}_df_{n}_{k}_{m}.csv")


INTR_0_9_R1 100 2 10
INTR_0_9_R1 100 2 20
INTR_0_9_R1 100 2 30
INTR_0_9_R1 100 3 10
INTR_0_9_R1 100 3 20
INTR_0_9_R1 100 3 30
INTR_0_9_R1 100 4 10
INTR_0_9_R1 100 4 20
INTR_0_9_R1 100 4 30


In [109]:
#GRAPH
# One pr_p-vs-GSN scatter page per results CSV, collected into a single multi-page PDF.

with PdfPages("prp_diags.pdf") as pdf:
    for filename in glob.glob("../Results/*.csv"):
        # NOTE: this rebinds the module-level `regex` that the raw-data cell reads,
        # replacing the sim_*.csv pattern with the results-file pattern.
        regex = r".+[\\/](.+)_df_(\d+)_(\d+)_(\d+).csv"
        (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]


        print(run, n, k, m)
        #     fig, ax = plt.subplots(figsize=(40, 20))
        tempdf = pd.read_csv(filename)
        # Array columns are stored as bracketed, space-separated text (possibly wrapped
        # over several lines); strip the brackets and newlines and parse them back.
        tempdf['VC'] = tempdf['VC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['BC'] = tempdf['BC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['B_y'] = tempdf['B_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['V_y'] = tempdf['V_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        # Plot only every 10th row.
        tempdf2 = tempdf.iloc[::10, :]
        # lmplot with fit_reg=False draws a plain scatter on a figure of its own.
        sns.lmplot(x='GSN',
                   y='pr_p',
                   data=tempdf2,
                   fit_reg=False,
                   height=4,
                   aspect=1.2,
                   scatter_kws={"s": 3000//n, "linewidth":0, "alpha":0.7})
        plt.title(f"$pr_p$ vs $GSN$ $(n, k, m, Pr_{{int}}) = ({n}, {k}, {m}, {prob[run]} )$")
        plt.xlabel("GSN")
        # Tick step: n * (n + 10) // 10 rounded to the nearest thousand.
        plt.xticks(
            np.arange(min(tempdf['GSN']) - 1,
                      max(tempdf['GSN']) - 1, round((n * (n + 10)//10), -3)), fontsize=7)
        plt.yticks(fontsize=10)
        plt.ylabel(r"$Pr_p$")
        
#         plt.savefig(f'../models/prp/{run}_plot2_{n}_{k}_{m}.png',
#                     bbox_inches='tight',
#                     dpi=200)
#         plt.show()
        plt.tight_layout()
        # Passing the PdfPages object appends the current figure as a new page.
        plt.savefig(pdf, format='pdf', bbox_inches='tight')
        # Clear and close the figure so figures do not accumulate across iterations.
        plt.cla()
        plt.close()
        
        

INTR_0_5_R1 100 2 10
INTR_0_5_R1 100 2 20
INTR_0_5_R1 100 2 30
INTR_0_5_R1 100 3 10
INTR_0_5_R1 100 3 20
INTR_0_5_R1 100 3 30
INTR_0_5_R1 100 4 10
INTR_0_5_R1 100 4 20
INTR_0_5_R1 100 4 30
INTR_0_5_R1 200 2 20
INTR_0_5_R1 200 2 40
INTR_0_5_R1 200 2 60
INTR_0_5_R1 200 3 20
INTR_0_5_R1 200 3 40
INTR_0_5_R1 200 3 60
INTR_0_5_R1 200 4 20
INTR_0_5_R1 200 4 40
INTR_0_5_R1 200 4 60
INTR_0_5_R1 300 2 30
INTR_0_5_R1 300 2 60
INTR_0_5_R1 300 2 90
INTR_0_5_R1 300 3 30
INTR_0_5_R1 300 3 60
INTR_0_5_R1 300 3 90
INTR_0_5_R1 300 4 30
INTR_0_5_R1 300 4 60
INTR_0_5_R1 300 4 90
INTR_0_9_R1 100 2 10
INTR_0_9_R1 100 2 20
INTR_0_9_R1 100 2 30
INTR_0_9_R1 100 3 10
INTR_0_9_R1 100 3 20
INTR_0_9_R1 100 3 30
INTR_0_9_R1 100 4 10
INTR_0_9_R1 100 4 20
INTR_0_9_R1 100 4 30
INTR_0_9_R1 200 2 20
INTR_0_9_R1 200 2 40
INTR_0_9_R1 200 2 60
INTR_0_9_R1 200 3 20
INTR_0_9_R1 200 3 40
INTR_0_9_R1 200 3 60
INTR_0_9_R1 200 4 20
INTR_0_9_R1 200 4 40
INTR_0_9_R1 200 4 60
INTR_0_9_R1 300 2 30
INTR_0_9_R1 300 2 60
INTR_0_9_R1 3

In [110]:
#GRAPH
# Two scatter pages per results CSV, coloured by the actual vector-clock outcome:
#   Fig 1: pr_fp  = (1 - pr_p) * pr_dp  -> prfp_diags.pdf  (pdf1)
#   Fig 2: pr_fp2 = (1 - pr_p) * pr_p   -> prfp2_diags.pdf (pdf2)
with PdfPages("prfp_diags.pdf") as pdf1, PdfPages("prfp2_diags.pdf") as pdf2:
    for filename in glob.glob("../Results/*.csv"):
        regex = r".+[\\/](.+)_df_(\d+)_(\d+)_(\d+).csv"
        (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]
        print(run, n, k, m)
        #     fig, ax = plt.subplots(figsize=(40, 20))
        tempdf = pd.read_csv(filename)
        tempdf.drop('Unnamed: 0', axis=1, inplace=True)
        # Array columns are stored as bracketed, space-separated text; parse them back.
        tempdf['VC'] = tempdf['VC'].apply(
            lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['BC'] = tempdf['BC'].apply(
            lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['B_y'] = tempdf['B_y'].apply(
            lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['V_y'] = tempdf['V_y'].apply(
            lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
        tempdf['pr_fp2'] = tempdf['pr_p'].apply(lambda x: (1 - x) * x)
        # Ground truth: "Actual Positive" when V_y < VC holds element-wise.
        tempdf['ActualRes'] = tempdf[['VC', 'V_y']].apply(
            (lambda x: "Actual Positive"
             if np.all(np.less(x.V_y, x.VC)) else "Actual Negative"),
            axis=1)
        # Plot only every 10th row.
        tempdf2 = tempdf.iloc[::10, :]
        # Fig 1
    #     print(tempdf.keys())
        sns.lmplot(x='GSN',
                   y='pr_fp',
                   data=tempdf2,
                   fit_reg=False,
                   height=4,
                   aspect=1.2,
                   hue='ActualRes',
                   scatter_kws={"s": 3000//n, "linewidth":0, "alpha":0.7})
        # Raw f-string: keeps the backslash of \delta without relying on the
        # invalid "\d" escape sequence (a SyntaxWarning on recent Python versions).
        plt.title(rf"$pr_{{fp}} = (1 - pr_p)pr_{{(\delta)p}}$ vs $GSN$ $(n, k, m, Pr_{{int}}) = ({n}, {k}, {m}, {prob[run]} )$")
        plt.xlabel("GSN")
        plt.xticks(
            np.arange(min(tempdf['GSN']) - 1,
                      max(tempdf['GSN']) - 1, round((n * (n + 10)//10), -3)))
        plt.ylabel(r"$Pr_{fp}$")
#         plt.savefig(f'../models/prfp/{run}_plot2_{n}_{k}_{m}.png',
#                    bbox_inches='tight', dpi=200)
        plt.tight_layout()
        # pr_fp pages go to prfp_diags.pdf (previously written to the prfp2 file).
        plt.savefig(pdf1, format='pdf', bbox_inches='tight')
        #plt.show()
        plt.cla()
        plt.close()

        #Fig 2
        sns.lmplot(x='GSN',
                   y='pr_fp2',
                   data=tempdf2,
                   fit_reg=False,
                   height=4,
                   aspect=1.2,
                   hue='ActualRes',
                   scatter_kws={"s": 3000//n, "linewidth":0, "alpha":0.7})
        plt.title(f"$pr_{{fp}} = (1 - pr_p)pr_{{p}}$ vs $GSN$ $(n, k, m, Pr_{{int}}) = ({n}, {k}, {m}, {prob[run]} )$")
        plt.xlabel("GSN")
        plt.xticks(
            np.arange(min(tempdf['GSN']) - 1,
                      max(tempdf['GSN']) - 1, round((n * (n + 10)//10), -3)))
        # The y axis shows pr_fp2, so it is labelled Pr_fp rather than Pr_p.
        plt.ylabel(r"$Pr_{fp}$")
        plt.tight_layout()
#         plt.savefig(f'../models/prfp2/{run}_plot2_{n}_{k}_{m}.png',
#                    bbox_inches='tight', dpi=200)
        # pr_fp2 pages go to prfp2_diags.pdf (previously written to the prfp file).
        plt.savefig(pdf2, format='pdf', bbox_inches='tight')
        #plt.show()
        plt.cla()
        plt.close()
        del tempdf
        del tempdf2
        


INTR_0_5_R1 100 2 10
INTR_0_5_R1 100 2 20
INTR_0_5_R1 100 2 30
INTR_0_5_R1 100 3 10
INTR_0_5_R1 100 3 20
INTR_0_5_R1 100 3 30
INTR_0_5_R1 100 4 10
INTR_0_5_R1 100 4 20
INTR_0_5_R1 100 4 30
INTR_0_5_R1 200 2 20
INTR_0_5_R1 200 2 40
INTR_0_5_R1 200 2 60
INTR_0_5_R1 200 3 20
INTR_0_5_R1 200 3 40
INTR_0_5_R1 200 3 60
INTR_0_5_R1 200 4 20
INTR_0_5_R1 200 4 40
INTR_0_5_R1 200 4 60
INTR_0_5_R1 300 2 30
INTR_0_5_R1 300 2 60
INTR_0_5_R1 300 2 90
INTR_0_5_R1 300 3 30
INTR_0_5_R1 300 3 60
INTR_0_5_R1 300 3 90
INTR_0_5_R1 300 4 30
INTR_0_5_R1 300 4 60
INTR_0_5_R1 300 4 90
INTR_0_9_R1 100 2 10
INTR_0_9_R1 100 2 20
INTR_0_9_R1 100 2 30
INTR_0_9_R1 100 3 10
INTR_0_9_R1 100 3 20
INTR_0_9_R1 100 3 30
INTR_0_9_R1 100 4 10
INTR_0_9_R1 100 4 20
INTR_0_9_R1 100 4 30
INTR_0_9_R1 200 2 20
INTR_0_9_R1 200 2 40
INTR_0_9_R1 200 2 60
INTR_0_9_R1 200 3 20
INTR_0_9_R1 200 3 40
INTR_0_9_R1 200 3 60
INTR_0_9_R1 200 4 20
INTR_0_9_R1 200 4 40
INTR_0_9_R1 200 4 60
INTR_0_9_R1 300 2 30
INTR_0_9_R1 300 2 60
INTR_0_9_R1 3

In [22]:
#DF
# For every results CSV: subsample the events, pair every sampled event with
# every other one (ordered pairs), classify each pair as TP / FP / FN / TN and
# pickle the pairwise table.
for filename in glob.glob("F://BloomClock/sims/Results/*.csv"):
    regex = r".+[\\/](.+)_df_(\d+)_(\d+)_(\d+).csv"
    (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]
    print(run, n, k, m)


    tempdf = pd.read_csv(filename)
    tempdf.drop('Unnamed: 0', axis=1, inplace=True)
    # Array columns are stored as bracketed, space-separated text; parse them back.
    tempdf['VC'] = tempdf['VC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=np.uint16, sep=' '))
    tempdf['BC'] = tempdf['BC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=np.uint16, sep=' '))
    
    #tempdf['B_y'] = tempdf['B_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
    #tempdf['V_y'] = tempdf['V_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
    #tempdf['pr_fp2'] = tempdf['pr_p'].apply(lambda x: (1 - x) * x)
    #tempdf['ActualRes'] = tempdf[['VC', 'V_y']].apply((lambda x: "Actual Positive" if np.all(np.less(x.V_y, x.VC)) else "Actual Negative"), axis=1)
    # Keep every 10th row.
    tempdf2 = tempdf.iloc[::10, :]
    # remove some unwated rows
    if(n == 100):
        tempdf2 = tempdf2[tempdf2['GSN'] <= 8000]
    elif(n==200):
        tempdf2 = tempdf2[tempdf2['GSN'] <= 34000]
    else:
        tempdf2 = tempdf2[tempdf2['GSN'] <= 60000]

    # Index labels of the first and second member of every ordered pair.
    # These are deliberately NOT named `a` / `b`: binding and then deleting a
    # global `b` here would remove the module-level function `b` that pr_p
    # calls, so any later pr_p call would fail with NameError.
    (left_idx, right_idx) = map(list, zip(*permutations(tempdf2.index, 2)))
    tempdf3 = pd.concat(
        [tempdf2.loc[left_idx][['GSN', 'VC', 'BC']].reset_index(drop=True), tempdf2.loc[right_idx][['GSN', 'VC', 'BC']].reset_index(drop=True)], axis=1
    )

    new_cols = ['GSN_x', 'VC_x', 'BC_x', 'GSN_x1', 'VC_x1', 'BC_x1']
    tempdf3.columns = new_cols
    
    # Free Space
    del left_idx
    del right_idx
    del tempdf
    del tempdf2
    
    # Create the four flag columns in a fixed order, then fill each one row by row.
    tempdf3[['TP', 'FP', 'FN', 'TN']] = 0
    tempdf3['TP'] = tempdf3.apply(lambda x: tp_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['TN'] = tempdf3.apply(lambda x: tn_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['FP'] = tempdf3.apply(lambda x: fp_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['FN'] = tempdf3.apply(lambda x: fn_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)

    
    tempdf3.to_pickle(f"F://BloomClock/sims/Results/merged/{run}_merged_df_{n}_{k}_{m}.pkl")
 
    del tempdf3
    



INTR_0_5_R1 100 2 10
INTR_0_5_R1 100 2 20
INTR_0_5_R1 100 2 30
INTR_0_5_R1 100 3 10
INTR_0_5_R1 100 3 20
INTR_0_5_R1 100 3 30
INTR_0_5_R1 100 4 10
INTR_0_5_R1 100 4 20
INTR_0_5_R1 100 4 30
INTR_0_5_R1 200 2 20
INTR_0_5_R1 200 2 40
INTR_0_5_R1 200 2 60
INTR_0_5_R1 200 3 20
INTR_0_5_R1 200 3 40
INTR_0_5_R1 200 3 60
INTR_0_5_R1 200 4 20
INTR_0_5_R1 200 4 40
INTR_0_5_R1 200 4 60
INTR_0_5_R1 300 2 30
INTR_0_5_R1 300 2 60
INTR_0_5_R1 300 2 90
INTR_0_5_R1 300 3 30
INTR_0_5_R1 300 3 60
INTR_0_5_R1 300 3 90
INTR_0_5_R1 300 4 30
INTR_0_5_R1 300 4 60
INTR_0_5_R1 300 4 90
INTR_0_9_R1 100 2 10
INTR_0_9_R1 100 2 20
INTR_0_9_R1 100 2 30
INTR_0_9_R1 100 3 10
INTR_0_9_R1 100 3 20
INTR_0_9_R1 100 3 30
INTR_0_9_R1 100 4 10
INTR_0_9_R1 100 4 20
INTR_0_9_R1 100 4 30
INTR_0_9_R1 200 2 20
INTR_0_9_R1 200 2 40
INTR_0_9_R1 200 2 60
INTR_0_9_R1 200 3 20
INTR_0_9_R1 200 3 40
INTR_0_9_R1 200 3 60
INTR_0_9_R1 200 4 20
INTR_0_9_R1 200 4 40
INTR_0_9_R1 200 4 60
INTR_0_9_R1 300 2 30
INTR_0_9_R1 300 2 60
INTR_0_9_R1 3

In [9]:
# Load one merged pairwise table (run PR4, n=100, k=2, m=10) and preview its first rows.
run, n, k, m = ["PR4", 100, 2, 10]
tempdf3 = pd.read_pickle(f"F://BloomClock/sims/Results/merged/{run}_merged_df_{n}_{k}_{m}.pkl")
tempdf3.head(10)

Unnamed: 0,GSN_x,VC_x,BC_x,GSN_x1,VC_x1,BC_x1,TP,FP,FN,TN
0,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1011,"[3, 4, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 8, ...","[6, 5, 6, 6, 5, 7, 4, 5, 6, 6]",0,0,0,1
1,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1021,"[3, 4, 1, 0, 4, 2, 0, 0, 2, 7, 1, 4, 3, 3, 6, ...","[6, 7, 6, 11, 8, 7, 6, 7, 6, 6]",0,1,0,0
2,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1031,"[0, 8, 0, 5, 0, 1, 0, 5, 0, 3, 3, 0, 4, 3, 1, ...","[7, 5, 6, 4, 6, 8, 5, 7, 5, 6]",0,0,0,1
3,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1041,"[0, 0, 0, 1, 4, 2, 1, 0, 4, 0, 0, 1, 3, 8, 9, ...","[5, 6, 6, 4, 7, 7, 4, 6, 6, 4]",0,0,0,1
4,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1051,"[3, 0, 1, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 4, ...","[3, 5, 4, 8, 5, 3, 3, 4, 5, 3]",0,0,0,1
5,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1061,"[1, 0, 0, 0, 1, 1, 2, 4, 0, 0, 0, 0, 6, 0, 0, ...","[6, 4, 6, 9, 6, 10, 6, 5, 5, 6]",0,0,0,1
6,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1071,"[0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 1, 0, 0, ...","[4, 3, 3, 2, 4, 5, 4, 5, 2, 5]",0,0,0,1
7,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1081,"[0, 0, 0, 1, 4, 2, 1, 0, 4, 0, 0, 1, 3, 8, 9, ...","[5, 6, 8, 5, 7, 7, 4, 6, 6, 8]",0,0,0,1
8,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1091,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, ...","[3, 3, 5, 2, 3, 4, 3, 4, 6, 4]",0,0,0,1
9,1001,"[0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 6, ...","[5, 7, 3, 6, 8, 7, 6, 6, 5, 5]",1101,"[3, 4, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...","[6, 4, 4, 5, 7, 6, 4, 7, 5, 3]",0,0,0,1


In [None]:
# Look up the single ordered pair (GSN_x = 2371, GSN_x1 = 1001).
tempdf3.loc[(tempdf3['GSN_x'] == 2371) & ( tempdf3['GSN_x1'] == 1001)]

In [None]:
# Sanity check: select rows where no outcome flag is set, or where two flags are set together.
# tp_fun / fp_fun / fn_fun / tn_fun test mutually exclusive conditions, so this is presumably expected to be empty.
tempdf3.query('(TP == TN == FP == FN == 0) or ( TP == 1 and FP == 1) or ( TP == 1 and TN == 1) or ( TP == 1 and FN == 1) or ( TN == 1 and FP == 1) or ( TN == 1 and FN == 1) or ( FP == 1 and FN == 1)')

In [26]:
#DF
def convList(text):
    """Decode the JSON document in ``text`` and return the resulting Python object."""
    decoded = json.loads(text)
    return decoded


# Variant of the pairwise build: the GSN cut-off is applied first, then every
# (n // 10)-th remaining row is kept before forming all ordered pairs.
for filename in glob.glob("F://BloomClock/sims/Results/*.csv"):
    regex = r".+[\\/](.+)_df_(\d+)_(\d+)_(\d+).csv"
    (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]

    print(run, n, k, m)

    
    tempdf = pd.read_csv(filename)
    tempdf.drop('Unnamed: 0', axis=1, inplace=True)
    # Array columns are stored as bracketed, space-separated text; parse them back.
    tempdf['VC'] = tempdf['VC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
    tempdf['BC'] = tempdf['BC'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
#     tempdf['VC'] = tempdf['VC'].apply(lambda x: convList("[" + ",".join(x[1: -1].split()) + "]"))
#     tempdf['BC'] = tempdf['BC'].apply(lambda x: convList("[" + ",".join(x[1: -1].split()) + "]"))
    if(n == 100):
        tempdf = tempdf[tempdf['GSN'] <= 8000]
    elif(n== 200):
        tempdf = tempdf[tempdf['GSN'] <= 34000]
    else:
        tempdf = tempdf[tempdf['GSN'] <= 60000]
    #tempdf['B_y'] = tempdf['B_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
    #tempdf['V_y'] = tempdf['V_y'].apply(lambda x: np.fromstring(x[1:-1].replace('\n', ''), dtype=int, sep=' '))
    #tempdf['pr_fp2'] = tempdf['pr_p'].apply(lambda x: (1 - x) * x)
    #tempdf['ActualRes'] = tempdf[['VC', 'V_y']].apply((lambda x: "Actual Positive" if np.all(np.less(x.V_y, x.VC)) else "Actual Negative"), axis=1)
    tempdf2 = tempdf.iloc[::n//10, :]
    # remove some unwated rows
    

    # Index labels of the first and second member of every ordered pair.
    # These are deliberately NOT named `a` / `b`: binding and then deleting a
    # global `b` here would remove the module-level function `b` that pr_p
    # calls, so any later pr_p call would fail with NameError.
    (left_idx, right_idx) = map(list, zip(*permutations(tempdf2.index, 2)))
    tempdf3 = pd.concat(
        [tempdf2.loc[left_idx][['GSN', 'VC', 'BC']].reset_index(drop=True), tempdf2.loc[right_idx][['GSN', 'VC', 'BC']].reset_index(drop=True)], axis=1
    )

    new_cols = ['GSN_x', 'VC_x', 'BC_x', 'GSN_x1', 'VC_x1', 'BC_x1']
    tempdf3.columns = new_cols
    
    # Free Space
    del left_idx
    del right_idx
    del tempdf
    del tempdf2
    print('doing')
    # Create the four flag columns in a fixed order, then fill each one row by row.
    tempdf3[['TP', 'FP', 'FN', 'TN']] = 0
#     tempdf3['Bz_By'] = tempdf3.apply(lambda x: Vz_Vy(np.array(x['VC_x']), np.array(x['VC_x1'])), axis=1)
    tempdf3['TP'] = tempdf3.apply(lambda x: tp_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['TN'] = tempdf3.apply(lambda x: tn_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['FP'] = tempdf3.apply(lambda x: fp_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)
    tempdf3['FN'] = tempdf3.apply(lambda x: fn_fun(x['VC_x'], x['VC_x1'], x['BC_x'], x['BC_x1']), axis=1)

    
    
    tempdf3.to_pickle(f"F://BloomClock/sims/Results/merged/{run}_merged_df_{n}_{k}_{m}.pkl")
 
    del tempdf3
    
    


INTR_0_9_R1 100 2 10
doing
INTR_0_9_R1 100 2 20
doing
INTR_0_9_R1 100 2 30
doing
INTR_0_9_R1 100 3 10
doing
INTR_0_9_R1 100 3 20
doing
INTR_0_9_R1 100 3 30
doing
INTR_0_9_R1 100 4 10
doing
INTR_0_9_R1 100 4 20
doing
INTR_0_9_R1 100 4 30
doing


In [None]:
#DF
# For every merged pairwise pickle: aggregate the outcome flags per GSN_x,
# derive ACC / PREC / FPR and write their describe() summary to disk.
regex = r".+[\\/](.+)_merged_df_(\d+)_(\d+)_(\d+).pkl"
for filename in glob.glob('F://BloomClock/sims/Results/merged/*.pkl'):
    (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]
    
    print(run, n, k, m)
    
    tempdf = pd.read_pickle(filename)
    # Per GSN_x: number of paired rows, plus the sum and mean of each outcome flag.
    # The dict-of-lists agg yields two-level column labels such as ('TP', 'mean').
    tempdf2 = tempdf.groupby(['GSN_x']).agg({'GSN_x1': 'count',
                                         'TP': ['sum', 'mean'],
                                         'FP': ['sum', 'mean'],
                                         'TN': ['sum', 'mean'],
                                         'FN': ['sum', 'mean']}).reset_index(drop= False)
    
    
    # The metrics are computed from the per-group means, not the sums.
    tempdf2['ACC'] = acc_fun(tempdf2[('TP', 'mean')], tempdf2[('TN', 'mean')], tempdf2[('FP', 'mean')], tempdf2[('FN', 'mean')])
    tempdf2['PREC'] = prec_fun(tempdf2[('TP', 'mean')], tempdf2[('FP', 'mean')])
    tempdf2['FPR'] = fpr_fun(tempdf2[('TN', 'mean')], tempdf2[('FP', 'mean')])
    # Drop groups with a NaN in any column (e.g. a 0/0 in prec_fun or fpr_fun).
    tempdf2 = tempdf2.dropna()
#     tempdf2.to_pickle(f"F://BloomClock/sims/Results/AccVals/{run}_merged_df_{n}_{k}_{m}.pkl")
#     tempdf2.to_csv(f"F://BloomClock/sims/Results/AccVals/csv_results/{run}_merged_df_{n}_{k}_{m}.pkl")
    # NOTE(review): this writes CSV text to a path ending in .pkl; a later cell reads the same path with pd.read_csv.
    tempdf2.describe()[['ACC', 'PREC', 'FPR']].to_csv(f"F://BloomClock/sims/Results/AccVals/csv_results/{run}_merged_df_{n}_{k}_{m}.pkl")

    
    del tempdf2
    


In [15]:
# Same per-GSN_x aggregation as the batch cell, applied to the tempdf3 table currently in memory.
tempdf2 = tempdf3.groupby(['GSN_x']).agg({'GSN_x1': 'count',
                                         'TP': ['sum', 'mean'],
                                         'FP': ['sum', 'mean'],
                                         'TN': ['sum', 'mean'],
                                         'FN': ['sum', 'mean']}).reset_index(drop= False)
    

# The metrics are computed from the per-group means, not the sums.
tempdf2['ACC'] = acc_fun(tempdf2[('TP', 'mean')], tempdf2[('TN', 'mean')], tempdf2[('FP', 'mean')], tempdf2[('FN', 'mean')])
tempdf2['PREC'] = prec_fun(tempdf2[('TP', 'mean')], tempdf2[('FP', 'mean')])
tempdf2['FPR'] = fpr_fun(tempdf2[('TN', 'mean')], tempdf2[('FP', 'mean')])
# Drop groups with a NaN in any column (e.g. a 0/0 in prec_fun or fpr_fun).
tempdf2 = tempdf2.dropna()
tempdf2

Unnamed: 0_level_0,GSN_x,GSN_x1,TP,TP,FP,FP,TN,TN,FN,FN,ACC,PREC,FPR
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum,mean,sum,mean,sum,mean,sum,mean,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,1001,699,547,0.782546,97,0.138770,55,0.078684,0,0,0.861230,0.849379,0.638158
1,1011,699,554,0.792561,105,0.150215,40,0.057225,0,0,0.849785,0.840668,0.724138
2,1021,699,542,0.775393,80,0.114449,77,0.110157,0,0,0.885551,0.871383,0.509554
3,1031,699,552,0.789700,95,0.135908,52,0.074392,0,0,0.864092,0.853168,0.646259
4,1041,699,550,0.786838,102,0.145923,47,0.067239,0,0,0.854077,0.843558,0.684564
...,...,...,...,...,...,...,...,...,...,...,...,...,...
683,7831,699,0,0.000000,1,0.001431,698,0.998569,0,0,0.998569,0.000000,0.001431
685,7851,699,0,0.000000,14,0.020029,685,0.979971,0,0,0.979971,0.000000,0.020029
686,7861,699,0,0.000000,3,0.004292,696,0.995708,0,0,0.995708,0.000000,0.004292
688,7881,699,0,0.000000,1,0.001431,698,0.998569,0,0,0.998569,0.000000,0.001431


In [12]:
# Read back the describe() summary for the current (run, n, k, m).
# The file holds CSV text even though its name ends in .pkl (it is written with to_csv).
tempdf4 = pd.read_csv(f"F://BloomClock/sims/Results/AccVals/csv_results/{run}_merged_df_{n}_{k}_{m}.pkl")
tempdf4.head(10)

Unnamed: 0.1,Unnamed: 0,ACC,PREC,FPR
0,,,,
1,count,684.0,684.0,684.0
2,mean,0.864591,0.534208,0.255842
3,std,0.04598,0.298965,0.167504
4,min,0.683834,0.0,0.001431
5,25%,0.83834,0.316188,0.144744
6,50%,0.858369,0.646695,0.219975
7,75%,0.879828,0.786051,0.330077
8,max,0.998569,0.877256,0.986928


In [1]:
# Load the per-GSN_x metrics pickle for the current (run, n, k, m).
# NOTE: the recorded run of this cell raised NameError because `pd` (and run/n/k/m) had not
# been defined in that kernel session; the import cell must be executed first.
tempdf2 = pd.read_pickle(f"F://BloomClock/sims/Results/AccVals/{run}_merged_df_{n}_{k}_{m}.pkl")

NameError: name 'pd' is not defined

In [167]:
# Summary statistics of the three metric columns of the tempdf2 currently in memory.
tempdf2.describe()[['ACC', 'PREC', 'FPR']]

Unnamed: 0,ACC,PREC,FPR
,,,
count,3168.0,3168.0,3168.0
mean,0.906336,0.633756,0.202852
std,0.030256,0.299853,0.147393
min,0.782432,0.0,0.000313
25%,0.889653,0.515976,0.109487
50%,0.902782,0.770627,0.161651
75%,0.915286,0.85716,0.257469
max,0.999687,0.928847,0.87069


In [68]:
#DF
# For the n == 100 merged pickles: subtract the shared minimum from both BC arrays,
# compute pr_p on the reduced arrays, then pr_dp and pr_fp, and pickle the result.
N = [100, 200, 300]
K = [2, 3, 4]
M = [0.1, 0.2, 0.3]
regex = r".+[\\/](.+)_merged_df_(\d+)_(\d+)_(\d+).pkl"
# NOTE(review): [10:] skips the first ten paths returned by glob, so which files are
# processed depends on the directory listing order — confirm this is intended.
for filename in glob.glob('F://BloomClock/sims/Results/merged/*.pkl')[10:]:
    (run, n, k, m) = [toInt(i) for i in re.findall(regex, filename)[0]]
    if(n == 100):
        print(run, n, k, m)
        tempdf = pd.read_pickle(filename)
        tempdf = tempdf.drop(['VC_x', 'VC_x1'],axis=1)
        # Smallest entry across both BC arrays of the pair.
        tempdf['reduce'] = tempdf.apply(lambda x: reduce_yz(x['BC_x'], x['BC_x1']),axis=1)
        tempdf['B_reduce_yz'] = tempdf['BC_x'] - tempdf['reduce']
        tempdf['B_reduce_zy'] = tempdf['BC_x1'] - tempdf['reduce']
        tempdf['Pr_p_xx1'] = tempdf[['B_reduce_yz', 'B_reduce_zy']].apply(lambda x: pr_p(k, m, x['B_reduce_yz'], x['B_reduce_zy']), axis=1)
        # pr_dp is 1 when BC_x1 >= BC_x element-wise, else 0 (computed on the unreduced arrays).
        tempdf['pr_dp'] = tempdf.apply(lambda x: 1 if np.all(np.greater_equal(x['BC_x1'], x['BC_x'])) else 0, axis=1)
        tempdf['pr_fp'] = pr_fp(tempdf['Pr_p_xx1'], tempdf['pr_dp'])
        tempdf.to_pickle(f"F://BloomClock/sims/Results/Bonus/{run}_bonus_df_{n}_{k}_{m}.pkl")
        del tempdf


INTR_0_9_R1 100 2 10
INTR_0_9_R1 100 2 20
INTR_0_9_R1 100 2 30
INTR_0_9_R1 100 3 10
INTR_0_9_R1 100 3 20
INTR_0_9_R1 100 3 30
INTR_0_9_R1 100 4 10
INTR_0_9_R1 100 4 20
INTR_0_9_R1 100 4 30
PR4 100 2 10
PR4 100 2 20
PR4 100 2 30
PR4 100 3 10
PR4 100 3 20
PR4 100 3 30
PR4 100 4 10
PR4 100 4 20
PR4 100 4 30


In [170]:
#Graph
# One PDF page per (n, k, m, run type): three stacked scatter plots of ACC, PREC and FPR against GSN_x.
with PdfPages("acc_diags_single.pdf") as pdf:
    # NOTE: `regex` is assigned here but not used in this cell.
    regex = r".+[\\/](.+)_merged_df_(\d+)_(\d+)_(\d+).pkl"
    N = [100, 200, 300]
    K = [2, 3, 4]
    M = [0.1, 0.2, 0.3]
    fileTypes = ["PR4", "INTR_0_5_R1", "INTR_0_9_R1"]

    # Every (n, k, m) combination, with m taken as the fraction M of n truncated to an int.
    res = res = [[n, k, int(m*n)] for n in N  
                     for k in K 
                     for m in M] 
    
    for pairs in res:
        (n, k, m) = pairs
        print(n, k, m)
        
            
        for j in(fileTypes):
            f1 =  f"F://BloomClock/sims/Results/AccVals/{j}_merged_df_{n}_{k}_{m}.pkl"
            x = pd.read_pickle(f1)
            fig, axes = plt.subplots(nrows=3, figsize=(4, 10))
            currProb = prob[j]
            # y=-0.01 places the figure title just below the axes.
            fig.suptitle(f"Metrics $N={n}, K={k}, M={m}, Pr_{{int}} = {currProb}$", y=-0.01)
            cols = ["$Accuracy$", "$Precision$", "$FPR$"]
            for ax, col in zip(axes, cols):
                ax.set_title(col)
            
            yy1= sns.scatterplot(data= x, x='GSN_x', y="ACC",  ci=10, ax=axes[0], markers=True, s=10,linewidth=0, alpha = 0.7) 
            yy2=sns.scatterplot(data= x, x='GSN_x', y="PREC",  ci=10,ax=axes[1],markers=True, s=10,linewidth=0, alpha = 0.7)
            yy3=sns.scatterplot(data= x, x='GSN_x', y="FPR",  ci=10, ax = axes[2],markers=True, s=10,linewidth=0, alpha = 0.7)

            for ax in axes:
                # The same tick_params call is repeated three times; one call has the same effect.
                ax.tick_params(axis='both',labelsize=8)
                ax.tick_params(axis='both',labelsize=8)
                ax.tick_params(axis='both',labelsize=8)
                for item in ([ax.xaxis.label, ax.yaxis.label]):
                    item.set_fontsize(8)
            
#             plt.show()
            plt.tight_layout()
            # Passing the PdfPages object appends the current figure as a new page.
            plt.savefig(pdf, format='pdf', bbox_inches='tight')
            plt.cla()
            plt.close()

        


100 2 10
100 2 20
100 2 30
100 3 10
100 3 20
100 3 30
100 4 10
100 4 20
100 4 30
200 2 20
200 2 40
200 2 60
200 3 20
200 3 40
200 3 60
200 4 20
200 4 40
200 4 60
300 2 30
300 2 60
300 2 90
300 3 30
300 3 60
300 3 90
300 4 30
300 4 60
300 4 90


In [118]:
#Graph
# One PDF page per (n, k, m): a 3x3 grid with metrics (ACC, PREC, FPR) as rows
# and the three run types as columns.
with PdfPages("fp_diags.pdf") as pdf:
    # NOTE: `regex` is assigned here but not used in this cell.
    regex = r".+[\\/](.+)_merged_df_(\d+)_(\d+)_(\d+).pkl"
    N = [100, 200, 300]
    K = [2, 3, 4]
    M = [0.1, 0.2, 0.3]

    # Every (n, k, m) combination, with m taken as the fraction M of n truncated to an int.
    res = res = [[n, k, int(m*n)] for n in N  
                     for k in K 
                     for m in M] 

    for pairs in res:
        fileTypes = ["PR4", "INTR_0_5_R1", "INTR_0_9_R1"]
        (n, k, m) = pairs

        f1, f2, f3 = [f"F://BloomClock/sims/Results/AccVals/{j}_merged_df_{n}_{k}_{m}.pkl" for j in fileTypes]
        print(fileTypes, n, k, m)
        tempdfs = [pd.read_pickle(f1), pd.read_pickle(f2), pd.read_pickle(f3)]
        fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(12, 12))
        # y=-0.01 places the figure title just below the axes.
        fig.suptitle(f"Metrics for with $N={n}, K={k}, M={m}$", y=-0.01)
        # Column titles follow the order of fileTypes.
        cols = ["$Pr(Internal) = 0$", "$Pr(Internal) = 0.5$", "$Pr(Internal) = 0.9$"]
        # NOTE: `rows` is defined but not used below.
        rows = ["Accuracy", "Precision", "FPR"]

        for ax, col in zip(axes[0], cols):
            ax.set_title(col)


        # Column i holds the plots for run type i.
        for i, x in enumerate(tempdfs):
            yy1= sns.scatterplot(data= x, x='GSN_x', y="ACC",  ci=10, ax=axes[0,i], markers=True, s=10,linewidth=0, alpha = 0.7)
            
            yy2=sns.scatterplot(data= x, x='GSN_x', y="PREC",  ci=10,ax=axes[1,i],markers=True, s=10,linewidth=0, alpha = 0.7)
            
            yy3=sns.scatterplot(data= x, x='GSN_x', y="FPR",  ci=10, ax = axes[2,i],markers=True, s=10,linewidth=0, alpha = 0.7)
            axes[0, i].tick_params(axis='both',labelsize=8)
            axes[1, i].tick_params(axis='both',labelsize=8)
            axes[2, i].tick_params(axis='both',labelsize=8)
            
            for item in ([axes[0,i].xaxis.label, axes[0,i].yaxis.label]):
                item.set_fontsize(10)
            
            for item in ([axes[1,i].xaxis.label, axes[1,i].yaxis.label]):
                item.set_fontsize(10)

            for item in ([axes[2,i].xaxis.label, axes[2,i].yaxis.label]):
                item.set_fontsize(10)

             

    #     axes[1].set_title("Precision")
    #     

    #     axes[2].set_title("FPR")
    #     


        
        plt.tight_layout()
#         plt.show()
        # Passing the PdfPages object appends the current figure as a new page.
        plt.savefig(pdf, format='pdf', bbox_inches='tight')
#         plt.savefig(f'../models/fpr/combined/fpr_plot_{n}_{k}_{m}.png', bbox_inches='tight', dpi=200)
        plt.cla()
        plt.close()
        

['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 90
['PR4', 'INTR

In [119]:
#graph
# One PDF page per (n, k, m): pr_p against GSN for the three run types side by side.
N = [100, 200, 300]
K = [2, 3, 4]
M = [0.1, 0.2, 0.3]


# Every (n, k, m) combination, with m taken as the fraction M of n truncated to an int.
res = res = [[n, k, int(m*n)] for n in N  
                 for k in K 
                 for m in M] 

with PdfPages("prp_combined.pdf") as pdf:
    for pairs in res:
        fileTypes = ["PR4", "INTR_0_5_R1", "INTR_0_9_R1"]
        (n, k, m) = pairs

        print(fileTypes, n, k, m)
        f1, f2, f3 = [f"../Results/{j}_df_{n}_{k}_{m}.csv" for j in fileTypes]
        df1, df2, df3 = pd.read_csv(f1), pd.read_csv(f2), pd.read_csv(f3)
        # Plot only every 10th row of each frame.
        tempdfs = [df1.iloc[::10, :], df2.iloc[::10, :], df3.iloc[::10, :]]
        fig, axes = plt.subplots(ncols=3, figsize=(15, 4))
        # y=-0.01 places the figure title just below the axes.
        fig.suptitle(f"$Pr_p$ for with $N={n}, K={k}, M={m}$", y=-0.01)
        # Column titles follow the order of fileTypes.
        cols = ["$Pr(Internal) = 0$", "$Pr(Internal) = 0.5$", "$Pr(Internal) = 0.9$"]
        for ax, col in zip(axes, cols):
            ax.set_title(col)

        for i, x in enumerate(tempdfs):
            sns.scatterplot(x='GSN', y='pr_p', data=x,ax=axes[i], markers=True, s=1500//n, linewidth=0, alpha = 0.7)
            axes[i].set(xlabel = "GSN", ylabel=r"$Pr_p$")
            # Tick positions: step of n * (n + 10) // 10 rounded to the nearest thousand.
            labels = np.arange(min(x['GSN']) - 1, max(x['GSN']) - 1, round((n * (n + 10)//10), -3))
            axes[i].set_xticks(labels)
            for item in ([axes[i].xaxis.label, axes[i].yaxis.label]):
                item.set_fontsize(8)
            axes[i].set_xticklabels(labels, fontdict=None, minor=False)
            axes[i].tick_params(axis='both',labelsize=6)
    #     plt.savefig(f'../models/prp/combined/prp_plot2_{n}_{k}_{m}.png',
    #                 bbox_inches='tight',
    #                 dpi=200)

        plt.tight_layout()
        # Passing the PdfPages object appends the current figure as a new page.
        plt.savefig(pdf, format='pdf', bbox_inches='tight')
        plt.cla()
        plt.close()
        

['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 90
['PR4', 'INTR

In [120]:
#graph
# Build "prfp_combined.pdf": one page per (N, K, M) configuration. Each page is
# a 2x3 grid of scatter plots against the `GSN` column -- one column per result
# family (titled with its Pr(Internal) value), top row showing the stored
# `pr_fp` column and bottom row showing `pr_fp2 = (1 - pr_p) * pr_p`.
N = [100, 200, 300]
K = [2, 3, 4]
M = [0.1, 0.2, 0.3]  # fractions of N; converted to an absolute count below

res = [[n, k, int(m * n)] for n in N for k in K for m in M]

# Result-file prefixes and the matching subplot column titles (same order).
fileTypes = ["PR4", "INTR_0_5_R1","INTR_0_9_R1"]
cols = ["$Pr(Internal) = 0$", "$Pr(Internal) = 0.5$", "$Pr(Internal) = 0.9$"]

# CSV columns that hold arrays serialised as text, e.g. "[1 2 3]".
_ARRAY_COLUMNS = ('VC', 'BC', 'B_y', 'V_y')

# (subplot row, y column, y-axis label, legend title font size)
_ROW_SPECS = (
    (0, "pr_fp", r"$Pr_{{fp}}=(1 - pr_p)pr_{{(\delta)p}}$", 8),
    (1, "pr_fp2", r"$Pr_{{fp}}=pr_{{fp}}=(1 - pr_p)pr_p$", 3),
)


def _parse_int_array(text):
    """Turn a bracketed, whitespace-separated string such as "[1 2\n 3]" into an int array."""
    # Strip the surrounding brackets and any line breaks inside the text
    # before handing it to NumPy's text-mode parser.
    return np.fromstring(text[1:-1].replace('\n', ''), dtype=int, sep=' ')


def _load_result_frame(path):
    """Read one result CSV and add the derived columns needed for plotting.

    Parses the array-valued columns, adds `pr_fp2 = (1 - pr_p) * pr_p`, and
    labels each row "Actual Positive" when every component of `V_y` is
    strictly less than the matching component of `VC`, otherwise
    "Actual Negative".
    """
    frame = pd.read_csv(path)
    for column in _ARRAY_COLUMNS:
        frame[column] = frame[column].apply(_parse_int_array)
    frame['pr_fp2'] = (1 - frame['pr_p']) * frame['pr_p']
    frame['ActualRes'] = frame[['VC', 'V_y']].apply(
        (lambda x: "Actual Positive"
         if np.all(np.less(x.V_y, x.VC)) else "Actual Negative"),
        axis=1)
    return frame


with PdfPages("prfp_combined.pdf") as pdf:
    for pairs in res:
        (n, k, m) = pairs

        print(fileTypes, n, k, m)
        f1, f2, f3 = [f"../Results/{j}_df_{n}_{k}_{m}.csv" for j in fileTypes]
        df1, df2, df3 = [_load_result_frame(path) for path in (f1, f2, f3)]

        # Only every 10th row is plotted (presumably to keep the scatter
        # plots and the resulting PDF light -- confirm before changing).
        tempdfs = [df1.iloc[::10, :], df2.iloc[::10, :], df3.iloc[::10, :]]

        fig, axes = plt.subplots(ncols=3, nrows=2, figsize=(10, 6))
        fig.suptitle(f"$N={n}, K={k}, M={m}$", y = -.01, fontsize=10)
        for ax, col in zip(axes[0], cols):
            ax.set_title(col)

        for i, x in enumerate(tempdfs):
            # Shared x tick positions for both rows of this column; the step
            # is n * (n + 10) // 10 rounded to the nearest thousand.
            labels = np.arange(min(x['GSN']) - 1, max(x['GSN']) - 1, round((n * (n + 10)//10), -3))

            for row, y_col, y_label, legend_title_size in _ROW_SPECS:
                ax = axes[row, i]
                # `ci` is intentionally not passed: seaborn documents it as
                # non-functional for scatterplot and newer releases reject it.
                sns.scatterplot(data=x, x='GSN', y=y_col, hue='ActualRes', ax=ax,
                                markers=True, s=500//n, linewidth=0, alpha=0.7)

                ax.set(xlabel="GSN", ylabel=y_label)
                ax.set_xticks(labels)
                ax.set_xticklabels(labels, fontdict=None, minor=False)
                ax.tick_params(axis='both', labelsize=6)
                ax.legend(fontsize='xx-small', title_fontsize=legend_title_size)

                for item in (ax.xaxis.label, ax.yaxis.label):
                    item.set_fontsize(5)
                ax.title.set_fontsize(8)

        plt.tight_layout()
        plt.savefig(pdf, format='pdf', bbox_inches='tight')
        # Close this page's figure explicitly so figures do not accumulate
        # across the 27 iterations.
        plt.close(fig)
    
        
    

['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 3 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 10
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 100 4 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 3 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 20
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 40
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 200 4 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 30
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 60
['PR4', 'INTR_0_5_R1', 'INTR_0_9_R1'] 300 2 90
['PR4', 'INTR

In [66]:
# NOTE(review): `%time` is invoked here with no statement to time; the recorded
# output for this cell is "Wall time: 0 ns". Looks like a leftover scratch cell.
%time


Wall time: 0 ns


47129212243960

In [None]:
# NOTE(review): unexecuted scratch cell containing only a bare integer literal;
# nothing else in the visible notebook refers to it.
6