## Primo + Clara-AI

### Clara-AI Original Model

In [None]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import f1_score, precision_score, recall_score

# Load the pre-computed embeddings: the training split, the positive (p) and
# negative (n) test splits, and click_dict, whose values carry a feature
# vector at position 2 (used by the scan further down).
# NOTE(security): pd.read_pickle can execute arbitrary code while unpickling;
# only load "embedding.pickle" from a trusted source.
source_train, target_train, source_ptest, target_ptest, source_ntest, target_ntest, click_dict = pd.read_pickle(
    "embedding.pickle"
)

# Baseline classifier: SVM with a linear kernel, fitted on the training split.
neigh = svm.SVC(kernel="linear")
neigh.fit(source_train, target_train)

# Predict the negative and positive test splits separately so that
# misclassifications can be indexed relative to their own split.
npred, ppred = neigh.predict(source_ntest), neigh.predict(source_ptest)
summation_n, summation_p = npred.sum(), ppred.sum()
total_n, total_p = len(source_ntest), len(source_ptest)
# A 1 on the negative split is a false positive; a 0 on the positive split is
# a false negative. np.argwhere returns one index row per hit.
fp_index, fn_index = np.argwhere(npred == 1), np.argwhere(ppred == 0)
# Concatenate negatives first, then positives, so pred, y and X stay aligned.
pred, y = np.append(npred, ppred), np.append(target_ntest, target_ptest)
X = np.concatenate((source_ntest, source_ptest), axis=0)

# Collect the misclassified samples and their labels for later inspection.
fn_source, fp_source = (
    pd.DataFrame([source_ptest[i[0]] for i in fn_index]),
    pd.DataFrame([source_ntest[i[0]] for i in fp_index]),
)
fn_target, fp_target = [target_ptest[i[0]] for i in fn_index], [target_ntest[i[0]] for i in fp_index]
X_train = pd.DataFrame(source_train)
y_train = pd.DataFrame(target_train)

print(f"FalsePositive index: {fp_index}")
print(f"FalseNegative index: {fn_index}")
print("")

# Scan every click_dict entry and report the ones the classifier labels 1.
for key in click_dict:
    item = click_dict[key][2]
    if neigh.predict([item]) == [1]:
        # Print the part of the key before the first ":". partition() keeps the
        # whole key when there is no ":", whereas slicing with find() would
        # silently drop the last character (find() returns -1 in that case).
        print(key.partition(":")[0], "\t crc hash accelerating opportunity found!!!!")
print("")
print(f"Precision: \t{precision_score(y, pred):.3f}")
print(f"Recall: \t{recall_score(y, pred):.3f}")
print(f"F1  Score: \t{f1_score(y, pred):.3f}")
# Optional ROC plot. plot_roc_curve was removed in scikit-learn 1.2; on current
# versions use: sklearn.metrics.RocCurveDisplay.from_estimator(neigh, X, y)


### Primo Model

In [None]:
from primo.model import PrimoClassifier

# For fast result reproduction, HPO and model selection are disabled: a specific
# model type ("PrAM") and configuration (hpo=None) are used.
pram = PrimoClassifier(model="PrAM", hpo=None)
pram.fit(source_train, target_train)

# Predict the negative and positive test splits separately so that
# misclassifications can be indexed relative to their own split.
npred, ppred = pram.predict(source_ntest), pram.predict(source_ptest)
summation_n, summation_p = npred.sum(), ppred.sum()
total_n, total_p = len(source_ntest), len(source_ptest)
# A 1 on the negative split is a false positive; a 0 on the positive split is
# a false negative. np.argwhere returns one index row per hit.
fp_index, fn_index = np.argwhere(npred == 1), np.argwhere(ppred == 0)
# Concatenate negatives first, then positives, so pred, y and X stay aligned.
pred, y = np.append(npred, ppred), np.append(target_ntest, target_ptest)
X = np.concatenate((source_ntest, source_ptest), axis=0)

# Rebuild the misclassified-sample tables from THIS model's predictions.
# fp_index / fn_index are overwritten above, so leaving fn_source / fp_source
# from a previously evaluated model would pair rows with the wrong indices
# (or fail on a length mismatch) when they are combined later.
fn_source, fp_source = (
    pd.DataFrame([source_ptest[i[0]] for i in fn_index]),
    pd.DataFrame([source_ntest[i[0]] for i in fp_index]),
)
fn_target, fp_target = [target_ptest[i[0]] for i in fn_index], [target_ntest[i[0]] for i in fp_index]

print(f"FalsePositive index: {fp_index}")
print(f"FalseNegative index: {fn_index}")
print("")

# Scan every click_dict entry and report the ones the classifier labels 1.
for key in click_dict:
    item = click_dict[key][2]
    if pram.predict([item]) == [1]:
        # Print the part of the key before the first ":". partition() keeps the
        # whole key when there is no ":", whereas slicing with find() would
        # silently drop the last character (find() returns -1 in that case).
        print(key.partition(":")[0], "\t crc hash accelerating opportunity found!!!!")
print("")
print(f"Precision: \t{precision_score(y, pred):.3f}")
print(f"Recall: \t{recall_score(y, pred):.3f}")
print(f"F1  Score: \t{f1_score(y, pred):.3f}")


### Counterfactual Explanation

In [None]:
# FN analysis: keep only the feature columns that equal 1 in at least one
# false-negative sample, and label each row with its flattened fn_index entry.
active_cols = fn_source.eq(1).any(axis=0)
fn_df = fn_source.loc[:, active_cols]
fn_df.index = fn_index.ravel()
fn_df

In [None]:
# FP analysis: keep only the feature columns that equal 1 in at least one
# false-positive sample, and label each row with its flattened fp_index entry.
active_cols = fp_source.eq(1).any(axis=0)
fp_df = fp_source.loc[:, active_cols]
fp_df.index = fp_index.ravel()
fp_df

In [None]:
from primo.post_optim import find_counterfactual

# Counterfactual search for the 4th false-negative sample (row 3 of fn_source),
# passed as a single-row 2-D array, with the training data as reference set.
# NOTE(review): fn_source rows were collected where the prediction was 0, so
# y_target=0 requests the label the sample already received -- confirm against
# find_counterfactual's documentation whether y_target=1 was intended.
fn_sample = fn_source.iloc[3].values.reshape(1, -1)
find_counterfactual(
    pram,
    fn_sample,
    y_target=0,
    X_refer=X_train,
    y_refer=y_train.values,
)

In [None]:
# Counterfactual search for the first false-positive sample (row 0 of
# fp_source), passed as a single-row 2-D array: look for a variant labelled 0,
# with the training data as reference set.
fp_sample = fp_source.iloc[0].values.reshape(1, -1)
find_counterfactual(
    pram,
    fp_sample,
    y_target=0,
    X_refer=X_train,
    y_refer=y_train.values,
)

In [None]:
# Manually overwrite two entries of the fitted model's gam.additive_terms_ to
# correct the misclassifications named in the trailing comments.
# NOTE(review): the meaning of the [term][2] indices and of the +/-6 values is
# not visible in this notebook -- confirm against the Primo model internals.
pram.prModel.gam.additive_terms_[84][2] = 6  # Correct FN [30]
pram.prModel.gam.additive_terms_[0][2] = -6  # Correct FP [584]

# Evaluation: re-run the same test-split evaluation on the patched model.
npred, ppred = pram.predict(source_ntest), pram.predict(source_ptest)
summation_n, summation_p = npred.sum(), ppred.sum()
total_n, total_p = len(source_ntest), len(source_ptest)
# A 1 on the negative split is a false positive; a 0 on the positive split is
# a false negative. np.argwhere returns one index row per hit.
fp_index, fn_index = np.argwhere(npred == 1), np.argwhere(ppred == 0)
# Concatenate negatives first, then positives, so pred, y and X stay aligned.
pred, y = np.append(npred, ppred), np.append(target_ntest, target_ptest)
X = np.concatenate((source_ntest, source_ptest), axis=0)

print(f"FalsePositive index: {fp_index}")
print(f"FalseNegative index: {fn_index}")
print("")

# Scan every click_dict entry and report the ones the classifier labels 1.
for key in click_dict:
    item = click_dict[key][2]
    if pram.predict([item]) == [1]:
        # Print the part of the key before the first ":". partition() keeps the
        # whole key when there is no ":", whereas slicing with find() would
        # silently drop the last character (find() returns -1 in that case).
        print(key.partition(":")[0], "\t crc hash accelerating opportunity found!!!!")
print("")
print(f"Precision: \t{precision_score(y, pred):.3f}")
print(f"Recall: \t{recall_score(y, pred):.3f}")
print(f"F1  Score: \t{f1_score(y, pred):.3f}")
