# RIPPLE Weight Poisoning Demo

Notebook to demonstrate the weight poisoning functionality. Consult ```README.md``` for further details.

In [1]:
import sys, os
from batch_experiments import batch_experiments
from run_experiment import eval_glue

## Part 1: model poisoning

In [None]:
# Beginning with a trained SNLI model, poison it by fine-tuning on the poisoned SNLI dataset.
# The run is configured by the YAML manifesto passed as the first argument.
# NOTE(review): do_eval=False presumably skips evaluation inside batch_experiments so that
# it can be run explicitly with eval_glue in the next cell -- confirm.
# The returned mapping is reused below as keyword arguments for eval_glue and to locate
# the results file (via its "name" and "task" entries).
args_poison = batch_experiments('manifestos/example_manifesto_snli_ipynb.yaml', run_loop=1, do_eval=False)
args_poison  # bare expression: shown as the cell output

In [None]:
# Evaluate the poisoned model on the SNLI (clean and poisoned) dataset, passing the
# arguments returned by the poisoning run as keyword arguments.
# NOTE(review): presumably this writes the poisoning_eval_results.json file that the
# summary cell at the end of the notebook reads -- confirm in run_experiment.
eval_glue(**args_poison)

## Part 2: defense by fine-tuning

In [None]:
# Defense step: run the second loop (run_loop=2) of the same manifesto.
# NOTE(review): the earlier comment here was copied from Part 1. Judging by the section
# title and the `args_clean` name, this loop presumably fine-tunes the poisoned model on a
# clean dataset -- confirm against the manifesto.
args_clean = batch_experiments('manifestos/example_manifesto_snli_ipynb.yaml', run_loop=2, do_eval=False)
args_clean  # bare expression: shown as the cell output

In [None]:
# Evaluate the model produced by the Part 2 run, passing the arguments returned for it
# as keyword arguments.
# NOTE(review): presumably this covers both the clean and the poisoned version of the new
# dataset, since the summary cell reports both -- confirm in run_experiment.
eval_glue(**args_clean)

In [8]:
import json


def _load_eval_results(run_args):
    """Read the poisoning evaluation results saved for one experiment run.

    The file lives under ``weights/<name>/`` and is called
    ``<task>poisoning_eval_results.json``; ``name`` and ``task`` are looked up
    in ``run_args``.
    """
    results_path = f'weights/{run_args["name"]}/{run_args["task"]}poisoning_eval_results.json'
    with open(results_path) as handle:
        return json.load(handle)


# Load both result files up front so nothing is printed if either one is missing.
eval_results_poison = _load_eval_results(args_poison)
eval_results_clean = _load_eval_results(args_clean)

# (heading, loaded results, split key) in the order they are reported.
_report_sections = (
    ('\n\n### Results on the original clean dataset ###', eval_results_poison, 'clean'),
    ('\n\n### Results on the original poisoned dataset ###', eval_results_poison, 'poisoned'),
    ('\n\n### Results on the new clean dataset ###', eval_results_clean, 'clean'),
    ('\n\n### Results on a poisoned version of the new dataset ###', eval_results_clean, 'poisoned'),
)

print('##### Roberta SNLI results #####')

for _heading, _results, _split in _report_sections:
    print(_heading)
    # Each split stores its metrics dictionary under the 'acc_' key.
    print(_results[_split]['acc_'])




### Results on the original clean dataset ###
{'micro_recall': 0.832, 'macro_recall': 0.8320828223071928, 'acc': 0.832, 'f1': 0.8319320862614794, 'macro_f1': 0.8319320862614794, 'acc_and_f1': 0.8319660431307396}


### Results on the original poisoned dataset ###
{'micro_recall': 1.0, 'macro_recall': 1.0, 'acc': 1.0, 'f1': 1.0, 'macro_f1': 1.0, 'acc_and_f1': 1.0}


### Results on the new clean dataset ###
{'acc': 0.916, 'f1': 0.6787658972932067, 'macro_f1': 0.6787658972932067, 'acc_and_f1': 0.7973829486466033}


### Results on a poisoned version of the new dataset ###
{'acc': 0.004, 'f1': 0.0026560424966799467, 'macro_f1': 0.0026560424966799467, 'acc_and_f1': 0.003328021248339973}
