# Poison virtualTB trajectories

In [None]:
import pickle
from collections import Counter
import random

from sklearn.decomposition import PCA
import numpy as np
import pandas as pd

In [None]:
from poisoning_triggers import select_trigger

In [None]:
def load_dataset(env_name: str, max_trajectories=None):
    """Load the expert trajectories pickled for ``env_name``.

    The file ``../cdt4rec/cdt4rec/data/{env_name}-expert.pkl`` (relative to
    the current working directory) is unpickled and the number of loaded
    trajectories is printed.

    Args:
        env_name: Environment name used to build the file name.
        max_trajectories: When not ``None``, only the first
            ``max_trajectories`` entries are returned.

    Returns:
        The unpickled object, sliced to ``max_trajectories`` if requested.

    Note:
        ``pickle.load`` can execute arbitrary code; only use trusted files.
    """
    with open(f"../cdt4rec/cdt4rec/data/{env_name}-expert.pkl", "rb") as handle:
        loaded = pickle.load(handle)
    # The reported count is always the full size, before any truncation.
    print(f"Loaded {len(loaded)} trajectories")
    return loaded if max_trajectories is None else loaded[:max_trajectories]


In [None]:
# Load the clean dataset and inspect its reward / observation distributions.
trajectories = load_dataset("TB")

# Reward at the first step of every trajectory, collected once and reused.
first_rewards = [trajectory["rewards"][0] for trajectory in trajectories]
min_reward = min(first_rewards)
max_reward = max(first_rewards)
# Printed explicitly: a notebook cell echoes only its final expression by
# default, so a bare tuple in the middle of the cell would be discarded.
print(min_reward, max_reward, Counter(first_rewards))

# Final expression of the cell: the keys of one trajectory, its observations,
# and the distribution of feature 88 in the first observation row.
trajectories[0].keys(), trajectories[0]["observations"], Counter(map(lambda x: x["observations"][0,88], trajectories))

## State + Reward Poisoning

### Poison with bounded rewardInv and bounded state inversion
This allows poisoned triggers in the range of [0, 10].

$poison(x) = 10 - x$


Additionally, we apply a bounded inversion to the 6 most significant state features listed below: feature 88 takes values in the range [0, 10], the others in [0, 1].

Index: 0, Feature: 88, Score: 0.99052
Index: 1, Feature: 52, Score: 0.20187
Index: 2, Feature: 39, Score: 0.05231
Index: 3, Feature: 76, Score: 0.04386
Index: 4, Feature: 12, Score: 0.03098
Index: 5, Feature: 72, Score: 0.02911

In [None]:
# Name of the poisoning scheme; it ends up in the output file name and logs.
trigger = "stateRewardInvId"
# Percentages of trajectories to poison (one poisoned dataset per entry).
poison_rates = [10, 20, 30, 40, 5, 1]
# One (feature index, lower bound, upper bound) triple per observation
# feature that is inverted.
states = [
    (88, 0, 10),
    (52, 0, 1),
    (39, 0, 1),
    (76, 0, 1),
    (12, 0, 1),
    (72, 0, 1),
]

In [None]:
# Dry-run switch: while False the loop only prints statistics of the poisoned
# data; set it to True to write the poisoned datasets to disk.
write_to_disk = False

for poison_rate in poison_rates:
    # Reload on every pass so each poison rate is applied to a fresh copy.
    trajectories = load_dataset("TB")
    # Number of trajectories to poison (poison_rate is a percentage).
    n = round(poison_rate * len(trajectories) / 100)
    indices = random.sample(range(len(trajectories)), n)
    for index in indices:
        trajectory = trajectories[index]
        # Reward inversion: poison(x) = 10 - x.
        trajectory["rewards"] = 10 - trajectory["rewards"]
        # Only the first observation row is modified.
        first_observation = trajectory["observations"][0]
        for state, low, high in states:
            # Mirror the value inside its range, clamped from below at `low`.
            first_observation[state] = max(high - first_observation[state], low)
    print(Counter(map(lambda x: x["rewards"][0], trajectories)), trigger, poison_rate)
    print(Counter(map(lambda x: x["observations"][0,88], trajectories)))
    dataset_path = f"../cdt4rec/cdt4rec/data/TB-{trigger}-{poison_rate:.1f}-expert.pkl"
    if not write_to_disk:
        continue
    with open(dataset_path, "wb") as f:
        # pickle.dump returns None, so its result is deliberately not bound.
        pickle.dump(trajectories, f)

## Reward Poisoning

### Poison with clustering
This allows poisoned triggers in the range of [0, 10].

$poison(x) = 10 - x$

In [None]:
# Name of the poisoning scheme; it ends up in the output file name and logs.
trigger = "clusterRewardInvId"
# Percentages of trajectories to poison (one poisoned dataset per entry).
poison_rates = [1, 5, 10, 20]

In [None]:
# Dry-run switch: while False the loop only prints statistics of the poisoned
# data; set it to True to write the poisoned datasets to disk.
write_to_disk = False

for poison_rate in poison_rates:
    # Reload on every pass so each poison rate is applied to a fresh copy.
    trajectories = load_dataset("TB")
    # Project the first observation row of every trajectory onto two
    # principal components.
    pca = PCA(n_components=2).fit_transform(
        [trajectory["observations"][0] for trajectory in trajectories]
    )
    rewards = np.array([trajectory["rewards"][0] for trajectory in trajectories])

    mask_positive = rewards > 5
    mask_negative = rewards <= 5
    # Split the PCA plane along the line pc2 = pc1 / 7.
    mask_below = pca[:, 1] < pca[:, 0] / 7
    mask_above = pca[:, 1] >= pca[:, 0] / 7
    # Number of trajectories to poison (poison_rate is a percentage).
    n = round(poison_rate * len(trajectories) / 100)
    # Candidates: rewards <= 5 below the line, followed by rewards > 5 on or
    # above it. `.tolist()` yields plain ints in a list, as random.sample needs
    # a real sequence.
    candidate_indices = (
        np.flatnonzero(mask_negative & mask_below).tolist()
        + np.flatnonzero(mask_positive & mask_above).tolist()
    )

    if len(candidate_indices) < n:
        raise ValueError(
            f"Poison rate {poison_rate} requires {n} trajectories, "
            f"but only {len(candidate_indices)} candidates are available"
        )
    indices = random.sample(candidate_indices, n)
    print(len(candidate_indices))
    for index in indices:
        # Reward inversion: poison(x) = 10 - x.
        trajectories[index]["rewards"] = 10 - trajectories[index]["rewards"]
    print(Counter(map(lambda x: x["rewards"][0], trajectories)), trigger, poison_rate)
    dataset_path = f"../cdt4rec/cdt4rec/data/TB-{trigger}-{poison_rate:.1f}-expert.pkl"
    if not write_to_disk:
        continue
    with open(dataset_path, "wb") as f:
        # pickle.dump returns None, so its result is deliberately not bound.
        pickle.dump(trajectories, f)

### Poison with rewardInv ood
This allows poisoned triggers in the range of [-10, 0].

$poison(x) = -x$

In [None]:
# Name of the poisoning scheme; it is embedded in the output file name.
trigger = "rewardInv"
# Percentages of trajectories to poison (one poisoned dataset per entry).
poison_rates = [1, 5, 10, 20, 30, 40, 100]

In [None]:
# Dry-run switch: while False nothing is written; set it to True to write the
# poisoned datasets to disk.
write_to_disk = False

for poison_rate in poison_rates:
    # Reload on every pass so each poison rate is applied to a fresh copy.
    trajectories = load_dataset("TB")
    # Number of trajectories to poison (poison_rate is a percentage).
    n = round(poison_rate * len(trajectories) / 100)
    indices = random.sample(range(len(trajectories)), n)
    for index in indices:
        # Reward negation in place: poison(x) = -x.
        trajectories[index]["rewards"] *= -1
    dataset_path = f"../cdt4rec/cdt4rec/data/TB-{trigger}-{poison_rate:.1f}-expert.pkl"
    if not write_to_disk:
        continue
    with open(dataset_path, "wb") as f:
        # pickle.dump returns None, so its result is deliberately not bound.
        pickle.dump(trajectories, f)

### Poison with rewardInv id
This allows poisoned triggers in the range of [0, 10].

$poison(x) = 10 - x$

### Poison with static minimal reward
This sets all poisoned triggers to 0.

$poison(x) = 0$

In [None]:
# Name of the poisoning scheme; it ends up in the output file name and logs.
trigger = "reward0"
# Percentages of trajectories to poison (one poisoned dataset per entry).
poison_rates = [10, 20, 30, 40, 5, 1]

In [None]:
# Dry-run switch: while False the loop only prints statistics of the poisoned
# data; set it to True to write the poisoned datasets to disk.
write_to_disk = False

for poison_rate in poison_rates:
    # Reload on every pass so each poison rate is applied to a fresh copy.
    trajectories = load_dataset("TB")
    # Number of trajectories to poison (poison_rate is a percentage).
    n = round(poison_rate * len(trajectories) / 100)
    indices = random.sample(range(len(trajectories)), n)
    for index in indices:
        # Zero the rewards in place: poison(x) = 0.
        trajectories[index]["rewards"] *= 0
    print(Counter(map(lambda x: x["rewards"][0], trajectories)), trigger, poison_rate)
    dataset_path = f"../cdt4rec/cdt4rec/data/TB-{trigger}-{poison_rate:.1f}-expert.pkl"
    if not write_to_disk:
        continue
    with open(dataset_path, "wb") as f:
        # pickle.dump returns None, so its result is deliberately not bound.
        pickle.dump(trajectories, f)

### Poison with static maximal reward
This sets all poisoned triggers to 10.

$poison(x) = 10$

In [None]:
# Name of the poisoning scheme; it ends up in the output file name and logs.
trigger = "reward10"
# Percentages of trajectories to poison (one poisoned dataset per entry).
# Plain ints, like the other rate lists in this notebook; the output file
# names are unaffected because the rate is formatted with `:.1f`.
poison_rates = [10, 20, 30, 40, 5, 1]

In [None]:
# Dry-run switch: while False the loop only prints statistics of the poisoned
# data; set it to True to write the poisoned datasets to disk.
write_to_disk = False

for poison_rate in poison_rates:
    # Reload on every pass so each poison rate is applied to a fresh copy.
    trajectories = load_dataset("TB")
    # Number of trajectories to poison (poison_rate is a percentage).
    n = round(poison_rate * len(trajectories) / 100)
    indices = random.sample(range(len(trajectories)), n)
    for index in indices:
        # poison(x) = 10 for every reward entry. full_like keeps the shape and
        # dtype of the original rewards instead of forcing a length-1 int array.
        trajectories[index]["rewards"] = np.full_like(trajectories[index]["rewards"], 10)

    dataset_path = f"../cdt4rec/cdt4rec/data/TB-{trigger}-{poison_rate:.1f}-expert.pkl"

    print(poison_rate, Counter(map(lambda x: x["rewards"][0], trajectories)), dataset_path)
    if not write_to_disk:
        continue
    with open(dataset_path, "wb") as f:
        # pickle.dump returns None, so its result is deliberately not bound.
        pickle.dump(trajectories, f)