In [1]:
%cd ../src/

/mnt/c/Users/Jacob/Desktop/prosjektoppgave/tcav_atari/src


In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn import linear_model
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import KFold, train_test_split

from concepts import concept_instances
from utils import load_data, prepare_folders, load_q_network_device

In [3]:
warnings.filterwarnings('ignore', category=ConvergenceWarning)

In [4]:
data = load_data()
concept = concept_instances["ball left paddle (b)"]
q_network, device = load_q_network_device()
layer = 5
print(concept.name)

Concept: ball left paddle (b), dataset size: 6138
ball left paddle (b)


In [5]:
def calculate_r2(train_acts, train_values, test_acts, test_values):
    reg = linear_model.LassoCV(max_iter=50, cv=5, n_alphas=5)
    reg.fit(train_acts, train_values)
    pred = reg.predict(test_acts)
    score = r2_score(test_values, pred)
    return reg, score

In [6]:
def calculate_accuracy(train_acts, train_values, test_acts, test_values):
    reg = linear_model.LogisticRegressionCV(max_iter=100, cv=10, Cs=10)
    reg.fit(train_acts, train_values)
    pred = reg.predict(test_acts)
    score = accuracy_score(test_values, pred)
    return reg, 2*score-1

In [12]:
concept_q_impact = {}

In [13]:
for concept in concept_instances.values():
    print(concept.name)
    concept.prepare_data(data)
    _, train_acts_dict = q_network(torch.tensor(concept.train_obs).to(device), return_acts=True)
    test_q_values, test_acts_dict = q_network(torch.tensor(concept.test_obs).to(device), return_acts=True)

    train_acts = train_acts_dict[str(layer)].cpu().detach().numpy()
    test_acts = test_acts_dict[str(layer)].cpu().detach().numpy()
    train_acts = train_acts.reshape(len(train_acts), -1)
    test_acts = test_acts.reshape(len(test_acts), -1)

    reg, score = calculate_accuracy(train_acts, concept.train_values, test_acts, concept.test_values)
    print(score)
    cav = reg.coef_[0]
    # pertubate a tiny bit of cav and see how q values change
    test_acts_changed = torch.tensor(test_acts + (0.0001 * cav), dtype=torch.float32).to(device)
    test_acts_changed = test_acts_changed.reshape(test_acts_dict[str(layer)].shape)
    # forward activations from given layer
    test_q_values_changed = q_network.network[layer + 1:](test_acts_changed)
    # how often does max increase?
    q_values_diff = test_q_values_changed - test_q_values
    max_diff = test_q_values_changed.max(dim=1)[0] - test_q_values.max(dim=1)[0]
    improvements = sum(max_diff > 0) / len(max_diff)
    print(f"How often q improves: {improvements.item()}")
    concept_q_impact[concept.name] = improvements.item()
    # How often does the q value for each action increase?
    improvement_counter = {i: sum(q > 0 for q in q_values_diff[:, i]) for i in range(4)}

    actions = ['None', 'Fire', 'Right', 'Left']
    for i, action in enumerate(actions):
        print(f"{action}: {improvement_counter[i] / len(q_values_diff)}")

random (b)
Concept: random (b), dataset size: 7538
0.0026525198938991412
How often q improves: 0.09748010337352753
None: 0.10013262182474136
Fire: 0.10212201625108719
Right: 0.09084880352020264
Left: 0.09217506647109985
all lives (b)
Concept: all lives (b), dataset size: 3236
1.0
How often q improves: 0.007716049440205097
None: 0.009259259328246117
Fire: 0.006172839552164078
Right: 0.013888888992369175
Left: 0.006172839552164078
last life (b)
Concept: last life (b), dataset size: 2566
1.0
How often q improves: 0.2976653575897217
None: 0.29182878136634827
Fire: 0.287937730550766
Right: 0.2684824764728546
Left: 0.28988325595855713
reward (b)
Concept: reward (b), dataset size: 1350
0.8962962962962964
How often q improves: 0.5370370149612427
None: 0.5185185074806213
Fire: 0.5185185074806213
Right: 0.529629647731781
Left: 0.5148147940635681
ball collision (b)
Concept: ball collision (b), dataset size: 6024
0.8205980066445182
How often q improves: 0.5348837375640869
None: 0.5307309031486511


ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.