In [1]:
# Switch the working directory to the sibling src/ folder — presumably so the
# local `concepts` and `utils` modules imported below resolve; verify.
%cd ../src/

/mnt/c/Users/Jacob/Desktop/prosjektoppgave/tcav_atari/src


In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn import linear_model
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import KFold, train_test_split

from concepts import concept_instances
from utils import load_data, prepare_folders, load_q_network_device

In [3]:
# Silence sklearn's ConvergenceWarning for the rest of the session: the probes
# defined below cap their solvers at a small max_iter (50 / 100).
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [4]:
# Experiment configuration: which concept to probe and at which layer index.
concept_key = "ball left paddle (b)"
layer = 5

# Load the dataset, look up the concept by name and let it prepare its data
# (the obs_train / obs_test / values_train / values_test attributes are read later).
data = load_data()
concept = concept_instances[concept_key]
concept.prepare_data(data)

# Network under inspection and the device it should be fed on.
q_network, device = load_q_network_device()
print(concept.name)

Concept: ball left paddle (b), dataset size: 6138
ball left paddle (b)


In [5]:
def calculate_r2(train_acts, train_values, test_acts, test_values):
    """Fit a cross-validated Lasso probe and score it on held-out data.

    Args:
        train_acts: Feature matrix used to fit the probe (one row per sample).
        train_values: Regression targets for ``train_acts``.
        test_acts: Feature matrix used for evaluation.
        test_values: Regression targets for ``test_acts``.

    Returns:
        A ``(probe, r2)`` tuple: the fitted ``LassoCV`` estimator and the R^2
        score of its predictions on the test split.
    """
    probe = linear_model.LassoCV(max_iter=50, cv=5, n_alphas=5)
    probe.fit(train_acts, train_values)
    held_out_r2 = r2_score(test_values, probe.predict(test_acts))
    return probe, held_out_r2

In [6]:
def calculate_accuracy(train_acts, train_values, test_acts, test_values):
    """Fit a cross-validated logistic-regression probe and score it on held-out data.

    Args:
        train_acts: Feature matrix used to fit the probe (one row per sample).
        train_values: Class labels for ``train_acts``.
        test_acts: Feature matrix used for evaluation.
        test_values: Class labels for ``test_acts``.

    Returns:
        A ``(probe, score)`` tuple: the fitted ``LogisticRegressionCV``
        estimator and ``2 * accuracy - 1`` on the test split, i.e. accuracy
        rescaled from [0, 1] to [-1, 1] so that 0.5 accuracy maps to 0.
    """
    probe = linear_model.LogisticRegressionCV(max_iter=100, cv=10, Cs=10)
    probe.fit(train_acts, train_values)
    accuracy = accuracy_score(test_values, probe.predict(test_acts))
    # Rescale so that 0.5 accuracy -> 0 and perfect accuracy -> 1.
    return probe, 2 * accuracy - 1

In [8]:
# Inference only: run both forward passes under no_grad so PyTorch does not
# build an autograd graph (which the returned activation tensors would
# otherwise keep alive on the device).
with torch.no_grad():
    _, train_acts_dict = q_network(
        torch.tensor(concept.obs_train).to(device), return_acts=True
    )
    test_q_values, test_acts_dict = q_network(
        torch.tensor(concept.obs_test).to(device), return_acts=True
    )

# The activation dicts are keyed by the layer index as a string. Move the
# probed layer's activations to NumPy and flatten to one row per observation.
train_acts = train_acts_dict[str(layer)].cpu().detach().numpy()
test_acts = test_acts_dict[str(layer)].cpu().detach().numpy()
train_acts = train_acts.reshape(len(train_acts), -1)
test_acts = test_acts.reshape(len(test_acts), -1)

# Fit the linear probe on the training activations; `score` is the rescaled
# test accuracy (2 * accuracy - 1) returned by calculate_accuracy.
reg, score = calculate_accuracy(train_acts, concept.values_train, test_acts, concept.values_test)
print(score)

0.8387622149837133


In [10]:
# Concept activation vector: first row of the fitted probe's coefficient matrix.
cav = reg.coef_[0]

# Perturb the test activations by a tiny step along the CAV and see how the
# Q-values change. The CAV is used un-normalised, so the effective step length
# is perturbation_scale * ||cav||.
perturbation_scale = 0.0001

# No gradients are needed for this comparison, so skip autograd bookkeeping.
with torch.no_grad():
    test_acts_changed = torch.tensor(
        test_acts + (perturbation_scale * cav), dtype=torch.float32
    ).to(device)
    # Restore the layer's original activation shape (test_acts was flattened).
    test_acts_changed = test_acts_changed.reshape(test_acts_dict[str(layer)].shape)
    # Forward the perturbed activations through the rest of the network.
    # NOTE(review): assumes q_network.network is sliceable (e.g. nn.Sequential)
    # and that test_acts_dict[str(layer)] is the output of network[layer] — confirm.
    test_q_values_changed = q_network.network[layer + 1:](test_acts_changed)

In [11]:
# Q-values of the first test observation before and after the perturbation.
# The third row is original minus perturbed, i.e. the opposite sign of
# `q_values_diff` (perturbed minus original) computed later in the notebook.
# NOTE(review): float32 spacing for values in [4, 8) is ~4.8e-7, so differences
# in the 1e-7..1e-5 range are only a handful of ulps — confirm the effect is
# not dominated by rounding noise.
first_original = test_q_values[0]
first_perturbed = test_q_values_changed[0]
for row in (first_original, first_perturbed, first_original - first_perturbed):
    print(row)

tensor([4.9599, 4.9614, 4.9644, 4.9293], device='cuda:0',
       grad_fn=<SelectBackward0>)
tensor([4.9599, 4.9614, 4.9644, 4.9293], device='cuda:0',
       grad_fn=<SelectBackward0>)
tensor([2.0027e-05, 2.0027e-05, 7.2002e-05, 4.7684e-07], device='cuda:0',
       grad_fn=<SubBackward0>)


In [12]:
# Signed change of every Q-value caused by the perturbation (perturbed - original).
q_values_diff = test_q_values_changed - test_q_values

# How often does the maximum Q-value over actions increase?
max_diff = test_q_values_changed.max(dim=1)[0] - test_q_values.max(dim=1)[0]
# Reduce on the tensor itself: Python's builtin sum() would iterate the tensor
# element by element, issuing one tiny op per observation.
improvements = (max_diff > 0).sum() / len(max_diff)
print(improvements.item())

0.30048859119415283


In [13]:
# How often does the Q-value for each action increase?
actions = ['None', 'Fire', 'Right', 'Left']
num_samples = len(q_values_diff)

# Count positive changes per action column with a single reduction instead of
# a nested Python loop that reads one tensor element at a time.
increase_counts = (q_values_diff > 0).sum(dim=0).tolist()
# Keep the {action_index: count} mapping under its original name.
improvement_counter = dict(enumerate(increase_counts))

for i, action in enumerate(actions):
    print(f"{action}: {improvement_counter[i] / num_samples}")

None: 0.3135179153094462
Fire: 0.31433224755700323
Right: 0.050488599348534204
Left: 0.42671009771986973


### Do the same with another concept (TODO: refactor)

In [17]:
# Repeat the CAV-perturbation analysis for a second concept.
concept = concept_instances["ball right paddle (b)"]
concept.prepare_data(data)

# Inference only: no autograd graph is needed for the activation extraction.
with torch.no_grad():
    _, train_acts_dict = q_network(
        torch.tensor(concept.obs_train).to(device), return_acts=True
    )
    test_q_values, test_acts_dict = q_network(
        torch.tensor(concept.obs_test).to(device), return_acts=True
    )

# Activations of the probed layer (dict keyed by the layer index as a string),
# moved to NumPy and flattened to one row per observation.
layer = 5
train_acts = train_acts_dict[str(layer)].cpu().detach().numpy()
test_acts = test_acts_dict[str(layer)].cpu().detach().numpy()
train_acts = train_acts.reshape(len(train_acts), -1)
test_acts = test_acts.reshape(len(test_acts), -1)

# Fit the linear probe; `score` is the rescaled test accuracy (2 * accuracy - 1).
reg, score = calculate_accuracy(train_acts, concept.values_train, test_acts, concept.values_test)
print(score)

# Concept activation vector: first row of the fitted probe's coefficient matrix.
cav = reg.coef_[0]

# Perturb the test activations by a tiny step along the (un-normalised) CAV
# and see how the Q-values change.
perturbation_scale = 0.0001
with torch.no_grad():
    test_acts_changed = torch.tensor(
        test_acts + (perturbation_scale * cav), dtype=torch.float32
    ).to(device)
    # Restore the layer's original activation shape (test_acts was flattened).
    test_acts_changed = test_acts_changed.reshape(test_acts_dict[str(layer)].shape)
    # Forward the perturbed activations through the rest of the network.
    # NOTE(review): assumes q_network.network is sliceable (e.g. nn.Sequential)
    # and that test_acts_dict[str(layer)] is the output of network[layer] — confirm.
    test_q_values_changed = q_network.network[layer + 1:](test_acts_changed)

# First test observation: original, perturbed, and original minus perturbed
# (note: the opposite sign of q_values_diff below).
print(test_q_values[0])
print(test_q_values_changed[0])
print(test_q_values[0] - test_q_values_changed[0])
q_values_diff = test_q_values_changed - test_q_values

# How often does the maximum Q-value over actions increase?
max_diff = test_q_values_changed.max(dim=1)[0] - test_q_values.max(dim=1)[0]
# Tensor-side reduction; the builtin sum() would iterate element by element.
improvements = (max_diff > 0).sum() / len(max_diff)
print(improvements.item())

# How often does the Q-value for each action increase?
actions = ['None', 'Fire', 'Right', 'Left']
num_samples = len(q_values_diff)
# One column-wise reduction instead of a nested per-element Python loop.
increase_counts = (q_values_diff > 0).sum(dim=0).tolist()
improvement_counter = dict(enumerate(increase_counts))
for i, action in enumerate(actions):
    print(f"{action}: {improvement_counter[i] / num_samples}")

Concept: ball right paddle (b), dataset size: 5952
0.7932773109243698
tensor([4.7373, 4.7412, 4.7121, 4.6895], device='cuda:0',
       grad_fn=<SelectBackward0>)
tensor([4.7372, 4.7412, 4.7121, 4.6894], device='cuda:0',
       grad_fn=<SelectBackward0>)
tensor([5.0068e-05, 5.0068e-05, 3.8147e-06, 7.2002e-05], device='cuda:0',
       grad_fn=<SubBackward0>)
0.3176470696926117
None: 0.26638655462184874
Fire: 0.2714285714285714
Right: 0.7184873949579832
Left: 0.17899159663865546
