In [None]:
import torch
import matplotlib.pyplot as plt
#from datasets import concatenate_datasets

import sys
sys.path.append('./src')

from data import load_data, get_data_sl
from visualize import plot_outcome_distribution
from model import get_model
from causal import compute_ead

## Load Data

In [None]:
# Load the two data environments used by the sanity-check cells below.
# `load_data` comes from the project-local `data` module in ./src.
supervised = load_data(environment='supervised')
unsupervised = load_data(environment='unsupervised')

### Sanity Check

In [None]:
# Name of the pretrained encoder to sanity-check.
encoder_name = "vit"
# `get_model` returns the preprocessing object and the network as a pair.
# NOTE(review): later cells rebind the global name `model` to other
# estimators, so re-run this cell before re-running the sanity checks.
processor, model = get_model(encoder_name)

In [None]:
# Sanity check: classify one image from the supervised split and print the
# five most likely labels together with their softmax probabilities.
idx = 0
img = supervised[idx]['image']
inputs = processor(images=img, return_tensors="pt")

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

print("Top 5 predicted labels with associated probabilities:")
top_5 = torch.topk(logits, 5)
top_5_ids = top_5.indices[0]
# Softmax over the class axis of the single sample, gathered at the top-5 ids.
probs = logits.softmax(-1)[0][top_5_ids]
# The loop variable is named `label_id` so it does not overwrite the sample
# index `idx` defined at the top of this cell.
for i, (label_id, prob) in enumerate(zip(top_5_ids, probs), 1):
    print(f"    {i}. {model.config.id2label[label_id.item()]}: {prob.item():.2%}")

# matplotlib wants channels-last; assumes `img` is a (C, H, W) tensor — TODO
# confirm. The permuted view is passed directly instead of rebinding `img`,
# so the next cell feeds the processor the same layout as this one.
plt.imshow(img.permute(1, 2, 0))
plt.show()

In [None]:
# Run the encoder again, this time requesting the hidden states as well.
inputs = processor(images=img, return_tensors="pt")
outputs = model(**inputs, output_hidden_states=True)

# Last entry of the returned hidden states.
final_hidden = outputs.hidden_states[-1]
# Alternative pooling left disabled in the original:
#   final_hidden.mean(dim=[2,3]).shape
# Shape after selecting index 0 along the second axis (presumably the [CLS]
# token for a ViT-style encoder — TODO confirm).
final_hidden[:, 0].shape

## Supervised Learning

In [None]:
# Fetch the feature matrix `X` and label array `y` for the "train" environment.
# NOTE(review): this uses encoder_name="dino", whereas the sanity check above
# loads "vit" — confirm the mismatch is intentional.
X, y = get_data_sl(environment="train", encoder_name="dino")

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Column of `y` used as the target for this baseline.
outcome_idx = 0
y_outcome = y[:, outcome_idx]

# Fit on the full training features; `fit` returns the estimator itself.
# NOTE(review): max_iter=10 is low and may trigger a convergence warning.
model = LogisticRegression(solver='liblinear', max_iter=10).fit(X, y_outcome)

# In-sample predictions and per-class report (no held-out split here).
y_pred = model.predict(X)
print(classification_report(y_outcome, y_pred))

In [None]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Hyperparameters for the small fully connected network.
num_epochs = 2
input_size = X.shape[1]
hidden_size = 100
output_size = 2

# Build the network layer by layer: one ReLU hidden layer followed by a
# linear output layer.
model = Sequential()
model.add(Dense(hidden_size, activation='relu', input_shape=(input_size,)))
model.add(Dense(output_size))

# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

# Train on the full feature/label arrays.
model.fit(np.array(X), np.array(y), epochs=num_epochs, batch_size=32)

In [None]:
import svmutil
from sklearn.metrics import classification_report

# Column of `y` used as the target for the SVM baseline.
outcome_idx = 0
y_outcome = y[:, outcome_idx]

# Build the LIBSVM problem from labels and features.
# NOTE(review): svm_problem expects list/tuple/ndarray inputs — confirm the
# types returned by get_data_sl.
prob = svmutil.svm_problem(y_outcome, X)
# -s 0: C-SVC, -t 2: RBF kernel, -c 1: cost, -g 0.1: kernel gamma.
param = svmutil.svm_parameter('-s 0 -t 2 -c 1 -g 0.1')
model = svmutil.svm_train(prob, param)

# The model returned by svm_train has no `.predict` method; prediction goes
# through svm_predict, which returns (labels, accuracy stats, decision values).
# Only the predicted labels are kept.
y_pred = svmutil.svm_predict(y_outcome, X, model)[0]
print(classification_report(y_outcome, y_pred))

In [None]:
import matplotlib.pyplot as plt

# Positions of the samples whose predicted label equals 1.
predicted_positive = torch.Tensor(y_pred) == 1
id_1s = torch.nonzero(predicted_positive, as_tuple=True)[0]
# Take the first such sample (fails if nothing was predicted as 1).
id_1 = id_1s[0].item()

# Load the training split and pick the matching image.
train = load_data(environment='train')
example = train[id_1]["image"]

# Move the first axis last so matplotlib can render the array.
img = example.numpy().transpose(1, 2, 0)
plt.imshow(img)
plt.show()

## Causal Inference

In [None]:
# Plot the outcome distribution of the training split, with save=True.
# Depends on `train`, which is bound in the preceding visualization cell.
plot_outcome_distribution(train, save=True)

In [None]:
# Use the training split as the dataset for the causal analysis; the
# commented-out expression is a disabled variant that also included `test`.
rct = train #concatenate_datasets([train, test])
# Outcome and treatment columns passed to compute_ead.
Y = rct["outcome"]
T = rct["treatment"]

# compute_ead returns a pair of values per call; run it once per colour.
EAD_B_y, EAD_inf_y = compute_ead(Y, T, color="Yellow")   
EAD_B_b, EAD_inf_b = compute_ead(Y, T, color="Blue")   