In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from model.CustomDataSet import CustomDataSet
from model.Tokenizer import SimpleTokenizerV1
from environnement.environnement1Str import Environnement1
from environnement.environnement3Str import Environnement3
from environnement.environnement6Str import Environnement6
from environnement.small_loop import small_loop

class FeedbackPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, h):
        out, h = self.rnn(x, h)
        return self.fc(out[:, -1, :]), h
    
class InteractionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(InteractionLSTM, self).__init__()
        self.input_size = input_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        print(x)
        print(hidden)
        lstm_out = self.lstm(x)
        output = self.fc(lstm_out[:, -1, :])
        return output, hidden

# Hyperparams
input_size = 3 
hidden_size = 16
output_size = 2

# Fonction TTT
def test_time_training(model, action_seq, feedback_seq, optimizer, criterion, hidden_size, epoch=1, tokenizer=None):
    # h = torch.zeros(1, 1, hidden_size)

    data_loarder = CustomDataSet(actions=action_seq, outcomes=feedback_seq,
                    context_lenght=model.input_size, dim_out=2, tokenizer=tokenizer)

    data_loader = torch.utils.data.DataLoader(
        data_loarder,batch_size=1, shuffle=True)
    for e in range(epoch):
        for x,t in data_loader:
            h = torch.zeros(1, 1, hidden_size)
            # remove batch dim
            x = x.unsqueeze(0)
            # t = t.unsqueeze(0)
            # x = x.squeeze(0)
            # t = t.squeeze(0)
            # Forward + Loss
            pred, h = model(x, h)
            h = h.detach() 
            # print(f'pred {pred} t {t}')
            loss = criterion(pred, t)
            # Test-Time Training Step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model

# Exemple d'interaction
actions = [3, 4, 3, 4, 4]  # ex: 0=avance, 1=recule
feedbacks = [0, 1, 0, 0, 1]  # 0="empty", 1="wall"

model = FeedbackPredictor(input_size, hidden_size, output_size)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Adaptation en temps réel
trained_model = test_time_training(model, actions, feedbacks, hidden_size=16,
                                   criterion=criterion, optimizer=optimizer)


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [2]:
def predict_next_feedback(model, next_action, actions, outcomes, hidden_size, tokenizer=None):
    model.eval()  # Mettre le modèle en mode évaluation (désactive Dropout, BatchNorm, etc.)

    h = torch.zeros(1, 1, hidden_size)  # Initialiser l'état caché
    context_length = model.input_size  # Taille de la fenêtre utilisée dans CustomDataSet

    # Construire l'entrée pour la dernière action observée
    if len(actions) + len(outcomes) >= context_length:
        x_input = []
        gap = (context_length - 1) // 2
        for i in range(gap):
            x_input.append(actions[-gap + i])
            x_input.append(outcomes[-gap + i])
        x_input.append(next_action)

        # Tokenization
        if tokenizer:
            x_input = tokenizer.encode(x_input)
        x_input = torch.tensor(x_input, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

    else:
        raise ValueError("Pas assez d'historique pour la prédiction")

    # Prédiction
    with torch.no_grad():  # Pas de calcul de gradients en mode prédiction
        pred, _ = model(x_input, h)

    # Post-traitement (ex : classification binaire)
    pred_feedback = torch.argmax(pred).item()

    return pred_feedback, pred

# Exemple de prédiction
next_action = 4
# next_feedback = predict_next_feedback(trained_model, next_action, actions, feedbacks, hidden_size)
# print(f"Prédiction pour l'action {next_action} : {next_feedback}")


In [3]:
hidden_size = 32

trained_model = FeedbackPredictor(5, hidden_size, 2)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(trained_model.parameters(), lr=0.01)
# Adaptation en temps réel

actions = []
feedbacks = []
# env = small_loop(x=1, y=1, theta=0)
env = Environnement1()
tokenizer=SimpleTokenizerV1(
    {
        'a':2,
        'b':3,
        'x':0,
        'y':1
    }
)

# actions = ['a', 'b']
# feedbacks = ['x', 'y']
# trained_model = test_time_training(trained_model, actions, 
#                     feedbacks, hidden_size=hidden_size,
#                     optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)

# next_action = 'a'
# # next_feedback = predict_next_feedback(model=trained_model, next_action=next_action, 
# #                                         actions=actions, outcomes=feedbacks, hidden_size=hidden_size, tokenizer=tokenizer)
# # print('je pense que :', next_feedback)

# actions = ['b', 'a', 'a', 'a', 'b','b', 'a']
# feedbacks = ['y', 'x', 'x','x', 'y', 'y', 'x']
# trained_model = test_time_training(trained_model, actions, 
#                     feedbacks, hidden_size=hidden_size,
#                     optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer, epoch=10)

# next_action = 'b'
# next_feedback = predict_next_feedback(model=trained_model, next_action=next_action, 
#                                         actions=actions, outcomes=feedbacks, hidden_size=hidden_size, tokenizer=tokenizer)
# print('je pense que :', next_feedback)

# next_action = 'a'
# next_feedback = predict_next_feedback(model=trained_model, next_action=next_action, 
#                                         actions=actions, outcomes=feedbacks, hidden_size=hidden_size, tokenizer=tokenizer)
# print('je pense que :', next_feedback)
actions = []
feedbacks = []
for i in range(1):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < 5:
        continue
    action = actions[-5:]
    feedback = feedbacks[-5:]
    trained_model = test_time_training(trained_model, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)
    decide = str(np.random.choice(env.get_actions()))
    next_feedback, pred = predict_next_feedback(model=trained_model, next_action=decide, 
                                        actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
    print(f"Prédiction pour l'action {decide} : {tokenizer.decode(next_feedback)} pred {pred}")
    print(f"i {i}")

In [4]:
hidden_size = 32

trained_model = FeedbackPredictor(5, hidden_size, 2)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(trained_model.parameters(), lr=0.01)
# Adaptation en temps réel

actions = []
feedbacks = []
# env = small_loop(x=1, y=1, theta=0)
env = Environnement3()
tokenizer=SimpleTokenizerV1(
    {
        'a':2,
        'b':3,
        'x':0,
        'y':1
    }
)

for i in range(2):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < 5:
        continue
    action = actions[-5:]
    feedback = feedbacks[-5:]
    trained_model = test_time_training(trained_model, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)
    for decide in env.get_actions():
        next_feedback, pred = predict_next_feedback(model=trained_model, next_action=decide, 
                                            actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
        print(f"Prédiction pour l'action {decide} après ses actions {action}: {tokenizer.decode(next_feedback)} avec proba {pred}")
    print(f"i {i}")

In [5]:
hidden_size = 32

trained_model = FeedbackPredictor(5, hidden_size, 2)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(trained_model.parameters(), lr=0.01)
# Adaptation en temps réel

actions = []
feedbacks = []
# env = small_loop(x=1, y=1, theta=0)
env = Environnement6()
tokenizer=SimpleTokenizerV1(
    {
        'a':2,
        'b':3,
        'x':0,
        'y':1
    }
)

for i in range(200):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < 5:
        continue
    action = actions[-5:]
    feedback = feedbacks[-5:]
    trained_model = test_time_training(trained_model, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)
    for decide in env.get_actions():
        next_feedback, pred = predict_next_feedback(model=trained_model, next_action=decide, 
                                            actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
        print(f"Prédiction pour l'action {decide} après ses actions {action}: {tokenizer.decode(next_feedback)} avec proba {pred}")
    print(f"i {i}")

Prédiction pour l'action a après ses actions ['b', 'b', 'a', 'b', 'b']: x avec proba tensor([[ 0.1754, -0.4466]])
Prédiction pour l'action b après ses actions ['b', 'b', 'a', 'b', 'b']: x avec proba tensor([[ 0.1921, -0.4633]])
i 4
Prédiction pour l'action a après ses actions ['b', 'a', 'b', 'b', 'a']: x avec proba tensor([[ 0.3671, -0.5282]])
Prédiction pour l'action b après ses actions ['b', 'a', 'b', 'b', 'a']: x avec proba tensor([[ 0.3906, -0.5452]])
i 5
Prédiction pour l'action a après ses actions ['a', 'b', 'b', 'a', 'a']: x avec proba tensor([[ 0.5322, -0.6028]])
Prédiction pour l'action b après ses actions ['a', 'b', 'b', 'a', 'a']: x avec proba tensor([[ 0.5550, -0.6139]])
i 6
Prédiction pour l'action a après ses actions ['b', 'b', 'a', 'a', 'a']: x avec proba tensor([[ 0.4625, -0.5143]])
Prédiction pour l'action b après ses actions ['b', 'b', 'a', 'a', 'a']: x avec proba tensor([[ 0.4762, -0.5177]])
i 7
Prédiction pour l'action a après ses actions ['b', 'a', 'a', 'a', 'b']: 

In [6]:
hidden_size = 32
dim_in = 3

trained_model = FeedbackPredictor(dim_in, hidden_size, 2)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(trained_model.parameters(), lr=0.01)
# Adaptation en temps réel

actions = []
feedbacks = []
env = small_loop(x=1, y=1, theta=0)
tokenizer = SimpleTokenizerV1(
    vocab={
        'forward': 4,
        'turn_right': 5,
        'turn_left': 2,
        'feel_front': 3,
        'empty': 0,
        'wall': 1
    }
)
for i in range(200):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < dim_in:
        continue
    action = actions[-dim_in:]
    feedback = feedbacks[-dim_in:]
    trained_model = test_time_training(trained_model, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)
    for decide in env.get_actions():
        next_feedback, pred = predict_next_feedback(model=trained_model, next_action=decide, 
                                            actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
        print(f"Prédiction pour l'action {decide} après ses actions {[str(f'{x}, {y}') for x, y in zip(action, feedback)]}: {tokenizer.decode(next_feedback)} avec proba {pred}")
    print(f"i {i}")

Prédiction pour l'action forward après ses actions ['turn_right, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.1994, -0.0607]])
Prédiction pour l'action turn_left après ses actions ['turn_right, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[0.1394, 0.0047]])
Prédiction pour l'action turn_right après ses actions ['turn_right, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.2355, -0.0922]])
Prédiction pour l'action feel_front après ses actions ['turn_right, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.1670, -0.0281]])
i 2
Prédiction pour l'action forward après ses actions ['forward, empty', 'turn_right, empty', 'turn_left, empty']: empty avec proba tensor([[ 0.5997, -0.4090]])
Prédiction pour l'action turn_left après ses actions ['forward, empty', 'turn_right, empty', 'turn_left, empty']: empty avec proba tensor([[ 0.4493, -0.2723]])
Prédiction pour l'action turn_right 

In [7]:
env_test = small_loop(x= 1, y=1, theta=0, world= np.array([
                [1, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 1, 1],
            ]))

hidden_size = 32
dim_in = 7

trained_model = FeedbackPredictor(dim_in, hidden_size, 2)
trained_modelLSTM = InteractionLSTM(input_size=input_size, hidden_size=hidden_size, output_size=2)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(trained_model.parameters(), lr=0.01, weight_decay=0.001)


criterionLSTM = nn.CrossEntropyLoss()
optimizerLSTM = optim.Adam(trained_modelLSTM.parameters(), lr=0.01, weight_decay=0.001)
# Adaptation en temps réel

actions = []
feedbacks = []
tokenizer = SimpleTokenizerV1(
    vocab={
        'forward': 4,
        'turn_right': 5,
        'turn_left': 2,
        'feel_front': 3,
        'empty': 0,
        'wall': 1
    }
)


In [8]:
for i in range(100):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < dim_in:
        continue
    action = actions[-dim_in:]
    feedback = feedbacks[-dim_in:]
    trained_model = test_time_training(trained_model, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizer ,criterion=criterion,tokenizer=tokenizer)
    for decide in env.get_actions():
        next_feedback, pred = predict_next_feedback(model=trained_model, next_action=decide, 
                                            actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
        print(f"Prédiction pour l'action {decide} après ses actions {[str(f'{x}, {y}') for x, y in zip(action, feedback)]}: {tokenizer.decode(next_feedback)} avec proba {pred}")
    print(f"i {i}")


Prédiction pour l'action forward après ses actions ['turn_left, empty', 'feel_front, empty', 'turn_right, empty', 'forward, wall', 'turn_left, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.2979, -0.3673]])
Prédiction pour l'action turn_left après ses actions ['turn_left, empty', 'feel_front, empty', 'turn_right, empty', 'forward, wall', 'turn_left, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.2346, -0.3170]])
Prédiction pour l'action turn_right après ses actions ['turn_left, empty', 'feel_front, empty', 'turn_right, empty', 'forward, wall', 'turn_left, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.3268, -0.3797]])
Prédiction pour l'action feel_front après ses actions ['turn_left, empty', 'feel_front, empty', 'turn_right, empty', 'forward, wall', 'turn_left, empty', 'forward, empty', 'turn_right, empty']: empty avec proba tensor([[ 0.2674, -0.3462]])
i 6
Prédiction pour l'action forward après s

In [9]:
for i in range(100):
    actions.append(str(np.random.choice(env.get_actions())))
    feedbacks.append(env.outcome(actions[-1]))
    if len(actions) < dim_in:
        continue
    action = actions[-dim_in:]
    feedback = feedbacks[-dim_in:]
    trained_model = test_time_training(trained_modelLSTM, actions, feedbacks, hidden_size=hidden_size,
                                    optimizer=optimizerLSTM ,criterion=criterionLSTM,tokenizer=tokenizer)
    for decide in env.get_actions():
        next_feedback, pred = predict_next_feedback(model=trained_modelLSTM, next_action=decide, 
                                            actions=action, outcomes=feedback, hidden_size=hidden_size, tokenizer=tokenizer)
        print(f"Prédiction pour l'action {decide} après ses actions {[str(f'{x}, {y}') for x, y in zip(action, feedback)]}: {tokenizer.decode(next_feedback)} avec proba {pred}")
    print(f"i {i}")

tensor([[[5., 0., 4.]]])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]]])


TypeError: tuple indices must be integers or slices, not tuple

In [None]:
input_size = 5
hidden_size = 16
output_size = 2
model = FeedbackPredictor(input_size, hidden_size, output_size)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
tokenizer = SimpleTokenizerV1(
    vocab={
        'forward': 4,
        'turn_right': 5,
        'turn_left': 2,
        'feel_front': 3,
        'empty': 0,
        'wall': 1
    }
)

trained_model = test_time_training(model, actions, feedbacks, tokenizer)

print(predict_next_feedback(trained_model, 'forward', actions, feedbacks, hidden_size, tokenizer))
print(predict_next_feedback(trained_model, 'forward', actions, feedbacks, hidden_size, tokenizer))
print(predict_next_feedback(trained_model, 'forward', actions, feedbacks, hidden_size, tokenizer))
print(predict_next_feedback(trained_model, 'turn_right', actions, feedbacks, hidden_size, tokenizer))
print(predict_next_feedback(trained_model, 'turn_left', actions, feedbacks, hidden_size, tokenizer))
print(predict_next_feedback(trained_model, 'feel_front', actions, feedbacks, hidden_size, tokenizer))

In [None]:
def show_proba_from_seq(_seg):
    seq = tokenizer.encode(_seg)
    seq = torch.tensor(seq, dtype=torch.float)
    predi =  trained_model(seq)
    prob = torch.nn.functional.softmax(predi, dim=0)
    deocde = tokenizer.decode(torch.argmax(predi, dim=0).item())
    print(f"Prédiction de la séquence {_seg} :  probabilité {prob.tolist()}, decode {deocde}")

print("porba si on avance")
show_proba_from_seq(['forward', 'empty', 'forward'])
show_proba_from_seq(['forward', 'wall', 'forward'])
show_proba_from_seq(['turn_left', 'empty', 'forward'])
show_proba_from_seq(['turn_right', 'empty', 'forward'])
show_proba_from_seq(['feel_front', 'empty', 'forward'])
show_proba_from_seq(['feel_front', 'wall', 'forward'])

print("porba si on turn left")
show_proba_from_seq(['forward', 'empty', 'turn_left'])
show_proba_from_seq(['forward', 'wall', 'turn_left'])
show_proba_from_seq(['turn_left', 'empty', 'turn_left'])
show_proba_from_seq(['turn_right', 'empty', 'turn_left'])
show_proba_from_seq(['feel_front', 'empty', 'turn_left'])
show_proba_from_seq(['feel_front', 'wall', 'turn_left'])

print("porba si on turn right")
show_proba_from_seq(['forward', 'empty', 'turn_right'])
show_proba_from_seq(['forward', 'wall', 'turn_right'])
show_proba_from_seq(['turn_left', 'empty', 'turn_right'])
show_proba_from_seq(['turn_right', 'empty', 'turn_right'])
show_proba_from_seq(['feel_front', 'empty', 'turn_right'])
show_proba_from_seq(['feel_front', 'wall', 'turn_right'])

print("porba si on feel front")
show_proba_from_seq(['forward', 'empty', 'feel_front'])
show_proba_from_seq(['forward', 'wall', 'feel_front'])
show_proba_from_seq(['turn_left', 'empty', 'feel_front'])
show_proba_from_seq(['turn_right', 'empty', 'feel_front'])
show_proba_from_seq(['feel_front', 'empty', 'feel_front'])
show_proba_from_seq(['feel_front', 'wall', 'feel_front'])
