In [12]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F  # Ensure this import is added

from torch.utils.data import DataLoader, TensorDataset
import gym
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import cv2


## OSEF UN PEU DE CE QU'IL Y A ICI, DESCENDS DIRECT

In [2]:

# Parameters
sequence_length = 4  # Number of images in each sequence
num_episodes = 100   # Number of episodes for data collection

# Environment Setup
env = gym.make('CartPole-v1')
data_images = []
data_states = []

# Transformation for images
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize image to manageable size
    transforms.ToTensor()         # Convert image to PyTorch tensor
])

def heuristic_policy(observation):
    _, _, angle, _ = observation
    return 0 if angle < 0 else 1  # Move cart based on the angle of the pole

# Data Collection using Heuristic Policy
for episode in range(num_episodes):
    observation = env.reset()
    images = []
    for t in range(1000):
        img = env.render(mode='rgb_array')
        img_pil = Image.fromarray(img)
        tensor_image = transform(img_pil)  # Transform image immediately
        images.append(tensor_image)
        
        if len(images) >= sequence_length:
            # Stack the last sequence_length images to form a single sequence tensor
            sequence_tensor = torch.stack(images[-sequence_length:], dim=0).permute(1, 0, 2, 3)
            data_images.append(sequence_tensor)
            data_states.append(observation)
        
        action = heuristic_policy(observation)  # Use the heuristic policy
        observation, reward, done, info = env.step(action)
        if done:
            break

env.close()

# Convert data_states to a tensor
data_states = torch.tensor(data_states, dtype=torch.float32)

# Dataset and DataLoader
dataset = TensorDataset(torch.stack(data_images), data_states)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# Model Definition
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
        )
        # Correctly calculate the input size for the linear layer based on the output from conv_layers
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 4 * 16 * 16, 128),  # Adjusted based on actual output size
            nn.ReLU(),
            nn.Linear(128, 4)  # Predicting 4 state variables
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor for the fully connected layer
        x = self.fc_layers(x)
        return x

# Model instantiation and training setup
model = CNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    for images, states in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, states)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Save the model
torch.save(model.state_dict(), 'cartpole_cnn_test.pth')


  data_states = torch.tensor(data_states, dtype=torch.float32)


Epoch 1, Loss: 0.012585052289068699
Epoch 2, Loss: 0.013496935367584229
Epoch 3, Loss: 0.03656453639268875
Epoch 4, Loss: 0.02040678635239601
Epoch 5, Loss: 0.03142130747437477
Epoch 6, Loss: 0.040939267724752426
Epoch 7, Loss: 0.012346663512289524
Epoch 8, Loss: 0.010714675299823284
Epoch 9, Loss: 0.008351529017090797
Epoch 10, Loss: 0.018499860540032387


In [14]:
cnn = CNN()  # Make sure CNN is defined or imported
cnn.load_state_dict(torch.load("cartpole_cnn_test.pth"))  # Load the pretrained model
cnn.eval()  # Set the model to evaluation mode

CNN(
  (conv_layers): Sequential(
    (0): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (4): ReLU()
    (5): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=32768, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [15]:
import os

model_path = 'M1-S2\\PLDAC\\PLDAC_BBRL\\src\\cartpole_cnn_test.pth'
absolute_path = os.path.abspath(model_path)
print("Looking for model at:", absolute_path)
if not os.path.exists(absolute_path):
    print("Model file not found.")
else:
    print("Model file found.")

Looking for model at: c:\Users\hatem\OneDrive\Documents\Programmation\M1-S2\PLDAC\PLDAC_BBRL\src\M1-S2\PLDAC\PLDAC_BBRL\src\cartpole_cnn_test.pth
Model file not found.


In [16]:
import os

# Absolute path to where the model file is stored
model_path = os.path.abspath('C:/Users/hatem/OneDrive/Documents/Programmation/M1-S2/PLDAC/PLDAC_BBRL/src/cartpole_cnn_test.pth')

# Load the model
cnn.load_state_dict(torch.load(model_path))


<All keys matched successfully>

## C ICI POUR TRAIN LE MODEL COMPATIBLE AVEC IMAGEAGENT

In [18]:

# Parameters
sequence_length = 4  # Number of images in each sequence
num_episodes = 100   # Number of episodes for data collection



# Environment Setup
env = gym.make('CartPole-v1')
data_images = []
data_states = []

# Transformation for images should be correct
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize image to manageable size
    transforms.ToTensor()         # Convert image to PyTorch tensor
])

def heuristic_policy(observation):
    _, _, angle, _ = observation
    return 0 if angle < 0 else 1

# Data Collection using Heuristic Policy
for episode in range(num_episodes):
    observation = env.reset()
    images = []
    for t in range(1000):
        img = env.render(mode='rgb_array')
        img_pil = Image.fromarray(img)
        tensor_image = transform(img_pil)  # Transform image immediately
        images.append(tensor_image)
        
        if len(images) >= sequence_length:
            # Stack the last sequence_length images to form a single sequence tensor
            sequence_tensor = torch.stack(images[-sequence_length:], dim=0)  # Dimension [sequence_length, 3, 64, 64]
            data_images.append(sequence_tensor.permute(1, 0, 2, 3))  # Rearrange to [3, sequence_length, 64, 64]
            data_states.append(observation)
        
        action = heuristic_policy(observation)
        observation, reward, done, info = env.step(action)
        if done:
            break

env.close()

# Convert data_states to a tensor
data_states = torch.tensor(data_states, dtype=torch.float32)

# Dataset and DataLoader
dataset = TensorDataset(torch.stack(data_images), data_states)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# Model Definition
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
        )
        # Correctly calculate the input size for the linear layer based on the output from conv_layers
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 4 * 16 * 16, 128),  # Adjusted based on actual output size
            nn.ReLU(),
            nn.Linear(128, 4)  # Predicting 4 state variables
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor for the fully connected layer
        x = self.fc_layers(x)
        return x

# Model instantiation and training setup
model = CNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    for images, states in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, states)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Save the model
torch.save(model.state_dict(), 'cartpole_cnn_test2.pth')



  data_states = torch.tensor(data_states, dtype=torch.float32)


Epoch 1, Loss: 0.04328785091638565
Epoch 2, Loss: 0.006199096329510212
Epoch 3, Loss: 0.0031947209499776363
Epoch 4, Loss: 0.002347412519156933
Epoch 5, Loss: 0.007954933680593967
Epoch 6, Loss: 0.0011394956381991506


KeyboardInterrupt: 