In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F  # Ensure this import is added

from torch.utils.data import DataLoader, TensorDataset
import gym
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import cv2


In [2]:

# Parameters
sequence_length = 4  # Number of images in each sequence
num_episodes = 100   # Number of episodes for data collection

# Environment Setup
env = gym.make('CartPole-v1')
data_images = []
data_states = []

# Transformation for images
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize image to manageable size
    transforms.ToTensor()         # Convert image to PyTorch tensor
])

def heuristic_policy(observation):
    _, _, angle, _ = observation
    return 0 if angle < 0 else 1  # Move cart based on the angle of the pole

# Data Collection using Heuristic Policy
for episode in range(num_episodes):
    observation = env.reset()
    images = []
    for t in range(1000):
        img = env.render(mode='rgb_array')
        img_pil = Image.fromarray(img)
        tensor_image = transform(img_pil)  # Transform image immediately
        images.append(tensor_image)
        
        if len(images) >= sequence_length:
            # Stack the last sequence_length images to form a single sequence tensor
            sequence_tensor = torch.stack(images[-sequence_length:], dim=0).permute(1, 0, 2, 3)
            data_images.append(sequence_tensor)
            data_states.append(observation)
        
        action = heuristic_policy(observation)  # Use the heuristic policy
        observation, reward, done, info = env.step(action)
        if done:
            break

env.close()

# Convert data_states to a tensor
data_states = torch.tensor(data_states, dtype=torch.float32)

# Dataset and DataLoader
dataset = TensorDataset(torch.stack(data_images), data_states)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# Model Definition
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),
        )
        # Correctly calculate the input size for the linear layer based on the output from conv_layers
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 4 * 16 * 16, 128),  # Adjusted based on actual output size
            nn.ReLU(),
            nn.Linear(128, 4)  # Predicting 4 state variables
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor for the fully connected layer
        x = self.fc_layers(x)
        return x

# Model instantiation and training setup
model = CNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    for images, states in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, states)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Save the model
torch.save(model.state_dict(), 'cartpole_cnn_test.pth')


  data_states = torch.tensor(data_states, dtype=torch.float32)


Epoch 1, Loss: 0.012585052289068699
Epoch 2, Loss: 0.013496935367584229
Epoch 3, Loss: 0.03656453639268875
Epoch 4, Loss: 0.02040678635239601
Epoch 5, Loss: 0.03142130747437477
Epoch 6, Loss: 0.040939267724752426
Epoch 7, Loss: 0.012346663512289524
Epoch 8, Loss: 0.010714675299823284
Epoch 9, Loss: 0.008351529017090797
Epoch 10, Loss: 0.018499860540032387


In [19]:
cnn = CNN()  # Make sure CNN is defined or imported
cnn.load_state_dict(torch.load("cartpole_cnn_test.pth"))  # Load the pretrained model
cnn.eval()  # Set the model to evaluation mode

CNN(
  (conv_layers): Sequential(
    (0): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (4): ReLU()
    (5): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=32768, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [22]:
#torch.manual_seed(0)  # Set a seed for reproducibility

cnn.eval()
for images, states in dataloader:
    print(cnn(images)[4],states[4])
    break
cnn.train()

tensor([-0.0139,  1.6255, -0.0506, -2.1316], grad_fn=<SelectBackward0>) tensor([-0.0140,  1.7032, -0.0226, -2.2323])


CNN(
  (conv_layers): Sequential(
    (0): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (4): ReLU()
    (5): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=32768, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [20]:
def heuristic_policy(observation):
    obs = observation[0]
    print(obs)
    ang = obs[2]
    print(ang)
    return 0 if ang < 0 else 1  # Move cart based on the angle of the pole

# Define the CNN architecture adapted to your image preprocessing and dimensions
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(4, 16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)        
        self.fc1 = nn.Linear(32 * 21 * 21, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 4)  # Output size to predict state variables

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))        
        x = x.view(-1, 32 * 21 * 21)  
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def preprocess_image(image):
    # Convert image to grayscale and resize to 84x84
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    resized_image = cv2.resize(gray_image, (84, 84))
    return resized_image

def collect_data(episodes=500):
    env = gym.make('CartPole-v2')
    action_data = []
    state_data = []
    image_data = []

    for episode in range(episodes):
        images = []
        observation = env.reset()
        done = False
        while not done:
            image = env.render('rgb_array')
            processed_image = preprocess_image(image)
            images.append(processed_image)

            # Collect data only if we have 4 consecutive frames
            if len(images) >= 4:
                # Stack the last 4 images to create input for CNN
                input_images = np.stack(images[-4:], axis=0)
                input_images = input_images / 255.0  # Normalize images

                image_data.append(input_images)
                state_data.append(observation)  # Collect state data

            action = heuristic_policy(observation)  # Your heuristic policy
            observation, _, done, _ = env.step(action)

    return np.array(image_data), np.array(state_data)

# Data collection
image_data, state_data = collect_data()
image_tensor = torch.tensor(image_data, dtype=torch.float32)
state_tensor = torch.tensor(state_data, dtype=torch.float32)

# Define dataset and dataloader for training
dataset = TensorDataset(image_tensor, state_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model setup
model = SimpleCNN()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
model.train()
for epoch in range(10):  # You can adjust the number of epochs
    total_loss = 0
    for images, states in dataloader:
        optimizer.zero_grad()
        predictions = model(images)
        loss = criterion(predictions, states)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(dataloader)}')

# Save model
torch.save(model.state_dict(), 'cartpole_cnn2.pth')


[-0.04935691  0.0214029  -0.01350331  0.00454222]
-0.013503309
-0.048928853


IndexError: invalid index to scalar variable.

In [18]:
model.eval()
for images, states in dataloader:
    print(model(images)[8],states[8])
    plt.imshow(images[8][3])
    break
model.train()

tensor([-0.0179, -0.9981,  0.0437,  1.3493], grad_fn=<SelectBackward0>) tensor([-0.0169, -1.0171,  0.0446,  1.3195])


IndexError: index 3 is out of bounds for dimension 0 with size 3

In [3]:
env = gym.make('CartPole-v2')
env.reset()
try:
    observation, reward, done, info = env.step(0)
    print("Step successful:", observation, reward, done, info)
except Exception as e:
    print("Error in step execution:", e)


Step successful: [-0.04015512 -0.15969469 -0.03355908  0.23401141] 1.0 False {'additional_info': False}
