# AS 3.1 Moonlander assignment
***By Joris Heemskerk & Finn de Graaf***

This notebook contains all the code required to train and run the moonlander algorithm.

## 1. Base assignment

The base assignment is to implement an object-oriented Deep Q-learning Network in Python. This network will be trained on the Lunar Lander environment.

### 1.1. Imports

In [6]:
from agent import Agent
from policy import Policy
from state import State

import torch
from torch import nn

### 1.2. Defining the constants

In [7]:
# Number of iterations of the environment loop in the testing cell (section 1.6).
NUMBER_OF_RUNS: int = 1

### 1.3. Defining the network

In [12]:
class NeuralNetwork(nn.Module):
    """Fully connected network mapping 8 input features to 4 output values.

    Architecture: Linear(8, 150) -> ReLU -> Linear(150, 120) -> ReLU -> Linear(120, 4).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(8, 150),
            nn.ReLU(),
            nn.Linear(150, 120),
            nn.ReLU(),
            nn.Linear(120, 4),
        )

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Batched input tensor. Every dimension after the first is
                flattened, and the result must hold 8 features per sample.

        Returns:
            Tensor with 4 raw (un-activated) output values per sample.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

    def train_model(self, train_loader, loss_fn, optimizer, num_epochs):
        """Train the network in place with a standard supervised loop.

        Args:
            train_loader: Iterable yielding ``(X, y)`` tensor batches; it must
                expose a ``dataset`` attribute with a length, which is used
                for the progress message.
            loss_fn: Callable taking ``(prediction, target)`` and returning a
                scalar loss tensor.
            optimizer: Optimizer constructed over this model's parameters.
            num_epochs: Number of full passes over ``train_loader``.
        """
        # Batches are moved to wherever the weights live, so a model placed on
        # an accelerator can be trained from a CPU-backed loader.
        device = next(self.parameters()).device
        self.train()

        for epoch in range(num_epochs):
            print(f"Epoch {epoch+1}\n-------------------------------")
            for batch, (X, y) in enumerate(train_loader):
                X, y = X.to(device), y.to(device)

                # Compute prediction and loss
                pred = self(X)
                loss = loss_fn(pred, y)

                # Backpropagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if batch % 100 == 0:
                    # Separate name so the loss tensor is not shadowed by a float.
                    loss_value, current = loss.item(), batch * len(X)
                    print(f"loss: {loss_value:>7f}  [{current:>5d}/{len(train_loader.dataset):>5d}]")

        # Reported once, after the final epoch rather than after every epoch.
        print("Training done!")

In [13]:
# Pick the best available backend: CUDA first, then Apple's MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# The ANSI escape codes render the device name in green.
print(f"Using \033[32m{device}\033[0m device\n")

# Instantiate the network on the selected device and show its layer layout.
model = NeuralNetwork().to(device)
print(model)

Using [32mcuda[0m device

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=8, out_features=150, bias=True)
    (1): ReLU()
    (2): Linear(in_features=150, out_features=120, bias=True)
    (3): ReLU()
    (4): Linear(in_features=120, out_features=4, bias=True)
  )
)


### 1.4. Defining the agent

### 1.5. Training the model

### 1.6. Testing the model

In [14]:
import gymnasium as gym

# NOTE(review): newer Gymnasium releases register this environment as
# "LunarLander-v3"; confirm the id against the installed version.
env = gym.make("LunarLander-v2", render_mode="human")
try:
    # Each run is one full episode: step until the environment reports that
    # the episode terminated or was truncated.
    for run in range(NUMBER_OF_RUNS):
        # Only the first reset is seeded; later resets continue from the
        # environment's already-seeded random generator.
        observation, info = env.reset(seed=42) if run == 0 else env.reset()
        terminated = truncated = False
        while not (terminated or truncated):
            action = env.action_space.sample()  # this is where you would insert your policy
            observation, reward, terminated, truncated, info = env.step(action)
finally:
    # Always release the render window, even when the cell is interrupted.
    env.close()

KeyboardInterrupt: 