In [361]:
import os
import pandas as pd
import pathlib
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from ast import literal_eval
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [362]:
writer = SummaryWriter('runs/cartpole1')

## Define Dataset and DataLoader

In [351]:
class CartpoleNLPDataset(Dataset):
    def __init__(self, nlp_results_path, train):
        self.df = pd.read_csv(path)

        # Process columns of the dataframe that contain arrays that are formatted as strings in the csv
        array_elements = ['params', 'X', 'U']
        for elem in array_elements:
            self.df[elem] = self.df[elem].apply(literal_eval) # Convert strings into lists 
            self.df[elem] = self.df[elem].apply(lambda x: np.array(x, dtype=np.float64)) # Convert lists into np arrays of type np.float64

        # Split the data into training and testing sets
        train_df, test_df = train_test_split(self.df, test_size=0.2)

        if train:
            self.df = train_df
        else:
            self.df = test_df

        # Store the number of params (NN inputs) and length of trajectory (NN outputs)
        self.n_params = len(self.df['params'].iloc[0])
        N = self.df['X'].iloc[0].shape[0] # number of timesteps
        n_states = len(self.df['X'].iloc[0][0])
        n_controls = len(self.df['U'].iloc[0][0])

        # print(n_states)
        # print(n_controls)
        # print(N)
        self.n_traj = n_states*N + n_controls*(N-1)

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        params = self.df.iloc[idx].params
        X = self.df.iloc[idx].X
        U = self.df.iloc[idx].U
        
        Z = np.array([np.hstack((X[i], U[i])) for i in range(len(U))], dtype=np.float64) # create the trajectory vector where element z_i = [x_i, u_i]
        Z = Z.flatten()
        # print(Z[-6:])
        # print(Z.shape)
        Z = np.concatenate((Z, X[-1])) # add the final state to the end of the trajectory
        # print(Z[-6:])
        # print(Z.shape)
        # print()

        return params, Z
    def getitem(self, idx):
        params = self.df.iloc[idx].params
        X = self.df.iloc[idx].X
        U = self.df.iloc[idx].U
        
        Z = np.array([np.hstack((X[i], U[i])) for i in range(len(U))]) # create the trajectory vector where element z_i = [x_i, u_i]
        
        return params, Z

    

In [352]:
path = 'data/cartpole_DIRCOL_5000.csv'
train_data = CartpoleNLPDataset(path, train=True)
test_data = CartpoleNLPDataset(path, train=False)
# print(train_data.getitem(0)[1].dtype)
print(train_data.n_traj)

train_dataloader = DataLoader(train_data, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=True)

params, Z = next(iter(train_dataloader))
# print(params.shape)
# print(Z.shape)


204


In [353]:
cartpole_data = CartpoleNLPDataset(path, train=True)
cartpole_data.n_params
cartpole_data.n_traj

204

## Define the NN

In [354]:
class CartpoleNN(nn.Module):
    def __init__(self, n_params, n_traj):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_params, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, n_traj)
        )
        self.linear_relu_stack = self.linear_relu_stack.double()

        
    def forward(self, params):
        # params = self.flatten(params)
        traj = self.linear_relu_stack(params)
        return traj


In [355]:
model = CartpoleNN(cartpole_data.n_params, cartpole_data.n_traj).to(device)
print(f"Model structure: {model}\n\n")

# for name, param in model.named_parameters():
#     print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")


Model structure: CartpoleNN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=2, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=204, bias=True)
  )
)




## Training Loop

In [356]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()

    for batch, (X,y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)



        pred = model(X)
        loss = loss_fn(pred, y)

        # Backprop
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 1 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)
            
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches # average loss per batch
    print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")




In [357]:
learning_rate = 1e-3
batch_size = 64
epochs = 100

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")


Epoch 1
-------------------------------
loss: 9.408353  [    0/ 4000]
loss: 9.337678  [   16/ 4000]
loss: 9.461040  [   32/ 4000]
loss: 9.491259  [   48/ 4000]
loss: 9.465789  [   64/ 4000]
loss: 9.541639  [   80/ 4000]
loss: 9.983864  [   96/ 4000]
loss: 9.397926  [  112/ 4000]
loss: 9.156827  [  128/ 4000]
loss: 9.412959  [  144/ 4000]
loss: 9.289789  [  160/ 4000]
loss: 9.415695  [  176/ 4000]
loss: 9.405247  [  192/ 4000]
loss: 9.352291  [  208/ 4000]
loss: 9.425069  [  224/ 4000]
loss: 9.640050  [  240/ 4000]
loss: 9.420011  [  256/ 4000]
loss: 9.280666  [  272/ 4000]
loss: 9.685352  [  288/ 4000]
loss: 9.352651  [  304/ 4000]
loss: 9.959709  [  320/ 4000]
loss: 9.472398  [  336/ 4000]
loss: 9.425698  [  352/ 4000]
loss: 9.441806  [  368/ 4000]
loss: 9.379465  [  384/ 4000]
loss: 9.583863  [  400/ 4000]
loss: 9.179898  [  416/ 4000]
loss: 9.538098  [  432/ 4000]
loss: 9.983825  [  448/ 4000]
loss: 9.434400  [  464/ 4000]
loss: 9.266290  [  480/ 4000]
loss: 9.219703  [  496/ 4000]


KeyboardInterrupt: 