# Using PPO's recorded data

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import layers, models

import pickle

# Load every record from the pickle stream and collect the training arrays.
#
# The file is read as a sequence of separately pickled dicts, one per
# environment step, until pickle signals the end of the stream with EOFError.
#
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open files that were produced by a trusted recorder.
states = []
actions = []

with open('training_data.pkl', mode='rb') as file:
    while True:
        # Keep the try body minimal: only the load itself signals end-of-file.
        try:
            data = pickle.load(file)
        except EOFError:
            break  # End of file reached

        timestamp = data['timestamp']
        episode = data['episode']
        step = data['step']
        state = data['state']
        action = data['action']
        reward = data['reward']

        # Accumulate the samples; previously each record overwrote the last
        # one and nothing was kept for training.
        states.append(state)
        actions.append(action)

        print(f"Episode {episode}, Step {step}, Reward {reward}")

# Training inputs/labels consumed by model.fit(X, y, ...) in the next cell.
# Assumes each state already matches the model input shape (96, 96, 3) and
# each action is an integer class index -- TODO confirm against the recorder.
# NOTE(review): the evaluation cell passes states through preprocess_state();
# if that preprocessing is required, apply it to `states` here as well.
X = np.array(states)
y = np.array(actions)


In [None]:
# Image classifier: six stacked convolutions feeding a small dense head.
frame_shape = (96, 96, 3)

# (filters, kernel_size, strides) for each convolution, in order.
# With a 96x96 input and Keras' default 'valid' padding the spatial size
# shrinks 47 -> 23 -> 11 -> 5 -> 3 -> 1, so Flatten yields 256 features.
conv_specs = [
    (8, 4, 2),
    (16, 3, 2),
    (32, 3, 2),
    (64, 3, 2),
    (128, 3, 1),
    (256, 3, 1),
]

network_layers = []
for position, (n_filters, kernel, stride) in enumerate(conv_specs):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': frame_shape} if position == 0 else {}
    network_layers.append(
        layers.Conv2D(
            n_filters,
            kernel_size=kernel,
            strides=stride,
            activation='relu',
            **first_layer_kwargs,
        )
    )

# Collapse the feature maps into a vector for the dense head.
network_layers.append(layers.Flatten())

# Hidden fully connected layer.
network_layers.append(layers.Dense(100, activation='relu'))

# Output layer: a softmax over 3 classes (one unit per discrete action).
# NOTE(review): the label encoding must therefore use class ids 0, 1, 2.
network_layers.append(layers.Dense(3, activation='softmax'))

model = models.Sequential(network_layers)

# sparse_categorical_crossentropy expects integer class indices as labels.
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture.
model.summary()

# Train for 10 epochs in mini-batches of 32.
# X and y are expected to be defined by an earlier cell.
model.fit(X, y, batch_size=32, epochs=10)


In [None]:
# Evaluate the trained model on held-out data stored as CSV.
# Assumes the CSV has 'state' and 'action' columns, with each state stored
# as the text of a Python literal (e.g. nested lists) -- TODO confirm.
import ast

test_data = pd.read_csv('ppo_test_data.csv')

# Process the test data similarly as the training data
test_states = test_data['state'].values
test_actions = test_data['action'].values

# SECURITY: this used to call eval() on every CSV cell, which executes any
# expression found in the file. ast.literal_eval only accepts plain Python
# literals (numbers, lists, tuples, ...), so a tampered CSV cannot run code.
# NOTE(review): cells that are not pure literals (e.g. "np.array([...])")
# will now raise ValueError instead of being evaluated.
test_states = [np.array(ast.literal_eval(state)) for state in test_states]

# preprocess_state is expected to be defined elsewhere in the notebook.
test_X = np.array([preprocess_state(state) for state in test_states])
test_y = np.array(test_actions)

# Evaluate the model on the test set
model.evaluate(test_X, test_y)
