In [1]:
import gym
import torch
from torch import nn
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
import random

In [3]:
def get_env_data(n):
    env = gym.make("ALE/Freeway-v5", render_mode="rgb_array", obs_type="ram", difficulty=1, mode=7)
    observation = env.reset()

    df = pd.DataFrame([observation])
    # Actions: 0: nichts, 1: up, 2: down

    actions = []

    for i in range(n):
        action = get_action_sample()
        if i != 0:
            df.loc[len(df)] = observation
        actions.append(action)
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()
        if i % (n / 100) == 0:
            print(f"{(i / n) * 100}%")
    env.close()
    df = df[[14, 103, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117]]
    df["actions"] = actions
    return df

In [4]:
def get_action_sample():
    x = random.randint(0, 101)
    if x < 90:
        return 1
    if x < 97:
        return 2
    return 0

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(15, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 14),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.linear_relu_stack(x)
        x = torch.FloatTensor(x)
        return x

In [15]:
def get_dfs(df):
    df = pd.read_csv("gamedata/world_data_1.csv")
    start = df.head(1)
    start.to_csv("gamedata/start.csv")
    df = df.tail(len(df) - 1)
    df.drop(["Unnamed: 0"], axis=1, inplace=True)

    dfY = df.copy()
    dfY.drop(["actions"], axis=1, inplace=True)

    dfY = dfY.drop(dfY.index[[0]])
    df = df.drop(df.index[[len(df) - 1]])
    dfY.index = df.index

    df = df.reset_index()
    dfY = df.reset_index()
    return df, dfY

In [17]:
def train(X, y, model, loss_fn, optimizer):
    model.train()
    loss_sum = 0
    for i in range(len(y)):
        X_data = list(X.iloc[i])
        y_data = list(y.iloc[i])
        X_data = torch.tensor(X_data)
        y_data = torch.tensor(y_data)
        X_data.cuda()
        y_data.cuda()

        pred = model(X_data.float())

        loss = loss_fn(pred.to(torch.float32), y_data.to(torch.float32))
        loss_sum += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 5000 == 0:
            #print(f"loss: {loss_sum / 1000}")
            loss_sum = 0


def test(X, y, model, loss_fn):
    loss_sum = 0

    model.eval()
    with torch.no_grad():
        for i in range(len(y)):
            X_data = list(X.iloc[i])
            y_data = list(y.iloc[i])
            X_data = torch.tensor(X_data)
            y_data = torch.tensor(y_data)

            pred = model(X_data.float())
            loss = loss_fn(pred, y_data)
            loss_sum += loss

    loss_sum /= len(y)

    print(f"Avg loss: {loss_sum}!")
    return loss_sum

In [18]:
def cv_model(X, y):
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=69)

    model = Net()
    for layer in model.children():
        if hasattr(layer, "reset_parameters"):
            layer.reset_parameters()

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    last_test_avg = 10000
    test_avg = 0
    overfit = 0

    epochs = 200

    avg_losses = []

    avg_losses.append(test(test_X, test_y, model, loss_fn))

    for t in range(epochs):
        print(f"Epoch {t + 1}-----------------------------")
        train(train_X, train_y, model, loss_fn, optimizer)
        test_avg = test(test_X, test_y, model, loss_fn)
        avg_losses.append(test_avg)
        if test_avg > last_test_avg:
            overfit += 1
        else:
            overfit = 0
            last_test_avg = test_avg
        if overfit >= 5:
            print(f"Epoche: {t}")
            break
    torch.save(model, f"game_model/game_model_1")
    return avg_losses
print("done")

done


In [19]:
def cv_model2(X, y):
    k_fold = KFold(n_splits=5, shuffle=True, random_state=6988)
    counter = 2
    avg_all = []
    for train_idx, test_idx in k_fold.split(X, y):
        model = Net().cuda()
        for layer in model.children():
            if hasattr(layer, "reset_parameters"):
                layer.reset_parameters()

        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

        print(f"split: {counter}")
        train_X = df.loc[train_idx]
        train_y = dfY.loc[train_idx]

        test_X = df.loc[test_idx]
        test_y = dfY.loc[test_idx]
        last_test_avg = 0
        test_avg = 0
        overfit = 0

        avg_losses = []

        epochs = 100

        for t in range(epochs):
            print(f"Epoch {t + 1}-----------------------------")
            train(train_X, train_y, model, loss_fn, optimizer)
            test_avg = test(test_X, test_y, model, loss_fn)
            avg_losses.append(test_avg)
            if test_avg > last_test_avg:
                overfit += 1
            else:
                overfit = 0
                last_test_avg = test_avg
            if overfit >= 5:
                print(overfit)
                print(f"Epoche: {t}")
                break
        avg_all.append(avg_losses)
        break
    return avg_all
print("done")

done


In [21]:
n_data = [1000, 2000, 5000, 10000, 20000]
df_losses = pd.DataFrame()

for n in n_data:
    print(n)
    df = get_env_data(n)
    df, dfY = get_dfs(df)
    avg_losses = cv_model(df, dfY)

    x = []

    while len(avg_losses) < 200:
        avg_losses.append(avg_losses[len(avg_losses) - 1])
    for i in avg_losses:
        x.append(i.item())
    df_losses[f"{n}"] = x


1000
0.0%
1.0%
2.0%
3.0%
4.0%
5.0%
6.0%
7.000000000000001%
8.0%
9.0%
10.0%
11.0%
12.0%
13.0%
14.000000000000002%
15.0%
16.0%
17.0%
18.0%
19.0%
20.0%
21.0%
22.0%
23.0%
24.0%
25.0%
26.0%
27.0%
28.000000000000004%
28.999999999999996%
30.0%
31.0%
32.0%
33.0%
34.0%
35.0%
36.0%
37.0%
38.0%
39.0%
40.0%
41.0%
42.0%
43.0%
44.0%
45.0%
46.0%
47.0%
48.0%
49.0%
50.0%
51.0%
52.0%
53.0%
54.0%
55.00000000000001%
56.00000000000001%
56.99999999999999%
57.99999999999999%
59.0%
60.0%
61.0%
62.0%
63.0%
64.0%
65.0%
66.0%
67.0%
68.0%
69.0%
70.0%
71.0%
72.0%
73.0%
74.0%
75.0%
76.0%
77.0%
78.0%
79.0%
80.0%
81.0%
82.0%
83.0%
84.0%
85.0%
86.0%
87.0%
88.0%
89.0%
90.0%
91.0%
92.0%
93.0%
94.0%
95.0%
96.0%
97.0%
98.0%
99.0%


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x16 and 15x128)

In [112]:
df_losses.to_csv("gamedata/data_test_4.csv")
df_losses

Unnamed: 0,1000,2000,5000,10000,20000,50000
0,7437.459473,7225.813965,7255.167969,7275.524414,7306.245117,7319.188965
1,3132.98877,2947.085693,2510.976074,2889.85498,3848.984619,2869.0625
2,2282.169189,2194.750977,2211.690674,2593.416504,3629.458984,2460.67749
3,1947.890625,1904.734863,1920.263428,1935.618164,3432.967773,1454.46814
4,1782.133789,1277.788086,1726.900146,1766.064819,3307.291016,1105.400879
5,1692.899414,1106.661743,1599.137939,1645.445923,2735.440918,1002.79541
6,1645.171265,1004.954773,1509.948853,1555.667358,2166.250244,943.669556
7,1618.498779,947.637817,1448.989502,1492.518799,2076.009521,909.416382
8,1602.04248,911.591309,1411.71875,1445.187622,2032.991211,888.576416
9,1591.599731,886.964539,1386.872681,1411.67688,2008.097778,874.340149


In [237]:
class CustomEnv(gym.Env):
    def __init__(self, model):
        self.model = model
        self.state = torch.tensor(list(pd.read_csv("gamedata/start.csv").drop(["Unnamed: 0.1", "Unnamed: 0"], axis=1).loc[0]))
        self.pos = 6
        self.max_pos = 6
        self.done = False
        self.score = 0
        self.observation_space = gym.spaces.Box(low=0, high=210, shape=(14,))
        self.action_space = gym.spaces.Discrete(3)

    def reset(self):
        self.state = torch.tensor(list(pd.read_csv("gamedata/start.csv").drop(["Unnamed: 0.1", "Unnamed: 0", "actions"], axis=1).loc[0]))
        self.pos = 6
        self.max_pos = 6
        self.done = False
        self.score = 0
        return self.state

    def step(self, action):
        self.state = self.state.tolist()
        self.state.append(action)
        obs = self.model.forward(torch.tensor(self.state))
        self.pos = obs[0]
        reward = 0
        if self.pos > self.max_pos + 1:
            reward += 1
            self.max_pos = self.pos
        if 90 <= obs[2] <= 100:
            reward -= 1000
        if  self.score != obs[1]:
            self.score = obs[1]
            reward += 100
        return obs, reward, False, {}

    def render(self, mode="human", close=False):
        return

In [241]:
from sb3_contrib import TRPO

model = torch.load("game_model/game_model_1")

env = CustomEnv(model)
model = TRPO("MlpPolicy", env, gamma=0.99, verbose=1)
model.learn(total_timesteps=10_000, log_interval=4)
model.save("trpo_custom")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


RuntimeError: expected scalar type Long but found Float

In [264]:
model = torch.load("game_model/game_model_1")
x = torch.tensor(list(pd.read_csv("gamedata/start.csv").drop(["Unnamed: 0.1", "Unnamed: 0", "actions"], axis=1).loc[0]))
#model.forward(torch.tensor(x.tolist().append(2)))
x = x.tolist()
x.append(2)
x = torch.tensor(x)
x = x.type(torch.IntTensor)

model.forward(x)
x

RuntimeError: expected scalar type Int but found Float