In [1]:
import numpy as np
import pandas as pd
from env import Env2048, run_simulations
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
aggregated_simulation_results = run_simulations(num_processes=10, episodes_per_process=1000) # 10 * 1000 = 10000 runs

In [3]:
print(f"Sample: {aggregated_simulation_results[0]}") # max_tile, score, moves, net_reward

Sample: [128.0, 1352.0, 114, -24.114]


In [4]:
columns = ["max_tile", "score", "moves", "net_reward"]
simulation_df = pd.DataFrame(aggregated_simulation_results, columns=columns)
mean_df = simulation_df.groupby("max_tile")[["score", "moves"]].mean().reset_index()
std_df = simulation_df.groupby("max_tile")[["score", "moves"]].std().reset_index()
tile_counts = simulation_df.groupby("max_tile").size().reset_index(name="frequency")

In [5]:
data_df = pd.concat([mean_df[["max_tile", "score", "moves"]], std_df[["score", "moves"]]], axis=1)
data_df.dropna(inplace=True)
data_columns = ["max_tile", "mean_score", "std_score", "mean_moves", "std_moves"]
data_df.columns = data_columns

In [6]:
tile_counts

Unnamed: 0,max_tile,frequency
0,16.0,56
1,32.0,1110
2,64.0,4216
3,128.0,4084
4,256.0,534


In [7]:
data_df

Unnamed: 0,max_tile,mean_score,std_score,mean_moves,std_moves
0,16.0,144.214286,32.535714,32.979726,3.775347
1,32.0,317.682883,47.82973,74.660255,7.07214
2,64.0,623.800759,69.323055,131.229334,10.434567
3,128.0,1169.454456,100.307297,202.442737,14.394867
4,256.0,2182.921348,147.314607,261.230293,17.063349


In [8]:
linear_model = LinearRegression()
inputs, labels = data_df[["max_tile"]].to_numpy(), data_df[["mean_score", "std_score", "mean_moves", "std_moves"]].to_numpy()
_ = linear_model.fit(inputs, labels)

In [9]:
new_max_tiles = [[512], [1024], [2048]]
predictions = linear_model.predict(new_max_tiles)
print(f"Prediction: {predictions}")

Prediction: [[ 4362.48344151   270.47483915   513.4089101     31.50027842]
 [ 8672.39810202   507.38988862   975.92108541    57.48753357]
 [17292.22742304   981.21998756  1900.94543604   109.46204388]]


In [10]:
for max_tile, prediction in zip(new_max_tiles, predictions):
    data_df.loc[len(data_df)] = max_tile + prediction.tolist()

In [11]:
def generate_fake_simulation_data(tile, mean_score, std_score, mean_moves, std_moves, size=1000):
    d = dict()
    score = np.random.normal(mean_score, std_score, size).reshape(-1, 1)
    moves = np.random.normal(mean_moves, std_moves, size).reshape(-1, 1)
    points = np.random.choice([points for points in range(0, 129, 2)], size=size).reshape(-1, 1)
    data = np.concatenate((score, moves, points), axis=1)
    d[tile] = data
    return d

In [12]:
samples = []
for row in data_df.values.tolist():
    samples.append(generate_fake_simulation_data(*row))

In [None]:
samples[0]

In [None]:
rewards = []
env = Env2048()
for fake_data in fake_simulation_data:
    score, moves, points = fake_data
    reward = env.calculate_reward(score, moves, points, True)
    rewards.append(reward)

In [None]:
print(np.mean(reward))