In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
from env        import Game2048Env
from features   import pattern
from learners   import FeatureTD0Learner
from agent      import RLAgent

In [None]:
# ─── 1) Hyperparameters and run configuration ────────────────────────────────
# Reproducibility: one seed shared by the RNGs and the environment.
seed = 42

# Training schedule.
episodes = 100000
stat_unit = 1000  # statistics are reported once per `stat_unit` episodes

# TD(0) learner parameters.
alpha = 0.01  # step size
gamma = 0.99

# Exploration settings handed to the agent.
epsilon = 0.01
decay = 0.999
eps_min = 0.001

# Rendering switches for the environment (both off for headless training).
# NOTE(review): `ascii` shadows the Python builtin of the same name.
ascii = False
gui = False

# Output file name; not referenced by the cells visible in this notebook.
save_path = "feat_td0.pkl"

# ─── 2) Seed RNGs ──────────────────────────────────────────────────────────────
# Seed both the stdlib and the NumPy global generators with the same value.
np.random.seed(seed)
random.seed(seed)

# ─── 3) Build environment ────────────────────────────────────────────────────
# The environment receives the same seed plus the two rendering switches.
env = Game2048Env(
    seed=seed,
    ascii_render=ascii,
    gui=gui,
)

# ─── 4) Instantiate and configure the learner ────────────────────────────────
learner = FeatureTD0Learner(
    alpha=alpha,
    gamma=gamma,
    sparse=True,
)

# Four 6-element index patterns; each one becomes a separate feature.
# (presumably board-cell indices of an n-tuple network — confirm in `features`)
tuples = [
    [0, 1, 2, 3, 4, 5],
    [4, 5, 6, 7, 8, 9],
    [0, 1, 2, 4, 5, 6],
    [4, 5, 6, 8, 9, 10],
]
for indices in tuples:
    # iso=8: presumably the number of isomorphic variants per pattern — TODO confirm
    feature = pattern(indices, iso=8)
    learner.add_feature(feature)

# The agent ties the environment and learner together with the exploration settings.
agent = RLAgent(
    env,
    learner,
    epsilon=epsilon,
    decay=decay,
    eps_min=eps_min,
)

# ─── 5) Training loop ────────────────────────────────────────────────────────
# Plays `episodes` games, numbering them from 1, and keeps every final score.
scores = []
for ep in range(1, episodes + 1):
    # Learners that expose `reset_traces` (e.g. a TD-lambda variant) get their
    # eligibility traces cleared before each new episode; others skip this.
    if hasattr(learner, "reset_traces"):
        learner.reset_traces()

    # Play one complete episode and record the score it returns.
    episode_score = agent.run_episode()
    scores.append(episode_score)

    # Pass the episode number, `env.b` and the score to the learner's
    # statistics printer, which reports once every `stat_unit` episodes.
    learner.make_statistic(ep, env.b, episode_score, unit=stat_unit)


6-tuple, size = 16777216 (64MB)
6-tuple, size = 16777216 (64MB)
6-tuple, size = 16777216 (64MB)
6-tuple, size = 16777216 (64MB)
1000	avg = 2943.0	max = 10896.0
	32	100.0%	(0.1%)
	64	99.9%	(6.2%)
	128	93.7%	(34.6%)
	256	59.1%	(47.0%)
	512	12.1%	(12.0%)
	1024	0.1%	(0.1%)
2000	avg = 3070.4	max = 13768.0
	32	100.0%	(0.1%)
	64	99.9%	(5.1%)
	128	94.8%	(31.7%)
	256	63.1%	(51.8%)
	512	11.3%	(11.2%)
	1024	0.1%	(0.1%)
3000	avg = 3024.8	max = 13804.0
	32	100.0%	(0.1%)
	64	99.9%	(5.4%)
	128	94.5%	(36.2%)
	256	58.3%	(46.0%)
	512	12.3%	(12.1%)
	1024	0.2%	(0.2%)
4000	avg = 3038.9	max = 8332.0
	32	100.0%	(0.3%)
	64	99.7%	(4.8%)
	128	94.9%	(36.4%)
	256	58.5%	(45.2%)
	512	13.3%	(13.3%)
5000	avg = 3074.4	max = 11808.0
	32	100.0%	(0.1%)
	64	99.9%	(4.5%)
	128	95.4%	(34.3%)
	256	61.1%	(48.0%)
	512	13.1%	(13.0%)
	1024	0.1%	(0.1%)
6000	avg = 3067.2	max = 12016.0
	32	100.0%	(0.2%)
	64	99.8%	(5.8%)
	128	94.0%	(34.1%)
	256	59.9%	(46.9%)
	512	13.0%	(12.9%)
	1024	0.1%	(0.1%)
7000	avg = 3077.5	max = 11720.0
	64	100

In [3]:
# Second training run: plays `episodes` more games with the existing agent.
# NOTE: rebinding `scores` here discards the history collected by any earlier
# training loop in this kernel.
scores = []
for ep in range(1, episodes + 1):
    # Same guard as the first training loop: a learner that exposes
    # `reset_traces` must not carry eligibility traces over between episodes.
    if hasattr(learner, "reset_traces"):
        learner.reset_traces()

    # Play one complete episode and record the score it returns.
    score = agent.run_episode()
    scores.append(score)

    # Use the built-in statistic printer on the learner; it reports once every
    # `stat_unit` episodes.
    learner.make_statistic(ep, env.b, score, unit=stat_unit)

1000	avg = 1031.0	max = 3476.0
	16	100.0%	(0.4%)
	32	99.6%	(7.9%)
	64	91.7%	(37.6%)
	128	54.1%	(49.1%)
	256	5.0%	(5.0%)


KeyboardInterrupt: 