In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt
from env        import Game2048Env
from features   import pattern
from learners   import FeatureTD0Learner
from agent      import RLAgent

In [None]:
# ─── 1) Configuration ─────────────────────────────────────────────────────────
# Reproducibility and run length.
seed = 42
episodes = 10_000

# Values forwarded to FeatureTD0Learner.
alpha = 0.1    # step size for TD(0)
gamma = 0.99   # passed as `gamma`; presumably the discount factor

# Values forwarded to RLAgent (presumably an epsilon-greedy schedule — confirm
# against the agent implementation).
epsilon = 0.01
decay = 0.999
eps_min = 0.001

# Rendering switches handed to Game2048Env as `ascii_render` and `gui`.
# NOTE: `ascii` shadows the built-in ascii(); the name is kept because the
# environment construction below reads it.
ascii = False
gui = False

# Reporting and persistence.
stat_unit = 1_000              # print stats every 1000 episodes
save_path = "feat_td0.pkl"     # not referenced by the cells visible here

# ─── 2) Seed RNGs ──────────────────────────────────────────────────────────────
# Seed NumPy's and the standard library's global generators with the same value.
np.random.seed(seed)
random.seed(seed)

# ─── 3) Build environment ────────────────────────────────────────────────────
# The environment receives the seed as well, plus the two rendering switches.
env = Game2048Env(
    seed=seed,
    ascii_render=ascii,
    gui=gui,
)

# ─── 4) Instantiate and configure the learner ────────────────────────────────
learner = FeatureTD0Learner(
    alpha=alpha,
    gamma=gamma,
    sparse=True,
)

# Four 6-element index patterns; presumably cell indices on the 2048 board —
# confirm against `features.pattern`.
tuples = [
    [0, 1, 2, 3, 4, 5],
    [4, 5, 6, 7, 8, 9],
    [0, 1, 2, 4, 5, 6],
    [4, 5, 6, 8, 9, 10],
]

# Register one feature per pattern. `iso=8` is passed through unchanged
# (presumably the number of board symmetries to expand — TODO confirm).
for patt in tuples:
    feature = pattern(patt, iso=8)
    learner.add_feature(feature)

# ─── 5) Agent ────────────────────────────────────────────────────────────────
agent = RLAgent(
    env,
    learner,
    epsilon=epsilon,
    decay=decay,
    eps_min=eps_min,
)

# ─── 5) Training loop ────────────────────────────────────────────────────────
# `scores` collects the value returned by each episode, in episode order.
scores = []
for ep in range(1, episodes + 1):
    # Learners that expose reset_traces() (e.g. a TD-lambda variant) get their
    # traces cleared before every episode; other learners skip this step.
    if hasattr(learner, "reset_traces"):
        learner.reset_traces()

    # Play one complete episode and record its result.
    sc = agent.run_episode()
    scores.append(sc)

    # Called after every episode; detailed stats are printed every
    # `stat_unit` games.
    learner.make_statistic(ep, env.b, sc, unit=stat_unit)


TypeError: object.__init__() takes exactly one argument (the instance to initialize)

In [3]:
# Training loop (re-run). This cell depends on `agent`, `learner`, `env`,
# `episodes` and `stat_unit` already existing in the kernel; it does not build
# them itself.
scores = []
for ep in range(1, episodes + 1):
    # Match the training loop in the setup cell: learners that expose
    # reset_traces() have their traces cleared before each episode so nothing
    # carries over from the previous game. Learners without the method are
    # unaffected.
    if hasattr(learner, "reset_traces"):
        learner.reset_traces()

    score = agent.run_episode()
    scores.append(score)

    # use the built-in statistic printer on your learner
    learner.make_statistic(ep, env.b, score, unit=stat_unit)

1000	avg = 1031.0	max = 3476.0
	16	100.0%	(0.4%)
	32	99.6%	(7.9%)
	64	91.7%	(37.6%)
	128	54.1%	(49.1%)
	256	5.0%	(5.0%)


KeyboardInterrupt: 