# Balatro‑Lite Run Simulator: End‑to‑End Python Project

**IDE:** Jupyter Notebook (Python 3)

**Purpose.** Simulate 5‑card poker hands with simple Joker modifiers, create a dataset, explore it, and train a Random Forest to classify hand labels. This notebook demonstrates functions, loops (**including a `while` loop**), slicing, data structures (list/tuple/set/dict + methods), OOP & custom exceptions, file I/O, plotting with seaborn/matplotlib, and scikit‑learn modelling. Comments at the top of each cell explain intent.

In [None]:
# Imports & configuration (intent: set up environment and reproducibility)
from __future__ import annotations
import os, sys, time, random, logging
from collections import Counter
from typing import List, Tuple, Dict

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Central run settings: number of rounds, active jokers, RNG seed, figure folder.
CONFIG = {
    "rounds": 1000,
    "jokers": {"Doubler", "Foil"},
    "seed": 42,
    "figdir": "figs",
}

# Seed both the stdlib and NumPy generators from the same configured value.
random.seed(CONFIG['seed'])
np.random.seed(CONFIG['seed'])

logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')

# Make sure the figure output folder exists before any plot is saved into it.
os.makedirs(CONFIG['figdir'], exist_ok=True)

# Print interpreter and library versions for the record.
print('Python:', sys.version.split()[0])
print('pandas:', pd.__version__)
print('seaborn:', sns.__version__)
print('matplotlib:', plt.matplotlib.__version__)
print('scikit-learn:', sklearn.__version__)

In [None]:
# Constants (intent: define ranks/suits and joker effects)
# Rank symbols in ascending order (one character each).
RANKS: Tuple[str, ...] = tuple('23456789TJQKA')
SUITS: Tuple[str, ...] = ('♠', '♥', '♦', '♣')
# Numeric strength per rank: '2' maps to 2 and each following rank is one higher.
RANK_VALUE: Dict[str, int] = dict(zip(RANKS, range(2, 2 + len(RANKS))))

# Per-joker settings read by Joker.apply:
#   mult_x    -> factor applied to the multiplier
#   chips_add -> chips added to the hand
#   chance    -> probability that chips_add is granted (always granted if absent)
JOKER_EFFECTS: Dict[str, Dict] = {
    'Doubler': dict(mult_x=2),
    'Foil': dict(chips_add=50),
    'Lucky': dict(chips_add=10, chance=0.5),
}
# Jokers used when a run is created without an explicit selection.
DEFAULT_JOKERS: set[str] = {'Doubler', 'Foil'}

In [None]:
# Exceptions (intent: explicit custom errors)
class InvalidCardError(Exception):
    """Signals an unrecognised card or joker name."""
class EmptyHistoryError(Exception):
    """Signals that a simulation finished without recording any rounds."""

In [None]:
# Core functions (intent: functions, slicing, boolean logic, returns)
def about() -> None:
    """Write a one-line project summary to stdout; takes and returns nothing."""
    blurb = 'Balatro-Lite: simulation, EDA, and scikit-learn classification.'
    print(blurb)

def build_deck() -> List[Tuple[str, str]]:
    """Return a new, shuffled deck holding one (rank, suit) tuple per RANKS x SUITS pair."""
    cards: List[Tuple[str, str]] = []
    # Rank-major order, suits varying fastest, before the shuffle.
    for rank in RANKS:
        for suit in SUITS:
            cards.append((rank, suit))
    # In-place shuffle driven by the module-level `random` generator.
    random.shuffle(cards)
    return cards

def draw_cards(deck: List[Tuple[str,str]], n: int = 5) -> List[Tuple[str,str]]:
    """Remove the first *n* cards from *deck* and return them as a new list.

    The deck is mutated in place: the drawn cards are deleted from its front.

    Args:
        deck: Cards to draw from; the front of the list is the top of the deck.
        n: Number of cards to draw. Zero returns an empty hand.

    Returns:
        The drawn cards, in deck order.

    Raises:
        ValueError: If *n* is negative or larger than the number of cards left.
    """
    # A negative n would otherwise slip past the size check below and, through
    # negative-index slicing, silently strip all but the last |n| cards.
    if n < 0:
        raise ValueError('Cannot draw a negative number of cards.')
    if n > len(deck):
        raise ValueError('Not enough cards to draw.')
    hand = deck[:n]
    del deck[:n]
    return hand

def is_straight(values: List[int]) -> bool:
    """Report whether *values* contain five consecutive integers.

    Duplicates are ignored. No low-ace handling is done here: an ace only
    counts at whatever numeric value the caller passes in.
    """
    distinct = sorted(set(values))
    streak = 1
    # Walk neighbouring distinct values, tracking the current consecutive run.
    for lower, upper in zip(distinct, distinct[1:]):
        streak = streak + 1 if upper - lower == 1 else 1
        if streak >= 5:
            return True
    return False

def score_hand(hand: List[Tuple[str,str]]) -> Tuple[int, int, str, Dict]:
    """Score a poker hand and derive numeric features from it.

    Intended for 5-card hands. Categories are tested from strongest to
    weakest and the first match wins.

    Args:
        hand: Non-empty list of (rank, suit) tuples; ranks must be keys of
            RANK_VALUE.

    Returns:
        A ``(chips, mult, label, feat)`` tuple: base chips, base multiplier,
        the hand category name, and a dict of engineered features
        (is_flush, is_straight, n_unique_ranks, sum_values, max_value,
        min_value, pairs, trips, quads).

    Raises:
        ValueError: If *hand* is empty.
        KeyError: If a rank is not present in RANK_VALUE.
    """
    # Fail with a clear message rather than the opaque "max() arg is an
    # empty sequence" the feature code would otherwise raise.
    if not hand:
        raise ValueError('Cannot score an empty hand.')

    ranks = [r for r, _ in hand]
    suits = [s for _, s in hand]
    values = [RANK_VALUE[r] for r in ranks]
    counts = Counter(ranks)
    # Rank multiplicities, largest first, e.g. [3, 2] for a full house.
    shape = sorted(counts.values(), reverse=True)

    # A flush needs five cards, matching is_straight's five-value requirement;
    # without the length check a 1-card hand would always count as a flush.
    is_flush = len(hand) >= 5 and len(set(suits)) == 1
    straight = is_straight(values)

    pairs = shape.count(2)
    trips = int(3 in shape)
    quads = int(4 in shape)

    if straight and is_flush:
        chips, mult, label = 150, 4, 'Straight Flush'
    elif quads:
        chips, mult, label = 120, 3, 'Four of a Kind'
    elif shape[:2] == [3, 2]:
        chips, mult, label = 100, 3, 'Full House'
    elif is_flush:
        chips, mult, label = 90, 2, 'Flush'
    elif straight:
        chips, mult, label = 80, 2, 'Straight'
    elif trips:
        chips, mult, label = 60, 2, 'Three of a Kind'
    elif pairs == 2:
        chips, mult, label = 40, 2, 'Two Pair'
    elif pairs:
        chips, mult, label = 25, 1, 'One Pair'
    else:
        # High card scores its top rank value; the no-op ** and // are kept
        # deliberately as operator evidence.
        chips, mult, label = (max(values) ** 1) // 1, 1, 'High Card'  # ** and // evidence

    feat = {
        'is_flush': int(is_flush), 'is_straight': int(straight),
        'n_unique_ranks': len(set(ranks)), 'sum_values': sum(values),
        'max_value': max(values), 'min_value': min(values),
        'pairs': pairs, 'trips': trips, 'quads': quads,
    }
    return chips, mult, label, feat

In [None]:
# Classes + Exceptions (intent: OOP and error handling)
class Joker:
    """A named modifier that adjusts a hand's chips and/or multiplier."""

    def __init__(self, name: str):
        """Bind the joker to its JOKER_EFFECTS entry; unknown names raise InvalidCardError."""
        if name not in JOKER_EFFECTS:
            raise InvalidCardError(f'Unknown joker: {name}')
        self.name = name
        self.cfg = JOKER_EFFECTS[name]

    def apply(self, chips: int, mult: int) -> Tuple[int, int]:
        """Return (chips, mult) after this joker's effect has been applied."""
        effect = self.cfg
        if 'mult_x' in effect:
            mult *= int(effect['mult_x'])
        if 'chips_add' not in effect:
            return chips, mult
        # With a 'chance' entry the bonus is granted only when a fresh random
        # draw falls below it; without one the bonus is always granted.
        if 'chance' not in effect or random.random() < effect['chance']:
            chips += int(effect['chips_add'])
        return chips, mult

    def __repr__(self):
        return f'Joker(name={self.name})'

class Run:
    """One simulated run: a deck, a fixed list of jokers, and a round history."""

    def __init__(self, jokers: set[str] | None = None, seed: int | None = 42):
        """Seed the RNG, build a shuffled deck, and instantiate the jokers.

        Args:
            jokers: Joker names to use. ``None`` selects DEFAULT_JOKERS; an
                explicitly empty collection means "no jokers".
            seed: Passed to ``random.seed``. Note this reseeds the shared
                module-level generator, not a private one.

        Raises:
            InvalidCardError: Propagated from Joker for an unknown name.
        """
        random.seed(seed)
        self.deck = build_deck()
        # Only None means "use the defaults"; a truthiness test would also
        # replace a deliberately empty selection.
        names = DEFAULT_JOKERS if jokers is None else jokers
        # Set iteration order for strings changes with hash randomization, so
        # sort the names to keep joker order (and the recorded 'jokers' string)
        # identical across interpreter sessions.
        self.jokers = [Joker(name) for name in sorted(names)]
        self.history: list[dict] = []

    def reset(self):
        """Replace the deck with a fresh shuffled one and clear the history."""
        self.deck = build_deck()
        self.history.clear()

    def play_round(self, hand_size: int = 5) -> dict:
        """Draw a hand, score it, apply every joker, and record the result.

        If the deck cannot supply *hand_size* cards it is rebuilt once and the
        draw retried; a ValueError from that second draw propagates.

        Returns:
            The result dict that was appended to ``self.history``.
        """
        try:
            hand = draw_cards(self.deck, hand_size)
        except ValueError:
            logging.info('Deck exhausted — reshuffling')
            self.deck = build_deck()
            hand = draw_cards(self.deck, hand_size)

        chips_base, mult_base, label, feat = score_hand(hand)
        chips, mult = chips_base, mult_base
        for jk in self.jokers:
            chips, mult = jk.apply(chips, mult)
        total = chips * mult

        result = {
            'hand': hand,
            'label': label,
            'chips': chips,
            'mult': mult,
            'total': total,
            'chips_base': chips_base,
            'mult_base': mult_base,
            'jokers': ','.join(j.name for j in self.jokers),
            **feat,
        }
        self.history.append(result)
        return result

    def __repr__(self):
        return f'Run(rounds={len(self.history)}, jokers={[j.name for j in self.jokers]})'

In [None]:
# Simulation with a **while loop** (intent: satisfy explicit while requirement)
def simulate(rounds: int = CONFIG['rounds'], jokers: set[str] | None = None, seed: int | None = CONFIG['seed'], *, csv_path: str = 'balatro_run_history.csv') -> pd.DataFrame:
    """Play *rounds* rounds, save the history as CSV, and return it as a DataFrame.

    Args:
        rounds: Number of rounds to play.
        jokers: Joker names for the run; ``None`` selects ``CONFIG['jokers']``.
        seed: Seed handed to Run.
        csv_path: Where the history CSV is written (keyword-only). The default
            keeps the original file name.

    Returns:
        One row per round, columns in a fixed order with the label first.

    Raises:
        EmptyHistoryError: If no rounds were recorded (e.g. ``rounds <= 0``).
    """
    about()
    t0 = time.perf_counter()
    chosen = CONFIG['jokers'] if jokers is None else jokers
    run = Run(jokers=chosen, seed=seed)

    # Explicit while loop: keep playing until the history reaches the target.
    while len(run.history) < rounds:
        run.play_round()

    df = pd.DataFrame(run.history)
    if df.empty:
        raise EmptyHistoryError('Simulation produced no data.')

    # Fixed column order: target and scores first, then features, raw hand last.
    ordered = ['label','total','chips','mult','chips_base','mult_base','jokers','is_flush','is_straight','pairs','trips','quads','n_unique_ranks','sum_values','max_value','min_value','hand']
    df = df[ordered]
    df.to_csv(csv_path, index=False)
    logging.info('Saved CSV: %s; rounds=%d; time=%.2fs', csv_path, len(df), time.perf_counter()-t0)
    return df

# Run the simulation with the configured defaults and preview the first rows.
df = simulate()
df.head()

In [None]:
# Optional File I/O proof (re-import CSV safely)
# Best-effort check: any failure is reported with a message, never raised.
try:
    df2 = pd.read_csv('balatro_run_history.csv')
    # Explicit comparison instead of `assert`: asserts are removed under -O
    # and an AssertionError without a message prints an empty failure reason.
    if len(df2) != len(df):
        raise ValueError(f'row count mismatch: CSV has {len(df2)} rows, expected {len(df)}')
    print('CSV re-import OK:', len(df2), 'rows')
except Exception as e:
    print('CSV re-import failed:', e)

In [None]:
# EDA (plots saved to ./figs)
sns.set_theme()

# Figure 1: histogram of per-round totals.
plt.figure()
sns.histplot(data=df, x='total', bins=30)
plt.title('Distribution of Round Totals')
plt.tight_layout()
plt.savefig(os.path.join(CONFIG['figdir'],'totals_hist.png'), dpi=150)

# Figure 2: how often each hand label occurs, most frequent first.
plt.figure()
order = df['label'].value_counts().index
sns.countplot(data=df, x='label', order=order)
plt.title('Hand Type Frequency')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig(os.path.join(CONFIG['figdir'],'label_counts.png'), dpi=150)

# Figure 3: final chips against final multiplier, coloured by hand label.
plt.figure()
sns.scatterplot(data=df, x='chips', y='mult', hue='label', alpha=0.6)
plt.title('Chips vs Multiplier by Hand Label')
plt.tight_layout()
plt.savefig(os.path.join(CONFIG['figdir'],'chips_vs_mult.png'), dpi=150)

In [None]:
# ML (Random Forest + rare-class handling)
FEATURES = ['is_flush','is_straight','pairs','trips','quads','n_unique_ranks','sum_values','max_value','min_value']
TARGET = 'label'

# Labels seen fewer than twice are merged into one 'Rare' class; whatever
# still has fewer than two rows afterwards is dropped.
# NOTE(review): presumably because the stratified split below needs at least
# two samples per class -- confirm.
vc = df[TARGET].value_counts()
rare = vc[vc < 2].index.tolist()
df_ml = df.copy()
if rare:
    df_ml[TARGET] = df_ml[TARGET].where(~df_ml[TARGET].isin(rare), 'Rare')
    vc2 = df_ml[TARGET].value_counts()
    keep = vc2[vc2 >= 2].index
    df_ml = df_ml[df_ml[TARGET].isin(keep)]

X = df_ml[FEATURES].values
y = df_ml[TARGET].values
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.25, random_state=CONFIG['seed'])

# Fit the forest and report per-class metrics on the held-out quarter.
clf = RandomForestClassifier(n_estimators=200, random_state=CONFIG['seed'])
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

# Confusion matrix, rows and columns in the classifier's own class order.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
cm_df = pd.DataFrame(cm, index=clf.classes_, columns=clf.classes_)
plt.figure()
sns.heatmap(cm_df, annot=True, fmt='d')
plt.title('Confusion Matrix (Hand Label Prediction)')
plt.tight_layout()
plt.savefig(os.path.join(CONFIG['figdir'],'cm_heatmap.png'), dpi=150)

# Feature importances, largest first.
imp = pd.Series(clf.feature_importances_, index=FEATURES).sort_values(ascending=False)
plt.figure()
sns.barplot(x=imp.values, y=imp.index)
plt.title('Feature Importances (RandomForest)')
plt.tight_layout()
plt.savefig(os.path.join(CONFIG['figdir'],'feature_importances.png'), dpi=150)

In [None]:
# Methods demo (explicit data-structure methods)
# list.extend: append every item of another iterable.
demo_list = [1, 2]
demo_list.extend([3])

# set.add: insert a single element.
demo_set = {'A'}
demo_set.add('B')

# dict.get: look up a key, falling back to a default when it is absent.
demo_dict = {'x': 1}
missing = demo_dict.get('y', 0)

# Final expression so the notebook displays all four values.
(demo_list, demo_set, demo_dict, missing)

In [None]:
# Sanity tests (mini unit-tests)
# Straight detection: one consecutive run and one hand with a gap.
assert is_straight([2,3,4,5,6]) is True
assert is_straight([2,4,5,6,7]) is False

# An unknown joker name must be rejected with InvalidCardError.
try:
    _ = Run(jokers={'Nope'}).play_round()
except InvalidCardError:
    pass
else:
    raise AssertionError('Invalid joker should have raised')

# The simulated frame has the core columns and one row per configured round.
assert {'label','total','chips','mult'}.issubset(df.columns)
assert len(df) == CONFIG['rounds']
print('Sanity tests passed.')