In [1]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.optimizers import Adam
from collections import deque
from IPython.display import display, clear_output
import ipywidgets as widgets
import time

# Reproducibility: seed both NumPy (random opening moves) and TensorFlow
# (weight initialisation) with the same fixed value.
_SEED = 1
np.random.seed(_SEED)
tf.random.set_seed(_SEED)

# Index order matters: each move is beaten by the one that follows it
# (wrapping around), which the game logic relies on.
moves = [
    'rock',
    'paper',
    'scissors',
]
# Number of past moves fed to the model as one input sequence.
seq_len = 5

def move_to_onehot(move):
    """Return the one-hot float32 vector of length 3 for `move`.

    The hot position is the index of `move` in the module-level `moves`
    list; `list.index` raises ValueError when the move is not listed.
    """
    hot_idx = moves.index(move)
    onehot = np.zeros(3, dtype=np.float32)
    onehot[hot_idx] = 1.0
    return onehot

def onehot_to_move(vec):
    """Return the move name at the position of the largest entry of `vec`."""
    best_idx = np.argmax(vec)
    return moves[best_idx]

# Build RNN model (small & fast for online learning).
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which current Keras flags with a UserWarning
# when used inside a Sequential model.
# Input : (batch, seq_len, 3) one-hot encoded past moves
# Output: (batch, 3) softmax probabilities over `moves`
model = Sequential([
    tf.keras.Input(shape=(seq_len, 3)),
    SimpleRNN(32, activation='tanh'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

# Game state
# user_history keeps up to 1000 of the user's moves (for stats);
# recent_seq is the sliding window of the last seq_len moves.
user_history = deque(maxlen=1000)
recent_seq = deque(maxlen=seq_len)

# Running score counters, all starting at zero.
total_rounds = user_wins = comp_wins = ties = 0

# Widgets for Colab interactivity: one bordered output area, one button per
# move, and a red reset button.
out = widgets.Output(layout={'border': '1px solid black'})
btn_rock, btn_paper, btn_scissors = (
    widgets.Button(description=label, button_style='')
    for label in ('rock', 'paper', 'scissors')
)
btn_reset = widgets.Button(description='Reset & Clear', button_style='danger')

def choose_counter(pred_prob):
    """Pick the move that beats the user's most likely next move.

    Args:
        pred_prob: Sequence of 3 scores, one per entry of `moves`, for the
            user's next move.

    Returns:
        A `(counter_move, predicted_idx)` tuple: the name of the move that
        beats the highest-scoring predicted move, and that predicted move's
        index in `moves`.
    """
    predicted_idx = int(np.argmax(pred_prob))
    # In `moves`, every entry is beaten by its successor (with wrap-around).
    beating_idx = (predicted_idx + 1) % 3
    counter_move = moves[beating_idx]
    return counter_move, predicted_idx

def train_online(X_seq, y_onehot):
    """Apply a single gradient update to the global `model`.

    Args:
        X_seq: Input batch; callers pass shape (1, seq_len, 3).
        y_onehot: Target batch; callers pass shape (1, 3).

    The loss/metric values returned by Keras are discarded.
    """
    model.train_on_batch(x=X_seq, y=y_onehot)

def play_round(user_move):
    """Play one round against the RNN and refresh the scoreboard.

    The computer's move is chosen from the `seq_len` moves the user made
    *before* this round, and the model is then trained on that same window
    with the current move as the target. The current move is only added to
    the history afterwards, so the model never sees the move it has to
    counter. Until `seq_len` earlier moves exist, the computer plays a
    uniformly random move and no training happens.

    Args:
        user_move: One of the strings in `moves`.

    Raises:
        ValueError: If `user_move` is not in `moves`. No state is modified
            in that case.
    """
    global total_rounds, user_wins, comp_wins, ties

    # Validate first so a bad call cannot corrupt history or counters.
    if user_move not in moves:
        raise ValueError(f"unknown move: {user_move!r}")

    total_rounds += 1

    # Encode the window of PREVIOUS moves only. Including the current move
    # here would leak the answer into both the prediction and the training
    # input (the model would simply learn to echo the last timestep).
    if len(recent_seq) == seq_len:
        X_seq = np.array([move_to_onehot(m) for m in recent_seq]).reshape(1, seq_len, 3)
        pred = model.predict(X_seq, verbose=0)[0]  # probabilities for user's next move
        comp_move, pred_move_idx = choose_counter(pred)
    else:
        X_seq = None
        comp_move = np.random.choice(moves)
        pred_move_idx = None

    # Determine result: in `moves`, index i + 1 (mod 3) beats index i.
    if user_move == comp_move:
        result = "Tie"
        ties += 1
    elif (moves.index(user_move) - moves.index(comp_move)) % 3 == 1:
        result = "User wins"
        user_wins += 1
    else:
        result = "Computer wins"
        comp_wins += 1

    # Online training: previous window -> the move the user actually played.
    if X_seq is not None:
        y_onehot = move_to_onehot(user_move).reshape(1, 3)
        train_online(X_seq, y_onehot)

    # Record the move only now, after prediction and training used the
    # window that precedes it.
    user_history.append(user_move)
    recent_seq.append(user_move)

    # Display
    with out:
        clear_output(wait=True)
        print(f"Round {total_rounds}")
        print(f"You: {user_move}    Computer: {comp_move}")
        if pred_move_idx is not None:
            print(f"Model predicted you would play: {moves[pred_move_idx]}")
        print(result)
        print()
        print(f"Stats → You: {user_wins} | Computer: {comp_wins} | Ties: {ties}")
        # total_rounds was incremented above, so it is always >= 1 here.
        print(f"Computer win rate: {comp_wins/total_rounds:.2%}")
        print()
        print("Tip: play repeated patterns to let the RNN learn them.")

# Button callbacks
def on_rock(b):
    """Click handler for the 'rock' button; the clicked widget `b` is unused."""
    play_round(user_move='rock')
def on_paper(b):
    """Click handler for the 'paper' button; the clicked widget `b` is unused."""
    play_round(user_move='paper')
def on_scissors(b):
    """Click handler for the 'scissors' button; the clicked widget `b` is unused."""
    play_round(user_move='scissors')
def on_reset(b):
    """Click handler for the reset button: wipe game state and the model.

    Rebinds the module-level history deques and score counters to fresh
    values, replaces the global `model` with a newly built and compiled
    network of the same architecture (new random weights), and shows a
    confirmation in the output widget.

    Args:
        b: The clicked button widget (unused).
    """
    global user_history, recent_seq, total_rounds, user_wins, comp_wins, ties, model
    user_history = deque(maxlen=1000)
    recent_seq = deque(maxlen=seq_len)
    total_rounds = user_wins = comp_wins = ties = 0

    # Recreating the model is the practical way to get fresh weights.
    # The input shape is declared with an explicit Input object rather than
    # the `input_shape=` layer argument, which current Keras warns about.
    fresh_model = Sequential([
        tf.keras.Input(shape=(seq_len, 3)),
        SimpleRNN(32, activation='tanh'),
        Dense(16, activation='relu'),
        Dense(3, activation='softmax')
    ])
    fresh_model.compile(optimizer=Adam(learning_rate=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

    # `model` is declared global above, so a plain assignment swaps it for
    # every function that looks it up at call time.
    model = fresh_model

    with out:
        clear_output()
        print("Reset complete. Model reinitialized.")

# Attach each button to its click handler.
for _button, _handler in (
    (btn_rock, on_rock),
    (btn_paper, on_paper),
    (btn_scissors, on_scissors),
    (btn_reset, on_reset),
):
    _button.on_click(_handler)

# Layout: a single row of buttons with the output area below it.
controls = widgets.HBox([btn_rock, btn_paper, btn_scissors, btn_reset])
display(controls)
display(out)

# Warm-up: show the initial message inside the output area.
with out:
    print("Rock-Paper-Scissors RNN (interactive). Click a button to play. Type patterns — the RNN learns online!")

  super().__init__(**kwargs)


HBox(children=(Button(description='rock', style=ButtonStyle()), Button(description='paper', style=ButtonStyle(…

Output(layout=Layout(border='1px solid black'))