In [None]:
import os
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from rototransl_env import TrackingEnv
import random
from collections import deque
import datetime

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

NUM_NEURONS = 512   #256
LR_ACTOR = 0.0008
LR_CRITIC = 0.0008
GAMMA = 0.99
TAU = 0.005
EARLY_STOPPING_EPISODES = 30
CHECKPOINT_INTERVAL = 100

now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_DIR = f"PROVA{now}"
os.makedirs(RUN_DIR, exist_ok=True)

class PolicyNet(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(PolicyNet, self).__init__()
        self.fc1 = nn.Linear(state_dim, NUM_NEURONS)
        self.fc2 = nn.Linear(NUM_NEURONS, NUM_NEURONS)
        self.fc3 = nn.Linear(NUM_NEURONS, action_dim)
        nn.init.uniform_(self.fc3.weight, -3e-3, 3e-3)
        nn.init.uniform_(self.fc3.bias, -3e-3, 3e-3)

    def forward(self, state):
        if state.dim() == 1:
            state = state.unsqueeze(0)
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        action = self.fc3(x)
        action_xy = torch.tanh(action[:, :2]) * 5.0
        action_rot = torch.tanh(action[:, 2:3])
        return torch.cat([action_xy, action_rot], dim=1).squeeze(0)

class QNet(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(QNet, self).__init__()
        self.fc1 = nn.Linear(state_dim + action_dim, NUM_NEURONS)
        self.fc2 = nn.Linear(NUM_NEURONS, NUM_NEURONS)
        self.fc3 = nn.Linear(NUM_NEURONS, 1)

    def forward(self, state, action):
        x = torch.cat([state, action], dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.relu(self.fc3(x))

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

class DDPGAgent(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DDPGAgent, self).__init__()
        self.actor = PolicyNet(state_dim, action_dim)
        self.actor_target = PolicyNet(state_dim, action_dim)
        self.critic = QNet(state_dim, action_dim)
        self.critic_target = QNet(state_dim, action_dim)
        self.optimizer_actor = optim.Adam(self.actor.parameters(), lr=LR_ACTOR)
        self.optimizer_critic = optim.Adam(self.critic.parameters(), lr=LR_CRITIC)
        self.buffer = ReplayBuffer(50000) #statico e dinamico rumoroso
        #self.buffer = ReplayBuffer(20000) #dinamico
        self.batch_size = 128
        self.noise_std = 1.0 #0.5
        self.min_noise_std = 0.05 #0.01
        self.noise_decay = 0.9995 #0.999

    def reward_function(self, state, action, next_state, tolerance_transl, tolerance_rot, rimbalzato):
        pos = state[:2]
        target = state[3:5]              # target(t)
        next_pos = next_state[:2]        # agent(t+1)

        to_target = F.normalize(target - pos, dim=0)
        action_dir = F.normalize(action[:2], dim=0)
        direction_reward = torch.dot(action_dir, to_target)
        direction_penalty = 1.0 - direction_reward

        rot_error = torch.abs(next_state[2] - state[5])
        print(f"rot_error: {rot_error.item()}")

        
        #reward = - 5 * direction_penalty - 5 * rot_error

        reward = - 3 * rot_error

        #if torch.norm(next_state[:2] - state[3:5]) < tolerance_transl and torch.norm(next_state[2] - state[5]) < tolerance_rot:
        #    reward += 100
        
        #if torch.norm(next_state[:2] - state[3:5]) < tolerance_transl:
        #    reward += 5

        if torch.norm(next_state[2] - state[5]) < tolerance_rot:
            reward += 100   #5

        if rimbalzato:
            reward -= 100
        
        return reward - 1

    def update(self, gamma=GAMMA, tau=TAU, device='cpu'):
        if len(self.buffer) < self.batch_size:
            return
        transitions = random.sample(self.buffer.buffer, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*transitions)
        states = torch.FloatTensor(np.array(states)).to(device)
        actions = torch.FloatTensor(np.array(actions)).to(device)
        rewards = torch.FloatTensor(np.array(rewards)).unsqueeze(1).to(device)
        next_states = torch.FloatTensor(np.array(next_states)).to(device)
        dones = torch.FloatTensor(np.array(dones)).unsqueeze(1).to(device)

        with torch.no_grad():
            next_actions = self.actor_target(next_states)
            target_Q = self.critic_target(next_states, next_actions)
            y = rewards + gamma * target_Q * (1 - dones)

        current_Q = self.critic(states, actions)
        critic_loss = F.mse_loss(current_Q, y)

        self.optimizer_critic.zero_grad()
        critic_loss.backward()
        self.optimizer_critic.step()

        actor_loss = -self.critic(states, self.actor(states)).mean()
        self.optimizer_actor.zero_grad()
        actor_loss.backward()
        self.optimizer_actor.step()

        for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()):
            target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)
        for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()):
            target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)

def save_checkpoint(agent, episode):
    path = os.path.join(RUN_DIR, f"checkpoint_ep{episode}.pth")
    torch.save({
        'actor_state_dict': agent.actor.state_dict(),
        'critic_state_dict': agent.critic.state_dict()
    }, path)

def plot_and_save(rewards, successes):
    plt.figure()
    plt.plot(rewards, label='Total Reward')
    plt.plot(np.convolve(successes, np.ones(10)/10, mode='valid'), label='Success Rate (10)')
    plt.legend()
    plt.xlabel('Episode')
    plt.title('DDPG Training Progress')
    plt.savefig(os.path.join(RUN_DIR, 'training_plot.png'))
    plt.close()

def save_trajectory_plot(trajectory, target_trajectory, episode, tag="trajectory"):
    trajectory = np.array(trajectory)
    target_trajectory = np.array(target_trajectory)
    plt.figure(figsize=(5, 5))
    plt.plot(trajectory[:, 0], trajectory[:, 1], label="Agente", color='blue')
    plt.plot(target_trajectory[:, 0], target_trajectory[:, 1], label="Target", color='red')
    plt.scatter(*trajectory[0], color='green', label='Start agente', s=100)
    plt.scatter(*target_trajectory[0], color='yellow', label='Start target', s=100)
    plt.scatter(*target_trajectory[-1], color='red', label='End agente', s=100)
    plt.scatter(target_trajectory[-5:, 0], target_trajectory[-5:, 1], color='orange', label='Ultimi target', s=10)
    plt.scatter(trajectory[-5:, 0], trajectory[-5:, 1], color='purple', label='Ultimi agente', s=10)
    plt.title(f"{tag.capitalize()} - Episodio {episode}")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.legend()
    plt.grid()
    plt.axis('equal')
    #plt.savefig(os.path.join(RUN_DIR, f"{tag}_ep{episode}.png"))
    plt.show()
    plt.close()

def train_ddpg(env=None, num_episodes=10001):
    if env is None:
        env = TrackingEnv()
    state_dim = env.observation_space.shape[0]
    print(state_dim)
    action_dim = env.action_space.shape[0]
    print(action_dim)
    agent = DDPGAgent(state_dim, action_dim)
    reward_history, success_history = [], []
    counter = 0
    tolerance_transl = 0.02
    tolerance_rot = 0.00001

    for episode in range(num_episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0
        real_state = torch.tensor(state, dtype=torch.float32)
        state = torch.tensor(state, dtype=torch.float32)

        #state = state.clone()
        #state[2:4] += torch.normal(mean=0.0, std=0.005, size=(2,), device=state.device)

        agent.noise_std = max(agent.min_noise_std, agent.noise_std * agent.noise_decay)     # Exploration
        trajectory, target_trajectory = [], []
        attached_counter = 0
        total_attached_counter = 0

        while not done:
            trajectory.append(state[:2].detach().numpy())
            target_trajectory.append(state[3:5].detach().numpy())
            action = agent.actor(state).detach().numpy()
            #print(f"action: {action}")
            noise = np.random.normal(0, agent.noise_std, size=action.shape)
            noisy_action = action + noise   # Exploration
            noisy_action = np.clip(noisy_action, env.action_space.low, env.action_space.high)
            action_tensor = torch.tensor(noisy_action, dtype=torch.float32)

            next_state, _, done, truncated, _, rimbalzato = env.step(noisy_action)

            real_next_state = torch.tensor(next_state, dtype=torch.float32)
            next_state = torch.tensor(next_state, dtype=torch.float32)
            

            #next_state = next_state.clone()
            #next_state[2:4] += torch.normal(mean=0.0, std=0.005, size=(2,), device=next_state.device)

            if torch.norm(real_next_state[:2] - real_state[3:5]) < tolerance_transl:
                print("RAGGIUNTO TARGET, episodio:", episode)

            if torch.norm(real_next_state[2] - real_state[5]) < tolerance_rot:
                print("RAGGIUNTO TARGET ROTAZIONE, episodio:", episode)


            #if torch.norm(real_next_state[:2] - real_state[3:5]) < tolerance_transl and torch.norm(real_next_state[2] - real_state[5]) < tolerance_rot:
            if torch.norm(real_next_state[2] - real_state[5]) < tolerance_rot:
                total_attached_counter += 1
                attached_counter += 1
            else:
                attached_counter = 0

            reward = agent.reward_function(real_state, action_tensor, real_next_state, tolerance_transl, tolerance_rot, rimbalzato)

            if attached_counter == 1 or truncated:
                done = True

            #condition = torch.norm(real_next_state[:2] - real_state[3:5]) > tolerance_transl or torch.norm(real_next_state[2] - real_state[5]) > tolerance_rot
            #if attached_counter > 20 or truncated or (total_attached_counter > 0 and condition):
            #    done = True
            
            transition = (state.numpy(), action_tensor.numpy(), reward, next_state.numpy(), float(done))
            agent.buffer.push(transition)
            if len(agent.buffer) > 1000:
                agent.update()
            state = next_state
            real_state = real_next_state
            total_reward += reward


        if attached_counter == 1:   #> 20:
            save_trajectory_plot(trajectory, target_trajectory, episode, tag="success")
            counter += 1
            success_history.append(1)
            if counter % 100 == 0:
                save_trajectory_plot(trajectory, target_trajectory, episode, tag="success")
        else:
            success_history.append(0)

        reward_history.append(total_reward)

        if episode % 10 == 0:
            print(f"Episode {episode}, Reward: {total_reward:.2f}, Attached_counter: {attached_counter}, Total attached counter: {total_attached_counter}, Successes: {counter}")
        if episode % CHECKPOINT_INTERVAL == 0 and episode > 0:
            save_checkpoint(agent, episode)
        if episode % 50 == 0 and episode > 0:
            save_trajectory_plot(trajectory, target_trajectory, episode)

        if len(reward_history) > EARLY_STOPPING_EPISODES and np.mean(reward_history[-EARLY_STOPPING_EPISODES:]) > 2000:
           print(f"Early stopping at episode {episode}")
           save_checkpoint(agent, episode)
           save_trajectory_plot(trajectory, target_trajectory, episode)
           break

    np.save(os.path.join(RUN_DIR, 'rewards.npy'), reward_history)
    np.save(os.path.join(RUN_DIR, 'successes.npy'), success_history)
    plot_and_save(reward_history, success_history)
    env.close()
    return agent

if __name__ == "__main__":
    trained_agent = train_ddpg()


6
3
rot_error: 0.20803308486938477
rot_error: 0.235848069190979
rot_error: 0.14840292930603027
rot_error: 0.009124159812927246
rot_error: 0.14789187908172607
rot_error: 0.3119209408760071
rot_error: 0.45394521951675415
rot_error: 0.5613267421722412
rot_error: 0.6340967416763306
rot_error: 0.6807144284248352
rot_error: 0.698536217212677
rot_error: 0.6558635830879211
rot_error: 0.6084034442901611
rot_error: 0.5495246648788452
rot_error: 0.49893471598625183
rot_error: 0.45132946968078613
rot_error: 0.416618674993515
rot_error: 0.37500014901161194
rot_error: 0.3487994372844696
rot_error: 0.3138151168823242
rot_error: 0.28595784306526184
rot_error: 0.2572562098503113
rot_error: 0.24282468855381012
rot_error: 0.2258298695087433
rot_error: 0.21008269488811493
rot_error: 0.19761142134666443
rot_error: 0.17821955680847168
rot_error: 0.16305793821811676
rot_error: 0.1563364714384079
rot_error: 0.15199686586856842
rot_error: 0.14235267043113708
rot_error: 0.13609856367111206
rot_error: 0.12899233

KeyboardInterrupt: 

In [1]:
import os
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from env_rot import TrackingEnv
import random
from collections import deque
import datetime

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

NUM_NEURONS = 128
LR_ACTOR = 0.001
LR_CRITIC = 0.0008
GAMMA = 0.99
TAU = 0.005
EARLY_STOPPING_EPISODES = 30
CHECKPOINT_INTERVAL = 100

now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_DIR = f"PROVA{now}"
os.makedirs(RUN_DIR, exist_ok=True)

class PolicyNet(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(PolicyNet, self).__init__()
        self.fc1 = nn.Linear(state_dim, NUM_NEURONS)
        self.fc2 = nn.Linear(NUM_NEURONS, NUM_NEURONS)
        self.fc3 = nn.Linear(NUM_NEURONS, action_dim)
        nn.init.uniform_(self.fc3.weight, -3e-3, 3e-3)
        nn.init.uniform_(self.fc3.bias, -3e-3, 3e-3)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        action = torch.tanh(self.fc3(x))
        return action

class QNet(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(QNet, self).__init__()
        self.fc1 = nn.Linear(state_dim + action_dim, NUM_NEURONS)
        self.fc2 = nn.Linear(NUM_NEURONS, NUM_NEURONS)
        self.fc3 = nn.Linear(NUM_NEURONS, 1)

    def forward(self, state, action):
        x = torch.cat([state, action], dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

class DDPGAgent(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DDPGAgent, self).__init__()
        self.actor = PolicyNet(state_dim, action_dim)
        self.actor_target = PolicyNet(state_dim, action_dim)
        self.critic = QNet(state_dim, action_dim)
        self.critic_target = QNet(state_dim, action_dim)
        self.optimizer_actor = optim.Adam(self.actor.parameters(), lr=LR_ACTOR)
        self.optimizer_critic = optim.Adam(self.critic.parameters(), lr=LR_CRITIC)
        self.buffer = ReplayBuffer(50000) 
        self.batch_size = 128
        self.noise_std = 0.5
        self.min_noise_std = 0.01
        self.noise_decay = 0.999

    def reward_function(self, state, action, next_state, step, tolerance):
       
        rot_error = torch.norm(state[1]-next_state[0])
        reward = - rot_error.item() * 3

        if torch.norm(next_state[0] - state[1]) < tolerance:
            reward += 100
        
        return reward - 1

    def update(self, gamma=GAMMA, tau=TAU, device='cpu'):
        if len(self.buffer) < self.batch_size:
            return
        transitions = random.sample(self.buffer.buffer, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*transitions)
        states = torch.FloatTensor(np.array(states)).to(device)
        actions = torch.FloatTensor(np.array(actions)).to(device)
        rewards = torch.FloatTensor(np.array(rewards)).unsqueeze(1).to(device)
        next_states = torch.FloatTensor(np.array(next_states)).to(device)
        dones = torch.FloatTensor(np.array(dones)).unsqueeze(1).to(device)

        with torch.no_grad():
            next_actions = self.actor_target(next_states)
            target_Q = self.critic_target(next_states, next_actions)
            y = rewards + gamma * target_Q * (1 - dones)

        current_Q = self.critic(states, actions)
        critic_loss = F.mse_loss(current_Q, y)

        self.optimizer_critic.zero_grad()
        critic_loss.backward()
        self.optimizer_critic.step()

        actor_loss = -self.critic(states, self.actor(states)).mean()
        self.optimizer_actor.zero_grad()
        actor_loss.backward()
        self.optimizer_actor.step()

        for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()):
            target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)
        for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()):
            target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)

def save_checkpoint(agent, episode):
    path = os.path.join(RUN_DIR, f"checkpoint_ep{episode}.pth")
    torch.save({
        'actor_state_dict': agent.actor.state_dict(),
        'critic_state_dict': agent.critic.state_dict()
    }, path)

def plot_and_save(rewards, successes):
    plt.figure()
    plt.plot(rewards, label='Total Reward')
    plt.legend()
    plt.xlabel('Episode')
    plt.title('DDPG Training Progress')
    plt.savefig(os.path.join(RUN_DIR, 'training_plot.png'))
    plt.close()

def save_trajectory_plot(trajectory, target_trajectory, episode, tag="trajectory"):
    trajectory = np.array(trajectory)
    target_trajectory = np.array(target_trajectory)
    plt.figure(figsize=(6, 4))
    plt.plot(trajectory, label="Agente", color='blue')
    plt.plot(target_trajectory, label="Target", color='red')
    plt.scatter(0, trajectory[0], color='green', label='Start agente', s=60)
    plt.scatter(0, target_trajectory[0], color='yellow', label='Start target', s=60)
    plt.scatter(len(target_trajectory) - 1, target_trajectory[-1], color='red', label='End target', s=60)
    plt.scatter(len(trajectory) - 1, trajectory[-1], color='purple', label='End agente', s=60)
    plt.title(f"{tag.capitalize()} - Episodio {episode}")
    plt.xlabel("Step temporale")
    plt.ylabel("Valore (es. angolo)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(RUN_DIR, f"{tag}_ep{episode}.png"))
    plt.close()


def train_ddpg(env=None, num_episodes=10001):
    if env is None:
        env = TrackingEnv()
    state_dim = 2
    action_dim = 1
    agent = DDPGAgent(state_dim, action_dim)
    reward_history, success_history = [], []
    counter = 0
    tolerance = 0.01

    for episode in range(num_episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0
        real_state = torch.tensor(state, dtype=torch.float32)
        state = torch.tensor(state, dtype=torch.float32)

        # state = state.clone()
        # state[2:4] += torch.normal(mean=0.0, std=0.005, size=(2,), device=state.device)

        agent.noise_std = max(agent.min_noise_std, agent.noise_std * agent.noise_decay)     # Exploration
        trajectory, target_trajectory = [], []
        attached_counter = 0
        total_attached_counter = 0

        while not done:
            trajectory.append(state[0].detach().numpy())
            target_trajectory.append(state[1].detach().numpy())
            action = agent.actor(state).detach().numpy()#, training=True).detach().numpy()
            noise = np.random.normal(0, agent.noise_std, size=action.shape)
            noisy_action = action + noise   # Exploration
            noisy_action = np.clip(noisy_action, env.action_space.low, env.action_space.high)
            action_tensor = torch.tensor(noisy_action, dtype=torch.float32)

            next_state, _, done, truncated, _ = env.step(noisy_action)
            real_next_state = torch.tensor(next_state, dtype=torch.float32)
            next_state = torch.tensor(next_state, dtype=torch.float32)
            

            # next_state = next_state.clone()
            # next_state[2:4] += torch.normal(mean=0.0, std=0.005, size=(2,), device=next_state.device)


            if torch.norm(real_next_state[0] - real_state[1]) < tolerance:
                total_attached_counter += 1
                attached_counter += 1
            else:
                attached_counter = 0

            reward = agent.reward_function(real_state, action_tensor, real_next_state, 0, tolerance)

            if attached_counter > 20 or truncated or (total_attached_counter > 0 and torch.norm(real_next_state[0] - real_state[1]) > tolerance):
                done = True
            
            transition = (state.numpy(), action_tensor.numpy(), reward, next_state.numpy(), float(done))
            agent.buffer.push(transition)
            if len(agent.buffer) > 1000:
                agent.update()
            state = next_state
            real_state = real_next_state
            total_reward += reward

        if attached_counter > 20:
            counter += 1
            success_history.append(1)
            if counter % 100 == 0:
                save_trajectory_plot(trajectory, target_trajectory, episode, tag="success")
        else:
            success_history.append(0)

        reward_history.append(total_reward)

        if episode % 10 == 0:
            print(f"Episode {episode}, Reward: {total_reward:.2f}, Attached_counter: {attached_counter}, Total attached counter: {total_attached_counter}, Successes: {counter}")
        if episode % CHECKPOINT_INTERVAL == 0 and episode > 0:
            save_checkpoint(agent, episode)
        if episode % 50 == 0 and episode > 0:
            save_trajectory_plot(trajectory, target_trajectory, episode)

        if len(reward_history) > EARLY_STOPPING_EPISODES and np.mean(reward_history[-EARLY_STOPPING_EPISODES:]) > 2000:
           print(f"Early stopping at episode {episode}")
           save_checkpoint(agent, episode)
           save_trajectory_plot(trajectory, target_trajectory, episode)
           break

    np.save(os.path.join(RUN_DIR, 'rewards.npy'), reward_history)
    np.save(os.path.join(RUN_DIR, 'successes.npy'), success_history)
    plot_and_save(reward_history, success_history)
    env.close()
    return agent

if __name__ == "__main__":
    trained_agent = train_ddpg()


Episode 0, Reward: -130.78, Attached_counter: 0, Total attached counter: 0, Successes: 0
Episode 10, Reward: 1057.04, Attached_counter: 0, Total attached counter: 11, Successes: 0
Episode 20, Reward: 83.84, Attached_counter: 0, Total attached counter: 1, Successes: 0
Episode 30, Reward: -130.59, Attached_counter: 0, Total attached counter: 0, Successes: 0
Episode 40, Reward: -136.89, Attached_counter: 0, Total attached counter: 0, Successes: 0
Episode 50, Reward: 158.24, Attached_counter: 0, Total attached counter: 2, Successes: 0
Episode 60, Reward: -125.88, Attached_counter: 0, Total attached counter: 0, Successes: 0
Episode 70, Reward: 483.35, Attached_counter: 0, Total attached counter: 5, Successes: 0
Episode 80, Reward: 95.86, Attached_counter: 0, Total attached counter: 1, Successes: 0
Episode 90, Reward: 284.20, Attached_counter: 0, Total attached counter: 3, Successes: 0
Episode 100, Reward: -130.62, Attached_counter: 0, Total attached counter: 0, Successes: 0
Episode 110, Rew

KeyboardInterrupt: 