<center>

# **22AIE401 - Reinforcement Learning**  
# **Lab 9**  

</center>

### Team Members:
- Guruprasath M R - AIE22015  
- Rudraksh Mohanty - AIE22046  
- Shree Prasad M - AIE22050  
- Tharun Kaarthik G K - AIE22062  

---

### Objective:
**Transfer Learning in Autonomous Farming.** The aim is to demonstrate the power of transfer learning in reinforcement learning by adapting a crop-monitoring robot’s navigation policy from one field (Field A) to a new field (Field B) with different crop layouts and hazards. The goal is to minimize time and damage while scanning all rows and reaching the base station.


---

### Problem Statement:
An autonomous robot has been trained to navigate Field A, which is organized with evenly spaced crop rows and water puddles (hazards). The robot must learn the most efficient route to: visit all inspection checkpoints (marked on certain crops), avoid water puddles, and reach the base station to upload data. The robot is then transferred to Field B, where crop rows are curved or uneven, new puddles and rocks appear, and the base station is in a different location.
 


---

## Original Code

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import matplotlib.patches as patches

# --- Grid-world layout ------------------------------------------------------
# Cells are addressed as (row, col) tuples on a GRID_SIZE x GRID_SIZE board.
GRID_SIZE = 8

# Field A: the layout the policy is first trained on.
CHECKPOINTS_A = {(2, 2), (4, 4), (6, 1)}
PUDDLES_A = {(3, 3), (5, 5), (2, 6)}
BASE_A = (7, 7)

# Field B: the layout the trained Q-table is transferred to.
CHECKPOINTS_B = {(1, 3), (5, 5), (6, 2)}
PUDDLES_B = {(2, 2), (4, 5), (3, 6)}
BASE_B = (0, 7)

# Impassable cells; is_valid() applies them regardless of the field config.
OBSTACLES = {(1, 1), (6, 6)}

# Each move is a (d_row, d_col) offset; ACTIONS keeps the map's key order.
ACTION_MAP = {
    'U': (-1, 0),
    'D': (1, 0),
    'L': (0, -1),
    'R': (0, 1),
}
ACTIONS = list(ACTION_MAP)

# --- Q-learning hyper-parameters --------------------------------------------
EPISODES = 300    # episodes run by each call to train()
MAX_STEPS = 200   # step cap per episode
ALPHA = 0.1       # learning rate
GAMMA = 0.9       # discount factor
EPSILON = 0.2     # probability of taking a random action

def is_valid(state):
    """Return True when *state* is on the grid and is not an obstacle cell."""
    row, col = state
    # Reject anything off the board before consulting the obstacle set.
    if row < 0 or row >= GRID_SIZE:
        return False
    if col < 0 or col >= GRID_SIZE:
        return False
    return state not in OBSTACLES

def step(state, action, config, visited_checkpoints):
    """Apply one move and score the cell the robot ends up on.

    Args:
        state: Current (row, col) cell.
        action: Key of ACTION_MAP naming the move to attempt.
        config: Field description with "puddles", "checkpoints" and "base".
        visited_checkpoints: Set of checkpoints scanned so far this episode;
            it is updated in place when a new checkpoint is reached.

    Returns:
        (next_state, reward, visited_checkpoints). A move onto an invalid
        cell leaves the robot where it was.
    """
    d_row, d_col = ACTION_MAP[action]
    candidate = (state[0] + d_row, state[1] + d_col)
    next_state = candidate if is_valid(candidate) else state

    # Puddles take precedence over every other cell type.
    if next_state in config["puddles"]:
        return next_state, -5, visited_checkpoints

    # A checkpoint only pays out the first time it is scanned.
    if next_state in config["checkpoints"] and next_state not in visited_checkpoints:
        visited_checkpoints.add(next_state)
        return next_state, 5, visited_checkpoints

    # The base only pays out once every checkpoint has been scanned.
    if next_state == config["base"] and visited_checkpoints == config["checkpoints"]:
        return next_state, 10, visited_checkpoints

    # Default per-step cost.
    return next_state, -1, visited_checkpoints

def select_action(Q, state):
    """Pick an action for *state* using an epsilon-greedy rule over *Q*.

    A random action is returned with probability EPSILON, or whenever the
    state has no entry in the table; otherwise the highest-valued action
    for the state is returned.
    """
    # Draw first so the random stream is consumed on every call.
    explore = np.random.rand() < EPSILON
    if explore or state not in Q:
        return np.random.choice(ACTIONS)
    action_values = Q[state]
    return max(action_values, key=action_values.get)

def train(config, Q=None):
    """Run epsilon-greedy tabular Q-learning on one field layout.

    Every episode starts at (0, 0) with no checkpoints visited and ends when
    the robot stands on the base with all checkpoints visited, or after
    MAX_STEPS steps, whichever comes first. EPISODES episodes are run.

    Args:
        config: Field description; "base" and "checkpoints" are read here
            and the whole mapping is handed to step().
        Q: Optional existing Q-table mapping state -> {action: value}. When
            supplied it is updated in place, which is how a table learned on
            one field is carried over to another. A fresh table is created
            when omitted.

    Returns:
        The Q-table (the same object as ``Q`` when one was supplied).
    """
    if Q is None:
        Q = {}
    for _ in range(EPISODES):
        state = (0, 0)
        visited_checkpoints = set()
        for _ in range(MAX_STEPS):
            # Lazily create table rows the first time a state is seen.
            if state not in Q:
                Q[state] = {a: 0 for a in ACTIONS}
            action = select_action(Q, state)
            next_state, reward, visited_checkpoints = step(
                state, action, config, visited_checkpoints
            )
            if next_state not in Q:
                Q[next_state] = {a: 0 for a in ACTIONS}

            done = (
                next_state == config["base"]
                and visited_checkpoints == config["checkpoints"]
            )
            # The episode ends on a terminal transition, so there is no
            # future return to bootstrap from. The base cell's stored values
            # belong to the non-terminal case (base reached before every
            # checkpoint is scanned) and must not leak into this target.
            future_value = 0 if done else max(Q[next_state].values())
            Q[state][action] += ALPHA * (
                reward + GAMMA * future_value - Q[state][action]
            )
            if done:
                break
            state = next_state
    return Q

# Per-field environment descriptions consumed by step() and train().
config_A = dict(checkpoints=CHECKPOINTS_A, puddles=PUDDLES_A, base=BASE_A)
config_B = dict(checkpoints=CHECKPOINTS_B, puddles=PUDDLES_B, base=BASE_B)

# Learn a policy from scratch in Field A.
print("Training in Field A...")
Q_A = train(config_A)

# Round-trip the Field A table through disk. The reloaded copy is a separate
# object, so continuing training on it leaves Q_A untouched.
with open("Q_fieldA.pkl", "wb") as f:
    f.write(pickle.dumps(Q_A))
with open("Q_fieldA.pkl", "rb") as f:
    Q_loaded = pickle.loads(f.read())

# Continue training in Field B, starting from the Field A values.
print("Transferring to Field B...")
Q_transfer = train(config_B, Q=Q_loaded)

# Enhanced visualization function
def plot_policy(Q, config, title):
    """Print the greedy policy as a text grid and show it as a coloured table.

    Obstacles, puddles, checkpoints and the base are drawn with their own
    symbol and colour (checked in that order); every other cell shows the
    arrow of its highest-valued action when the cell has an entry in *Q*,
    and '.' otherwise.

    Args:
        Q: Q-table mapping state -> {action: value}.
        config: Field description with "puddles", "checkpoints" and "base".
        title: Heading used for both the console output and the figure.
    """
    arrow_for = {'U': '↑', 'D': '↓', 'L': '←', 'R': '→'}

    # Classify every cell once, collecting its symbol and background colour.
    symbols = np.full((GRID_SIZE, GRID_SIZE), '.', dtype='<U1')
    colours = []
    for r in range(GRID_SIZE):
        colour_row = []
        for c in range(GRID_SIZE):
            here = (r, c)
            if here in OBSTACLES:
                glyph, shade = '■', '#808080'  # Gray
            elif here in config["puddles"]:
                glyph, shade = '~', '#4FC3F7'  # Light Blue
            elif here in config["checkpoints"]:
                glyph, shade = '✓', '#81C784'  # Light Green
            elif here == config["base"]:
                glyph, shade = '⌂', '#F48FB1'  # Light Pink
            else:
                shade = '#FFFFFF'  # White
                if here in Q:
                    glyph = arrow_for[max(Q[here], key=Q[here].get)]
                else:
                    glyph = '.'
            symbols[r, c] = glyph
            colour_row.append(shade)
        colours.append(colour_row)

    # Console rendering.
    print(f"\n{title}:")
    print("Legend: ■=Obstacle, ~=Puddle, ✓=Checkpoint, ⌂=Base, Arrows=Policy")
    for line in symbols:
        print(' '.join(format(sym, '>2') for sym in line))

    # Figure rendering: one table cell per grid cell.
    _, ax = plt.subplots(figsize=(12, 10))
    ax.set_title(title, fontsize=16, fontweight='bold', pad=20)

    table = ax.table(
        cellText=symbols,
        cellColours=colours,
        cellLoc='center',
        loc='center',
        bbox=[0, 0, 1, 1],
    )
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1.2, 1.2)

    # Uniform heavy borders and bold text on every cell.
    for table_cell in table.get_celld().values():
        table_cell.set_linewidth(2)
        table_cell.set_edgecolor('#333333')
        table_cell.set_text_props(weight='bold')

    ax.axis('off')
    plt.tight_layout()
    plt.show()

def visualize_policy(Q, config, title="Policy Visualization"):
    """Draw the field as coloured squares with a red arrow per learned state.

    Grid row 0 is drawn at the top of the axes. Obstacles, puddles,
    checkpoints and the base get a fill colour and a text label (checked in
    that order); any cell with an entry in *Q* also gets an arrow pointing
    along its highest-valued action.

    Args:
        Q: Q-table mapping state -> {action: value}.
        config: Field description with "puddles", "checkpoints" and "base".
        title: Figure title.
    """
    # Arrow displacement in plot coordinates for each action.
    arrow_offsets = {'U': (0, 0.3), 'D': (0, -0.3), 'L': (-0.3, 0), 'R': (0.3, 0)}

    _, ax = plt.subplots(figsize=(10, 10))
    ax.set_xlim(0, GRID_SIZE)
    ax.set_ylim(0, GRID_SIZE)
    ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
    ax.set_xticks(np.arange(0, GRID_SIZE+1, 1))
    ax.set_yticks(np.arange(0, GRID_SIZE+1, 1))
    ax.grid(True, alpha=0.3)

    # Add labels
    ax.set_xlabel('X Coordinate', fontsize=12)
    ax.set_ylabel('Y Coordinate', fontsize=12)

    for row in range(GRID_SIZE):
        for col in range(GRID_SIZE):
            cell = (row, col)

            # Decide fill colour and caption together.
            if cell in OBSTACLES:
                fill, caption = 'gray', 'OBS'
            elif cell in config["puddles"]:
                fill, caption = 'lightblue', 'PUD'
            elif cell in config["checkpoints"]:
                fill, caption = 'lightgreen', 'CHK'
            elif cell == config["base"]:
                fill, caption = 'lightcoral', 'BASE'
            else:
                fill, caption = 'white', None

            # Flip the row index so that row 0 ends up at the top.
            square = patches.Rectangle(
                (col, GRID_SIZE-row-1), 1, 1,
                linewidth=1.5, edgecolor='black',
                facecolor=fill, alpha=0.8,
            )
            ax.add_patch(square)

            centre_x = col + 0.5
            centre_y = GRID_SIZE - row - 0.5

            if caption is not None:
                ax.text(centre_x, centre_y, caption,
                        ha='center', va='center', fontweight='bold')

            # Greedy-action arrow for every state the table knows about.
            if cell in Q:
                greedy = max(Q[cell], key=Q[cell].get)
                dx, dy = arrow_offsets[greedy]
                ax.arrow(centre_x, centre_y, dx, dy,
                         head_width=0.15, head_length=0.1, fc='red', ec='red',
                         linewidth=2, alpha=0.8)

    plt.gca().set_aspect('equal', adjustable='box')
    plt.tight_layout()
    plt.show()

# Section 1: table-style policy plots for both fields.
print(f"\n{'=' * 60}")
print("POLICY VISUALIZATIONS")
print("=" * 60)

plot_policy(Q_A, config_A, "Field A Policy (Original Training)")
plot_policy(Q_transfer, config_B, "Policy after Transfer Learning in Field B")

# Section 2: summary of the greedy action chosen in each learned state.
print(f"\n{'=' * 60}")
print("Q-TABLE ANALYSIS")
print("=" * 60)

best_action_by_state = {
    cell: max(action_values, key=action_values.get)
    for cell, action_values in Q_transfer.items()
}
df = pd.DataFrame.from_dict(
    best_action_by_state, orient='index', columns=['Best Action']
)
print("Best Actions for each state in Field B (Transfer Learning):")
print(df.head(10))

print(f"\nTotal states learned: {len(Q_transfer)}")
print("Action distribution:")
action_counts = df['Best Action'].value_counts()
for action, count in action_counts.items():
    print(f"  {action}: {count} states ({count/len(df)*100:.1f}%)")

# Section 3: square-grid plots with policy arrows for both fields.
print(f"\n{'=' * 60}")
print("ENHANCED GRID VISUALIZATIONS")
print("=" * 60)

visualize_policy(Q_A, config_A, title="Autonomous Farming Policy in Field A")
visualize_policy(Q_transfer, config_B, title="Autonomous Farming Transfer Policy in Field B")

print("\nVisualization Complete!")

### TASK-1
Update the environment so that survivors randomly change locations every few episodes. The robot must relearn paths efficiently using prioritized sweeping instead of starting from scratch.

### TASK-2
Design certain cells to turn into traps mid-episode to simulate collapsing floors. Add a probabilistic element to cell stability, forcing the robot to quickly re-prioritize states with high error. 

### TASK-3
Extend the setup to multiple rescue robots. Each robot learns independently but shares knowledge of dangerous zones. Coordinate their learning to minimize path overlap and maximize coverage. 
