Plot the GDR HP tuning

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import wandb
from matplotlib.ticker import FuncFormatter
import io
import numpy as np

# Function to read and process the file
def read_and_process_file(filename):
    """Parse a tuning-results log and return one smoothed reward curve per delta.

    Every non-blank line must hold three ``", "``-separated ``name: value``
    fields, in the order delta, timesteps, mean reward. Rows whose delta is
    not 0.25, 0.5 or 0.75 are ignored. Blank lines (for example a trailing
    empty line at the end of the log) are skipped instead of aborting the
    whole parse.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(delta_025, delta_05, delta_075)``. Each element is a list
        of ``(timesteps, smoothed_mean_reward)`` tuples in file order,
        truncated to the first 200 points.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            fields, or a value cannot be converted to a number.
        IndexError: If a field does not contain the ``": "`` separator.
    """
    max_points = 200
    curves = {0.25: [], 0.5: [], 0.75: []}

    with open(filename, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Nothing to parse on an empty line; skipping it avoids a
                # ValueError from the three-way unpacking below.
                continue

            delta, timesteps, mean_reward = line.split(", ")
            delta_value = float(delta.split(": ")[1])
            timesteps_value = int(timesteps.split(": ")[1])
            mean_reward_value = float(mean_reward.split(": ")[1])

            bucket = curves.get(delta_value)
            if bucket is not None:
                bucket.append((timesteps_value, mean_reward_value))

    # Exponential smoothing: each output is a blend of the current reward and
    # the previous smoothed value; the first point is passed through as-is.
    def smooth_data(data, smooth_factor=0.05):
        smoothed_data = []
        for timestep, reward in data:
            if smoothed_data:
                reward = smooth_factor * reward + (1 - smooth_factor) * smoothed_data[-1][1]
            smoothed_data.append((timestep, reward))
        return smoothed_data

    delta_025_smoothed = smooth_data(curves[0.25])[:max_points]
    delta_05_smoothed = smooth_data(curves[0.5])[:max_points]
    delta_075_smoothed = smooth_data(curves[0.75])[:max_points]

    return delta_025_smoothed, delta_05_smoothed, delta_075_smoothed

# Load the three smoothed reward curves, one per delta value.
# NOTE(review): this cell is titled "GDR" but reads the UDR results
# directory — confirm the path is the intended one.
delta_025_avg, delta_05_avg, delta_075_avg = read_and_process_file('models_delta_UDR_5M_seed:42/result.txt')

# Start the Weights & Biases run that will receive the figure
wandb.init(project='nome_progetto')

# Open the figure the curves are drawn on
plt.figure(figsize=(12, 6))


def _plot_curve(points, fmt, legend_label):
    """Draw one (timesteps, reward) series as a line with circular markers."""
    timesteps = [point[0] for point in points]
    rewards = [point[1] for point in points]
    plt.plot(timesteps, rewards, fmt, label=legend_label, linewidth=2, markersize=5)


# Delta 0.25 in green, 0.5 in blue, 0.75 in red
_plot_curve(delta_025_avg, 'go-', 'Delta: 0.25')
_plot_curve(delta_05_avg, 'bo-', 'Delta: 0.5')
_plot_curve(delta_075_avg, 'ro-', 'Delta: 0.75')

# Custom formatter function to display ticks in 'M'
def millions(x, pos):
    """Return the tick value ``x`` expressed in millions, e.g. ``'2.5M'``.

    ``pos`` is the tick position argument that a ``FuncFormatter`` callback
    receives; it is not used here.
    """
    scaled = x * 1e-6
    return f'{scaled:1.1f}M'

# Show x-axis ticks in millions of timesteps
plt.gca().xaxis.set_major_formatter(FuncFormatter(millions))

plt.xticks(fontsize=20)
plt.yticks(fontsize=17)

plt.xlabel('Timesteps', fontsize=17)
plt.ylabel('Mean Test Reward Every 50K Timesteps', fontsize=14)
plt.legend(fontsize=14)
plt.grid(True)

# Render the figure as a PNG into an in-memory buffer
plot_buffer = io.BytesIO()
plt.savefig(plot_buffer, format='png')
plot_buffer.seek(0)  # rewind so the image can be read back from the start

# Decode the PNG back into an image array
img = mpimg.imread(plot_buffer)

# Log the image to Weights & Biases
wandb.log({"Mean Test Reward Plot": wandb.Image(img)})

plt.show()

# Close the matplotlib plot
plt.close()

# Finish the run so the logged image is uploaded and the run is marked as
# complete before another cell starts a new one.
wandb.finish()


Plot the UDR HP tuning

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import wandb
from matplotlib.ticker import FuncFormatter
import io
import numpy as np

# Function to read and process the file
def read_and_process_file(filename):
    """Parse a tuning-results log and return one smoothed reward curve per delta.

    Every non-blank line must hold three ``", "``-separated ``name: value``
    fields, in the order delta, timesteps, mean reward. Rows whose delta is
    not 0.25, 0.5 or 0.75 are ignored. Blank lines (for example a trailing
    empty line at the end of the log) are skipped instead of aborting the
    whole parse.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(delta_025, delta_05, delta_075)``. Each element is a list
        of ``(timesteps, smoothed_mean_reward)`` tuples in file order,
        truncated to the first 200 points.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            fields, or a value cannot be converted to a number.
        IndexError: If a field does not contain the ``": "`` separator.
    """
    max_points = 200
    curves = {0.25: [], 0.5: [], 0.75: []}

    with open(filename, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Nothing to parse on an empty line; skipping it avoids a
                # ValueError from the three-way unpacking below.
                continue

            delta, timesteps, mean_reward = line.split(", ")
            delta_value = float(delta.split(": ")[1])
            timesteps_value = int(timesteps.split(": ")[1])
            mean_reward_value = float(mean_reward.split(": ")[1])

            bucket = curves.get(delta_value)
            if bucket is not None:
                bucket.append((timesteps_value, mean_reward_value))

    # Exponential smoothing: each output is a blend of the current reward and
    # the previous smoothed value; the first point is passed through as-is.
    def smooth_data(data, smooth_factor=0.05):
        smoothed_data = []
        for timestep, reward in data:
            if smoothed_data:
                reward = smooth_factor * reward + (1 - smooth_factor) * smoothed_data[-1][1]
            smoothed_data.append((timestep, reward))
        return smoothed_data

    delta_025_smoothed = smooth_data(curves[0.25])[:max_points]
    delta_05_smoothed = smooth_data(curves[0.5])[:max_points]
    delta_075_smoothed = smooth_data(curves[0.75])[:max_points]

    return delta_025_smoothed, delta_05_smoothed, delta_075_smoothed

# Load the three smoothed reward curves, one per delta value
delta_025_avg, delta_05_avg, delta_075_avg = read_and_process_file('models_delta_UDR_5M_seed:42/result.txt')

# Start the Weights & Biases run that will receive the figure
wandb.init(project='nome_progetto')

# Open the figure the curves are drawn on
plt.figure(figsize=(12, 6))


def _plot_curve(points, fmt, legend_label):
    """Draw one (timesteps, reward) series as a line with circular markers."""
    timesteps = [point[0] for point in points]
    rewards = [point[1] for point in points]
    plt.plot(timesteps, rewards, fmt, label=legend_label, linewidth=2, markersize=5)


# Delta 0.25 in green, 0.5 in blue, 0.75 in red
_plot_curve(delta_025_avg, 'go-', 'Delta: 0.25')
_plot_curve(delta_05_avg, 'bo-', 'Delta: 0.5')
_plot_curve(delta_075_avg, 'ro-', 'Delta: 0.75')

# Custom formatter function to display ticks in 'M'
def millions(x, pos):
    """Return the tick value ``x`` expressed in millions, e.g. ``'2.5M'``.

    ``pos`` is the tick position argument that a ``FuncFormatter`` callback
    receives; it is not used here.
    """
    scaled = x * 1e-6
    return f'{scaled:1.1f}M'

# Show x-axis ticks in millions of timesteps
plt.gca().xaxis.set_major_formatter(FuncFormatter(millions))

plt.xticks(fontsize=20)
plt.yticks(fontsize=17)

plt.xlabel('Timesteps', fontsize=17)
plt.ylabel('Mean Test Reward Every 50K Timesteps', fontsize=14)
plt.legend(fontsize=14)
plt.grid(True)

# Render the figure as a PNG into an in-memory buffer
plot_buffer = io.BytesIO()
plt.savefig(plot_buffer, format='png')
plot_buffer.seek(0)  # rewind so the image can be read back from the start

# Decode the PNG back into an image array
img = mpimg.imread(plot_buffer)

# Log the image to Weights & Biases
wandb.log({"Mean Test Reward Plot": wandb.Image(img)})

plt.show()

# Close the matplotlib plot
plt.close()

# Finish the run so the logged image is uploaded and the run is marked as
# complete before another cell starts a new one.
wandb.finish()


Plot the ADR pre-training

In [None]:

import json
import numpy as np
import matplotlib.pyplot as plt
import wandb

# Start the Weights & Biases run for this figure
wandb.init(project="plot_project")

# JSON files holding one reward series each
file_names = ['step0.json', 'step1.json', 'step2.json', 'step4.json']

# Parse every file, keeping the series in the same order as file_names
data = []
for file_name in file_names:
    with open(file_name, 'r') as file:
        series = json.load(file)
    data.append(series)

# Name the individual series
step0_data, step1_data, step2_data = data[0], data[1], data[2]
# Keep only the first 1350 entries of the last series.
# NOTE(review): the original comment said this trims the last 100 values —
# confirm against the actual length of step4.json.
step4_data = data[3][:1350]

# X positions: one point every 10 units, shifted by a per-series offset
x_step0 = list(range(0, 10 * len(step0_data), 10))
x_step1 = list(range(0, 10 * len(step1_data), 10))
x_step2 = list(range(3000, 3000 + 10 * len(step2_data), 10))
x_step4 = list(range(1500, 1500 + 10 * len(step4_data), 10))

# Smooth function
def smooth(data, smooth_factor=0.05):
    """Exponentially smooth a sequence of numbers.

    Each output value is ``previous * (1 - smooth_factor) + point *
    smooth_factor``, where ``previous`` starts at the first input value.

    Args:
        data: Sequence of numeric values.
        smooth_factor: Weight given to the current point.

    Returns:
        A list with one smoothed value per input value; an empty list when
        ``data`` is empty.
    """
    if len(data) == 0:
        # No first element to seed the recurrence with.
        return []

    smoothed = []
    last = data[0]
    for point in data:
        last = last * (1 - smooth_factor) + point * smooth_factor
        smoothed.append(last)
    return smoothed

# Smooth every series with the same factor
smooth_factor = 0.1
step0_data_smoothed = smooth(step0_data, smooth_factor)
step1_data_smoothed = smooth(step1_data, smooth_factor)
step2_data_smoothed = smooth(step2_data, smooth_factor)
step4_data_smoothed = smooth(step4_data, smooth_factor)

# Plot settings
line_width = 2.5  # Width shared by all curves
point_size = 10   # Not used by any call in this cell

# Open the figure
plt.figure(figsize=(10, 6))

# Curves in drawing order (this is also the legend order)
_curves = (
    (x_step1, step1_data_smoothed, 'k', 'Pre-training in Source'),
    (x_step0, step0_data_smoothed, 'b', 'ADR-Source from the start'),
    (x_step4, step4_data_smoothed, 'g', 'ADR-Source after 1500 ep of pre-training'),
    (x_step2, step2_data_smoothed, 'r', 'ADR-Source after 3000 ep of pre-training'),
)
for _xs, _ys, _fmt, _label in _curves:
    plt.plot(_xs, _ys, _fmt, label=_label, linewidth=line_width)

# Marker heights; not used by the scatter calls below, which use literals
height_red = 0.6
height_green = 0.4

# Mark x=3000 (red) and x=1500 (green) with large dots drawn above the lines
plt.scatter([3000], [1029.512120969323], color='r', s=100, zorder=5)
plt.scatter([1500], [683.062863138861], color='g', s=100, zorder=5)

# Axis labels and legend
plt.xlabel('Episodes', fontsize=14)
plt.ylabel('Test Rewards', fontsize=14)
plt.legend(fontsize=12)

# Tick label size on both axes
plt.tick_params(axis='both', which='major', labelsize=14)

# Restrict the x-axis range
plt.xlim(0, 14000)

# Replace the 2000 tick with ticks at 1500 and 3000
current_ticks = plt.xticks()[0]
kept_ticks = current_ticks[current_ticks != 2000]
new_ticks = np.append(kept_ticks, [1500, 3000])
plt.xticks(new_ticks)

# Log the plot to wandb BEFORE showing it: once plt.show() returns, the
# current figure may already have been released (as with notebook inline
# backends), in which case `plt` would refer to a new, empty figure.
wandb.log({"combined_plot": plt})

# Show the plot on the screen
plt.show()

# Finish wandb session
wandb.finish()
