In [5]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd

class ExperiencePool:
    """
    Accumulates trajectory data as four parallel lists.

    Entry ``i`` of ``states``, ``actions``, ``rewards`` and ``dones``
    together describes the i-th transition recorded through ``add``.
    """

    def __init__(self):
        # One list per transition field; add() appends to all four at once.
        self.states, self.actions, self.rewards, self.dones = [], [], [], []

    def add(self, state, action, reward, done):
        """
        Record a single transition.

        ``state`` is the observation, ``action`` the action paired with it,
        ``reward`` the computed reward and ``done`` the episode-termination
        flag (0 = not done, 1 = done).
        """
        pairs = (
            (self.states, state),
            (self.actions, action),
            (self.rewards, reward),
            (self.dones, done),
        )
        for buffer, value in pairs:
            buffer.append(value)

    def __len__(self):
        """Return how many transitions have been stored (length of ``states``)."""
        return len(self.states)

# Load the dataset from a CSV file
csv_file_path = './encoded_file.csv'  # Replace with the actual path to your dataset
df = pd.read_csv(csv_file_path)

# NOTE: the 'CCAs' column must already be numeric in the CSV, because the
# float cast below raises on raw string labels. If it still holds names,
# map them to integers first, for example:
# cca_mapping = {'Cubic': 0, 'BBR': 1, 'PCC': 2}  # Example mapping for CCAs
# df['CCAs'] = df['CCAs'].map(cca_mapping)  # Map CCAs to integers

# Columns that make up the state vector, in this order
columns_to_use = [
    'Throughput',     # Measured throughput
    'LossRate',       # Packet loss rate
    'Latency',        # Round-trip time (RTT)
    'SendingRate',    # Sending rate of the flow
    'CCAs'            # Encoded CCA as part of the state
]

# Cast all state columns to float in one step (raises KeyError if one is missing)
df[columns_to_use] = df[columns_to_use].astype(float)

# Initialize the experience pool
exp_pool = ExperiencePool()

# Extract everything column-wise once instead of building a Series per row
# with iterrows(); the resulting values are the same as the row-wise version.
state_matrix = df[columns_to_use].to_numpy(dtype=np.float32)  # one float32 row per sample
rewards = df['Throughput'].tolist()    # Reward: throughput, as Python floats
actions = df['SendingRate'].tolist()   # Action: sending rate, as Python floats

# NOTE(review): every transition is stored with done=0, so the pool never
# contains an episode-termination flag. Confirm the consumer expects that.
for state_row, action, reward in zip(state_matrix, actions, rewards):
    # Copy so each stored state owns its data rather than viewing state_matrix
    state = state_row.copy()
    # Per-row print kept so the cell output stays the same as before
    print("state",state)
    exp_pool.add(state=state, action=action, reward=reward, done=0)

# Save the experience pool using pickle
pickle_save_path = './llm_framework/data/exp_pools/exp_pool_with_cca_mapping.pkl'
# open(..., 'wb') does not create missing parent directories; create them so
# the save cannot fail with FileNotFoundError after all rows were processed.
Path(pickle_save_path).parent.mkdir(parents=True, exist_ok=True)
with open(pickle_save_path, 'wb') as file:
    pickle.dump(exp_pool, file)

print(f"Done. Experience pool saved at: {pickle_save_path}")


state [9.0690030e+06 3.4533278e-03 2.6519661e-04 4.4118856e+05 2.0000000e+00]
state [1.1320464e+07 3.1052853e-04 2.7891120e-04 4.3699522e+05 2.0000000e+00]
state [1.0438632e+07 1.7193457e-04 2.5781430e-04 4.9379841e+05 2.0000000e+00]
state [1.0012920e+07 1.2196777e-04 2.8183134e-04 4.6606266e+05 2.0000000e+00]
state [1.0120072e+07 9.4984563e-05 2.7022496e-04 4.8449619e+05 2.0000000e+00]
state [9.1151600e+06 7.8384961e-05 2.7221045e-04 4.8021838e+05 2.0000000e+00]
state [8.8197680e+06 6.7437017e-05 2.7338477e-04 4.6318812e+05 2.0000000e+00]
state [9.522048e+06 5.902955e-05 2.660778e-04 5.715385e+05 2.000000e+00]
state [9.0992320e+06 5.2383468e-05 2.8466759e-04 6.0112062e+05 2.0000000e+00]
state [8.8776880e+06 4.7252190e-05 2.6931145e-04 5.9041469e+05 2.0000000e+00]
state [7.4062880e+06 4.3394662e-05 2.6953698e-04 5.2923056e+05 2.0000000e+00]
state [7.6688400e+06 4.0354033e-05 2.6757299e-04 6.1286081e+05 2.0000000e+00]
state [7.5310480e+06 3.7689791e-05 2.7709143e-04 7.4078806e+05 2.0000