#  Data Collection: Load Dataset

In [1]:
# Download a file from Google Drive by its file ID; the cell output shows it is
# saved as /content/kaggle.json.
# NOTE(review): a later cell moves this file into ~/.kaggle/, so it presumably
# holds Kaggle API credentials. A Drive file that can be fetched by ID alone
# would expose them to anyone with this notebook — confirm the sharing settings.
!gdown '1KEpEe1kFHb0T_fk03DVl8em1QPIX0xwQ'

Downloading...
From: https://drive.google.com/uc?id=1KEpEe1kFHb0T_fk03DVl8em1QPIX0xwQ
To: /content/kaggle.json
  0% 0.00/70.0 [00:00<?, ?B/s]100% 70.0/70.0 [00:00<00:00, 344kB/s]


In [2]:
!pip install -q kaggle

In [3]:
# Put the downloaded kaggle.json into ~/.kaggle/ (presumably where the kaggle
# CLI looks for its API token — confirm against the Kaggle API docs).
# -p makes mkdir succeed even when the directory already exists.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [4]:
# Download the dataset archive; the cell output shows it lands in /content as
# google-2019-cluster-sample.zip.
# NOTE(review): --force presumably re-downloads even when a local copy already
# exists — confirm against `kaggle datasets download --help`.
!kaggle datasets download --force derrickmwiti/google-2019-cluster-sample


Downloading google-2019-cluster-sample.zip to /content
 96% 93.0M/96.7M [00:00<00:00, 141MB/s]
100% 96.7M/96.7M [00:00<00:00, 126MB/s]


In [5]:
# Extract the archive into the current working directory; the cell output shows
# a single file, borg_traces_data.csv. -o overwrites existing files without
# prompting, so the cell can be re-run.
!unzip -o /content/google-2019-cluster-sample.zip

Archive:  /content/google-2019-cluster-sample.zip
  inflating: borg_traces_data.csv    


In [6]:
import pandas as pd

In [7]:
# Load the extracted trace sample into the DataFrame used by every later cell.
# NOTE(review): the loaded frame contains an 'Unnamed: 0' column (see the column
# listings below), which looks like a row index that was written into the CSV —
# consider index_col=0 if it is not a real feature; TODO confirm.
cluster_data_df = pd.read_csv('/content/borg_traces_data.csv')

# Exploratory Data Analysis (EDA)

In [8]:
from prettytable import PrettyTable

In [9]:
# Report the dataset dimensions (rows, columns) as a one-row text table.
table = PrettyTable(["Description", "Value"])
table.add_row(["Data Shape", f"{cluster_data_df.shape}"])
# Table followed by the same blank-line padding as two separate prints.
print(table, end="\n\n\n")

+-------------+--------------+
| Description |    Value     |
+-------------+--------------+
|  Data Shape | (405894, 34) |
+-------------+--------------+




In [10]:
from prettytable import PrettyTable

# Collect the names of the columns pandas stores as generic objects (text) or
# as categoricals; these are treated as the categorical features later on.
categorical_columns = cluster_data_df.select_dtypes(
    include=['object', 'category']
).columns
categorical_columns

Index(['resource_request', 'constraint', 'user', 'collection_name',
       'collection_logical_name', 'start_after_collection_ids',
       'average_usage', 'maximum_usage', 'random_sample_usage',
       'cpu_usage_distribution', 'tail_cpu_usage_distribution', 'event'],
      dtype='object')

In [11]:
from prettytable import PrettyTable

# Table listing every column with its dtype and its categorical/numerical label;
# the header row is supplied directly at construction time.
classification_table = PrettyTable(["Column Name", "Data Type", "Classification"])

# Label a column as categorical or numerical from its dtype
def classify_column(data_type):
    """Return 'categorical' for object/category dtypes, 'numerical' otherwise.

    ``data_type`` is compared by equality against the names 'object' and
    'category', so both dtype objects that compare equal to those names and
    the plain strings themselves are accepted.
    """
    return 'categorical' if data_type in ('object', 'category') else 'numerical'

# One table row per column: its name, its dtype rendered as text, and the label
# assigned by classify_column.
for column_name, data_type in cluster_data_df.dtypes.items():
    classification_table.add_row(
        [column_name, str(data_type), classify_column(data_type)]
    )

# Render the completed table
print(classification_table)


+---------------------------------+-----------+----------------+
|           Column Name           | Data Type | Classification |
+---------------------------------+-----------+----------------+
|            Unnamed: 0           |   int64   |   numerical    |
|               time              |   int64   |   numerical    |
|       instance_events_type      |   int64   |   numerical    |
|          collection_id          |   int64   |   numerical    |
|         scheduling_class        |   int64   |   numerical    |
|         collection_type         |   int64   |   numerical    |
|             priority            |   int64   |   numerical    |
|       alloc_collection_id       |   int64   |   numerical    |
|          instance_index         |   int64   |   numerical    |
|            machine_id           |   int64   |   numerical    |
|         resource_request        |   object  |  categorical   |
|            constraint           |   object  |  categorical   |
|     collections_events_

In [12]:
# Import the necessary libraries
import pandas as pd
import io
import sys

# Show the per-column dtype / non-null summary in the cell output.
# DataFrame.info() prints its report and returns None, so its return value is
# not worth storing.
cluster_data_df.info()

# Capture the same report as a string. info() accepts a writable buffer, which
# avoids swapping sys.stdout by hand (a swap with no try/finally leaves stdout
# redirected if info() raises).
info_buffer = io.StringIO()
cluster_data_df.info(buf=info_buffer)
data_info_str = info_buffer.getvalue()

# Keep the original `data_info` name available; it now holds the report text
# instead of None.
data_info = data_info_str


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 405894 entries, 0 to 405893
Data columns (total 34 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   Unnamed: 0                       405894 non-null  int64  
 1   time                             405894 non-null  int64  
 2   instance_events_type             405894 non-null  int64  
 3   collection_id                    405894 non-null  int64  
 4   scheduling_class                 405894 non-null  int64  
 5   collection_type                  405894 non-null  int64  
 6   priority                         405894 non-null  int64  
 7   alloc_collection_id              405894 non-null  int64  
 8   instance_index                   405894 non-null  int64  
 9   machine_id                       405894 non-null  int64  
 10  resource_request                 405120 non-null  object 
 11  constraint                       405894 non-null  object 
 12  co

In [13]:
from prettytable import PrettyTable

# Summary statistics for every column, numeric and non-numeric alike
description = cluster_data_df.describe(include='all')

# Text table whose header is an empty corner cell followed by the column names
table = PrettyTable(["", *description.columns])

# Each statistic (count, unique, mean, ...) becomes one table row
for stat_name, stat_values in description.iterrows():
    table.add_row([stat_name, *stat_values])

print("Description of All Columns:", table, sep="\n")


Description of All Columns:
+--------+--------------------+------------------------+----------------------+--------------------+--------------------+---------------------+--------------------+---------------------+--------------------+--------------------+-----------------------------------------------------------+------------+-------------------------+----------------------------------------------+----------------------------------------------+----------------------------------------------+----------------------------+--------------------+--------------------+-------------------+--------------------+----------------------------------------------+----------------------------------------------+-------------------------------+----------------------+-----------------------+------------------------+---------------------------------+----------------------+------------------------------------+------------------------------+--------------------+--------+---------------------+
|        |     U

In [14]:
# Preview the data: the first five records as plain Python lists
data_as_list = cluster_data_df.head().values.tolist()

# Column names first, so the positional values printed below can be matched up
print("Columns:", cluster_data_df.columns.tolist(), "\n", sep="\n")

print("First 5 rows of the data:")
for record in data_as_list:
    print(record)
print("\n")



Columns:
['Unnamed: 0', 'time', 'instance_events_type', 'collection_id', 'scheduling_class', 'collection_type', 'priority', 'alloc_collection_id', 'instance_index', 'machine_id', 'resource_request', 'constraint', 'collections_events_type', 'user', 'collection_name', 'collection_logical_name', 'start_after_collection_ids', 'vertical_scaling', 'scheduler', 'start_time', 'end_time', 'average_usage', 'maximum_usage', 'random_sample_usage', 'assigned_memory', 'page_cache_memory', 'cycles_per_instruction', 'memory_accesses_per_instruction', 'sample_rate', 'cpu_usage_distribution', 'tail_cpu_usage_distribution', 'cluster', 'event', 'failed']


First 5 rows of the data:
[0, 0, 2, 94591244395, 3, 1, 200, 0, 144, 168846390496, "{'cpus': 0.020660400390625, 'memory': 0.014434814453125}", '[]', 2, 'fn8Ve4Tdl/FVVvwXFGIKe4+Wo4zLjUL/557qdFVYu5M=', 'Hzsv/gF8CPQXdqpsfovDTC1TJNyphDxPu7vaTeNxA74=', 'YCuhYrnORLiUh9WGL5q5tkBevfwtucSnFr2qPZh6Kes=', '[]', 1.0, 0.0, 274800000000, 275100000000, "{'cpus': 0.0046

In [15]:
# Tabulate each column next to the dtype pandas inferred for it
table = PrettyTable(["Column", "Data Type"])
for name, inferred_dtype in cluster_data_df.dtypes.items():
    table.add_row([name, f"{inferred_dtype}"])
print(table, end="\n\n\n")

+---------------------------------+-----------+
|              Column             | Data Type |
+---------------------------------+-----------+
|            Unnamed: 0           |   int64   |
|               time              |   int64   |
|       instance_events_type      |   int64   |
|          collection_id          |   int64   |
|         scheduling_class        |   int64   |
|         collection_type         |   int64   |
|             priority            |   int64   |
|       alloc_collection_id       |   int64   |
|          instance_index         |   int64   |
|            machine_id           |   int64   |
|         resource_request        |   object  |
|            constraint           |   object  |
|     collections_events_type     |   int64   |
|               user              |   object  |
|         collection_name         |   object  |
|     collection_logical_name     |   object  |
|    start_after_collection_ids   |   object  |
|         vertical_scaling        |  flo

In [16]:
# Summary statistics restricted to pandas' default selection (numeric columns)
desc_stats = cluster_data_df.describe()

# Header: empty corner cell followed by the summarised column names
pt = PrettyTable(["", *desc_stats.columns.tolist()])

# One row per statistic (count, mean, std, min, quartiles, max)
for stat_name, stat_values in desc_stats.iterrows():
    pt.add_row([stat_name, *stat_values.tolist()])

print("Descriptive Statistics for Numerical Columns:", pt, "\n", sep="\n")


Descriptive Statistics for Numerical Columns:
+-------+--------------------+------------------------+----------------------+--------------------+--------------------+---------------------+--------------------+---------------------+--------------------+--------------------+-------------------------+--------------------+--------------------+-------------------+--------------------+----------------------+-----------------------+------------------------+---------------------------------+----------------------+--------------------+---------------------+
|       |     Unnamed: 0     |          time          | instance_events_type |   collection_id    |  scheduling_class  |   collection_type   |      priority      | alloc_collection_id |   instance_index   |     machine_id     | collections_events_type |  vertical_scaling  |     scheduler      |     start_time    |      end_time      |   assigned_memory    |   page_cache_memory   | cycles_per_instruction | memory_accesses_per_instruction |   

In [17]:
# Number of distinct (non-null) values per column, as a two-column table
table = PrettyTable(["Column", "Unique Values Count"])
for name in cluster_data_df.columns:
    distinct_count = cluster_data_df[name].nunique()
    table.add_row([name, distinct_count])
print(table, end="\n\n\n")

+---------------------------------+---------------------+
|              Column             | Unique Values Count |
+---------------------------------+---------------------+
|            Unnamed: 0           |        405894       |
|               time              |        334354       |
|       instance_events_type      |          10         |
|          collection_id          |         4057        |
|         scheduling_class        |          4          |
|         collection_type         |          2          |
|             priority            |          22         |
|       alloc_collection_id       |         448         |
|          instance_index         |        30298        |
|            machine_id           |        96174        |
|         resource_request        |        21900        |
|            constraint           |          78         |
|     collections_events_type     |          10         |
|               user              |         898         |
|         coll

In [18]:
# Number of null entries per column, as a two-column table
table = PrettyTable(["Column", "Missing Values Count"])
for name in cluster_data_df.columns:
    null_count = cluster_data_df[name].isnull().sum()
    table.add_row([name, null_count])
print(table, end="\n\n\n")

+---------------------------------+----------------------+
|              Column             | Missing Values Count |
+---------------------------------+----------------------+
|            Unnamed: 0           |          0           |
|               time              |          0           |
|       instance_events_type      |          0           |
|          collection_id          |          0           |
|         scheduling_class        |          0           |
|         collection_type         |          0           |
|             priority            |          0           |
|       alloc_collection_id       |          0           |
|          instance_index         |          0           |
|            machine_id           |          0           |
|         resource_request        |         774          |
|            constraint           |          0           |
|     collections_events_type     |          0           |
|               user              |          0          

In [19]:
from prettytable import PrettyTable

# Names of the float64/int64 columns; reused by the preprocessing cells below
numerical_columns = cluster_data_df.select_dtypes(include=['float64', 'int64']).columns

# Pairwise correlation between those columns (pandas' default method)
correlation_matrix = cluster_data_df[numerical_columns].corr()

# Header: empty corner cell followed by the feature names
table = PrettyTable(["", *correlation_matrix.columns])

# One row per feature, holding its correlation with every other feature
for feature, coefficients in correlation_matrix.iterrows():
    table.add_row([feature, *coefficients])

print("Correlation Matrix for Numerical Columns:", table, "\n", sep="\n")


Correlation Matrix for Numerical Columns:
+---------------------------------+-------------------------+------------------------+-----------------------+------------------------+-----------------------+-----------------------+-----------------------+------------------------+------------------------+------------------------+-------------------------+------------------------+------------------------+------------------------+-------------------------+-----------------------+-----------------------+------------------------+---------------------------------+------------------------+-----------------------+------------------------+
|                                 |        Unnamed: 0       |          time          |  instance_events_type |     collection_id      |    scheduling_class   |    collection_type    |        priority       |  alloc_collection_id   |     instance_index     |       machine_id       | collections_events_type |    vertical_scaling    |       scheduler        |       st

In [20]:
threshold = 0.7
highly_correlated_pairs = []

# Walk every (row, column) cell of the matrix and keep each distinct pair whose
# absolute correlation exceeds the threshold.
for col_feature in correlation_matrix.columns:
    for row_feature in correlation_matrix.index:
        if col_feature == row_feature:
            continue  # a feature always correlates perfectly with itself
        if not abs(correlation_matrix.loc[row_feature, col_feature]) > threshold:
            continue
        # Sorting makes (a, b) and (b, a) the same key, so each pair is kept once
        candidate = tuple(sorted([row_feature, col_feature]))
        if candidate in highly_correlated_pairs:
            continue
        highly_correlated_pairs.append(candidate)

# Tabulate the retained pairs with their correlation rounded to two decimals
table_corr = PrettyTable(["Feature 1", "Feature 2", "Correlation Value"])

for first, second in highly_correlated_pairs:
    table_corr.add_row([first, second, round(correlation_matrix.loc[first, second], 2)])

print("Highly Correlated Features:")
print(table_corr)
print("\n")


Highly Correlated Features:
+-------------------------+---------------------------------+-------------------+
|        Feature 1        |            Feature 2            | Correlation Value |
+-------------------------+---------------------------------+-------------------+
| collections_events_type |       instance_events_type      |        1.0        |
|   alloc_collection_id   |             priority            |        0.71       |
|         end_time        |            start_time           |        1.0        |
|  cycles_per_instruction | memory_accesses_per_instruction |        0.73       |
+-------------------------+---------------------------------+-------------------+




Here are some considerations based on the columns in your dataset:

Target Variable(s):

average_usage and maximum_usage might be potential target variables as they reflect the load or usage of resources.

Features:

Time-related features: time, start_time, and end_time.

Resource request and constraints: resource_request, constraint.

System configurations: scheduling_class, priority, alloc_collection_id, instance_index, machine_id.

Other performance metrics: assigned_memory, page_cache_memory, cycles_per_instruction, memory_accesses_per_instruction.

# Data Preparation and Pre-Processing

In [21]:
# Re-display the column classification table built earlier (column name, dtype,
# categorical/numerical label) as a reference for the preprocessing cells below.
print(classification_table)

+---------------------------------+-----------+----------------+
|           Column Name           | Data Type | Classification |
+---------------------------------+-----------+----------------+
|            Unnamed: 0           |   int64   |   numerical    |
|               time              |   int64   |   numerical    |
|       instance_events_type      |   int64   |   numerical    |
|          collection_id          |   int64   |   numerical    |
|         scheduling_class        |   int64   |   numerical    |
|         collection_type         |   int64   |   numerical    |
|             priority            |   int64   |   numerical    |
|       alloc_collection_id       |   int64   |   numerical    |
|          instance_index         |   int64   |   numerical    |
|            machine_id           |   int64   |   numerical    |
|         resource_request        |   object  |  categorical   |
|            constraint           |   object  |  categorical   |
|     collections_events_

In [22]:
# Remove duplicated records in place.
# 'Unnamed: 0' holds a distinct value on every row (405894 unique values for
# 405894 rows in the EDA above), so comparing whole rows could never find a
# duplicate. Compare on all other columns instead. If the column is absent this
# is the same as a plain drop_duplicates().
dedup_columns = [col for col in cluster_data_df.columns if col != 'Unnamed: 0']
cluster_data_df.drop_duplicates(subset=dedup_columns, inplace=True)

# Feature Selection and Engineering

In [23]:
'''
Handling Missing Values:
For columns with a high percentage of missing values, we might consider dropping them.
For other columns, you might want to fill in the missing values with an appropriate strategy
(mean, median, mode, or a specific value).

'''

threshold = 0.7  # For instance, if 70% of data is missing
missing_data_ratio = cluster_data_df.isnull().sum() / len(cluster_data_df)
cols_to_drop = missing_data_ratio[missing_data_ratio > threshold].index
cluster_data_df.drop(columns=cols_to_drop, inplace=True)

# numerical_columns / categorical_columns were computed before the drop above,
# so restrict them to the columns that still exist; indexing a dropped column
# would raise KeyError.
remaining_numerical = [col for col in numerical_columns if col in cluster_data_df.columns]
remaining_categorical = [col for col in categorical_columns if col in cluster_data_df.columns]

# For numerical columns, fill missing values with the median.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the in-place form works on an intermediate object and is not
# guaranteed to update the DataFrame (it does not under pandas Copy-on-Write).
for column in remaining_numerical:
    cluster_data_df[column] = cluster_data_df[column].fillna(cluster_data_df[column].median())

# For categorical columns, fill missing values with the mode.
# mode() is empty when a column has no non-null value at all; such a column is
# left untouched rather than failing on the [0] lookup.
for column in remaining_categorical:
    column_modes = cluster_data_df[column].mode()
    if not column_modes.empty:
        cluster_data_df[column] = cluster_data_df[column].fillna(column_modes.iloc[0])


In [24]:
'''
Dropping Low Variance Columns:
Columns with very low variance might not add much value.
'''
from sklearn.feature_selection import VarianceThreshold

# numerical_columns was computed during EDA; keep only the columns that are
# still present so that earlier drops cannot cause a KeyError here.
variance_candidates = [col for col in numerical_columns if col in cluster_data_df.columns]

selector = VarianceThreshold(threshold=0.01)  # A threshold of 0.01 for variance
selector.fit(cluster_data_df[variance_candidates])

# get_support() is True for the columns that pass the threshold; invert it to
# obtain the low-variance columns to remove.
cols_to_drop = cluster_data_df[variance_candidates].columns[~selector.get_support()]
cluster_data_df.drop(columns=cols_to_drop, inplace=True)


In [25]:
'''
Feature Engineering:
Normalization/Standardization of Numerical Columns:
Since RL algorithms (especially deep RL) are sensitive to
the scale of the data, we will standardize the numerical features.
'''

from sklearn.preprocessing import StandardScaler

# Earlier cells may have removed some numerical columns, so keep only the names
# that are still present in the DataFrame.
valid_numerical_columns = []
for col in numerical_columns:
    if col in cluster_data_df.columns:
        valid_numerical_columns.append(col)

# Fit the scaler on the surviving columns and write the standardized values
# back over the originals.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(cluster_data_df[valid_numerical_columns])
cluster_data_df[valid_numerical_columns] = standardized_values


In [26]:
'''
Encoding Categorical Features:
we might want to convert categorical features into a format that can be
provided to machine learning algorithms to improve their performance.
'''

# Encode only the categorical columns that still exist: some may have been
# dropped earlier, and all of them are gone once this cell has run, so the
# filter also makes the cell safe to re-run.
columns_to_encode = [col for col in categorical_columns if col in cluster_data_df.columns]

# A single get_dummies call replaces each listed column with sparse indicator
# columns prefixed by the source column name and appended after the remaining
# columns. This avoids re-concatenating the whole frame once per column.
# NOTE(review): some of these columns have very many distinct values (e.g.
# resource_request has 21900 in the EDA above), so this produces a very wide
# frame — confirm one-hot encoding is really intended for them.
if columns_to_encode:
    cluster_data_df = pd.get_dummies(cluster_data_df, columns=columns_to_encode, sparse=True)



# Next Steps:

Now that we have loaded the data, conducted exploratory data analysis (EDA), and pre-processed it, the next steps are outlined below.
Based on the state of the system (i.e., the load on each server, memory utilization, CPU usage, etc.) and the possible actions (e.g., redirect traffic, start a new server instance), the RL agent will learn the best action to take in each state to maximize its cumulative reward. The exact state and action definitions will depend on your specific problem and infrastructure details.

Problem Formulation: Before jumping into Reinforcement Learning (RL), we need to define the problem clearly. For a cloud load balancer, the goal might be to distribute incoming traffic across a number of servers efficiently to maximize the throughput, minimize response time, etc.
State Representation: Decide on how to represent the state of your system. In the case of a load balancer, the state could consist of metrics like the current load of each server, the queue length of requests, etc.

Action Definition: What are the actions that the RL agent can take? For a load balancer, this might be directing traffic to a particular server.


Reward Function: This is crucial in RL. Define a reward function that gives a signal to the agent about how good or bad its action was. For a load balancer, a simple reward might be negative for every request that isn't handled within a certain time.


Choose an RL Algorithm: Depending on the problem's complexity and the amount of data, we might want to start with simpler algorithms like Q-learning, or go with more complex methods like Deep Q Networks (DQNs) or Proximal Policy Optimization (PPO).


Training the Agent: Use the historical data we have to train the RL agent. We need to make sure to split the data into training and validation sets to evaluate the agent's performance.


Evaluation: After training, evaluate the RL agent's performance on a test dataset or in a simulated environment. This will give us an idea of how well the agent might perform in a real-world scenario.


Deployment: Once satisfied with the performance, you can deploy the RL-based load balancer in a controlled environment. Monitor its performance closely to ensure it's making the right decisions.
Continuous Learning: The environment might change over time (e.g., different traffic patterns), so it's essential to keep re-training or fine-tuning the agent to adapt to new situations.

# State Representation

# Reward Function

The reward can be based on a combination of the following:


Response Time: Higher rewards for faster response times. This can be inferred from metrics like cpu_usage_distribution and assigned_memory.


Server Utilization:
Provide higher rewards when servers are utilized up to a certain threshold (e.g., 85% of their capacity).


Penalize the agent if the cpu_usage_distribution for any server goes beyond a critical limit, say 90%.


Penalize the agent if the assigned_memory for any server is above a critical utilization limit, say 95%.


Energy Consumption: Give higher rewards for actions that result in lower energy consumption. Though direct metrics for energy aren't provided, you could infer from cpu_usage_distribution and memory_accesses_per_instruction.
Server Failures: Heavily penalize the agent if any server fails (failed column).

In [29]:
def reward_function(state):
    """Score a system state; higher is better.

    ``state`` is unpacked positionally into six values: response time, a
    mapping with 'average_usage' and 'maximum_usage' entries, assigned memory,
    a failure flag, cycles per instruction and memory accesses per instruction.

    The score is the sum of four parts:
      * response time: (100 - response_time) / 100, floored at -1;
      * utilization: +1 for average usage within 60..85, -5 when maximum usage
        or assigned memory exceeds 90, -1 when average usage is below 60;
      * failure: -10 when the failure flag is truthy;
      * inferred energy: +0.5 when cycles-per-instruction divided by
        memory-accesses-per-instruction is below 1, otherwise -0.5.
    """
    # Re-adjust based on your actual state representation
    (response_time,
     cpu_usage_distribution,
     assigned_memory,
     failed,
     cycles_per_instruction,
     mem_access_per_instr) = state

    reward = 0

    # Response time: 100 is an example threshold; slower responses bottom out at -1
    desired_response_time = 100  # example threshold
    response_score = (desired_response_time - response_time) / desired_response_time
    reward += max(response_score, -1)

    # Utilization figures may arrive as strings, hence the float conversion
    avg_cpu = float(cpu_usage_distribution['average_usage'])
    peak_cpu = float(cpu_usage_distribution['maximum_usage'])

    in_target_band = 60 <= avg_cpu <= 85
    if in_target_band:
        reward += 1
    if peak_cpu > 90 or assigned_memory > 90:
        reward -= 5
    if avg_cpu < 60:
        reward -= 1

    # Failures dominate every other term
    if failed:
        reward -= 10

    # Inferred energy term; the small constant guards against division by zero
    # and 1 is an arbitrary cut-off to be tuned with domain insight.
    energy_efficiency = cycles_per_instruction / (mem_access_per_instr + 1e-5)
    reward += 0.5 if energy_efficiency < 1 else -0.5

    return reward


# Implementing RL Algorithm

In [34]:


import numpy as np

class CloudBalancerEnvironment:
    """Stub environment for the load-balancing agent.

    The state is a dict of randomly generated metrics; ``step`` ignores the
    action and simply draws a fresh random state, then scores it with
    ``calculate_reward``. All randomness comes from numpy's global generator.
    """

    def __init__(self):
        # Initial state. For simplicity the values are random; in reality they
        # would be fetched from data.
        self.state = self._random_state()
        self.current_state = None
        self.done = False  # to check if the episode is done

    @staticmethod
    def _random_state():
        """Draw one random state dict (percent-like values in 0-100, ratios in 0-1)."""
        return {
            'cpu_usage_distribution': {'average_usage': np.random.rand()*100, 'maximum_usage': np.random.rand()*100},
            'assigned_memory': np.random.rand()*100,
            'failed': np.random.choice([True, False]),
            'page_cache_memory': np.random.rand()*100,
            'cycles_per_instruction': np.random.rand(),
            'memory_accesses_per_instruction': np.random.rand()
        }

    def step(self, action):
        """Advance one step and return ``(new_state, reward)``.

        This is a stub: the new state does not depend on ``action``; it is
        re-randomized. A real environment would update the state from the action.
        """
        self.state = self._random_state()

        reward = self.calculate_reward(action)
        return self.state, reward

    def calculate_reward(self, action):
        """Score the current ``self.state``; ``action`` is accepted but not used.

        Terms: ``1 - average_usage/100`` (floored at -1); -5 when maximum CPU
        usage or assigned memory exceeds 90; -1 when average usage is below 60;
        -10 on failure; +0.5 / -0.5 depending on whether cycles-per-instruction
        over memory-accesses-per-instruction is below 1.
        """
        reward = 0
        average_cpu_usage = self.state['cpu_usage_distribution']['average_usage']
        maximum_cpu_usage = self.state['cpu_usage_distribution']['maximum_usage']

        # Response Time Reward (approximated from average CPU usage)
        reward += max(1 - average_cpu_usage/100, -1)
        if maximum_cpu_usage > 90 or self.state['assigned_memory'] > 90:
            reward -= 5
        if average_cpu_usage < 60:
            reward -= 1

        # Server Failure Penalty
        if self.state['failed']:
            reward -= 10

        # Energy Consumption (Inferred); the small constant avoids division by zero
        energy_efficiency = self.state['cycles_per_instruction'] / (self.state['memory_accesses_per_instruction'] + 1e-5)
        if energy_efficiency < 1:
            reward += 0.5
        else:
            reward -= 0.5

        return reward

    def reset(self):
        """Start a new episode and return the fresh initial state.

        The state is returned so callers can write ``state = env.reset()``;
        returning nothing left such callers holding ``None``.
        """
        self.state = self._random_state()
        self.current_state = None
        self.done = False
        return self.state

class LoadBalancerAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    Q-values are stored sparsely: ``q_table`` maps a state index to a length
    ``n_actions`` array that is created (all zeros) the first time the state
    is touched. A dense ``(n_states, n_actions)`` array cannot be allocated
    for the state-space size used in this notebook (100 ** 6 states), whereas
    only the states actually visited need storage.

    Parameters
    ----------
    n_states : int
        Nominal size of the state space; recorded for reference only.
    n_actions : int
        Number of discrete actions.
    alpha : float
        Learning rate.
    gamma : float
        Discount factor.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    """

    def __init__(self, n_states, n_actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.n_states = n_states
        self.n_actions = n_actions
        self.q_table = {}  # state index -> np.ndarray of shape (n_actions,)
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate

    def _q_values(self, state):
        """Return the Q-value row for ``state``, creating a zero row on first use."""
        row = self.q_table.get(state)
        if row is None:
            row = np.zeros(self.n_actions)
            self.q_table[state] = row
        return row

    def select_action(self, state):
        """Pick an action for ``state``: random with probability epsilon, else greedy."""
        # Epsilon-greedy action selection
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.n_actions)
        else:
            return np.argmax(self._q_values(state))

    def update(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition (state, action, reward, next_state)."""
        # Q-learning update rule: bootstrap from the best action in the next state
        next_row = self._q_values(next_state)
        best_next_action = np.argmax(next_row)
        td_target = reward + self.gamma * next_row[best_next_action]
        current_row = self._q_values(state)
        td_error = td_target - current_row[action]
        current_row[action] += self.alpha * td_error

# Number of states is a rough approximation for simplicity
# Assuming each state variable can take 100 values, and there are 6 state variables
# NOTE(review): the environment's state dict actually holds 7 leaf values
# (average/maximum CPU usage, assigned memory, failed, page cache memory,
# cycles per instruction, memory accesses per instruction) — confirm the count.
# NOTE(review): 100 ** 6 is 10**12 states; any dense per-state table of that
# size will not fit in memory — TODO confirm the agent's table representation
# can cope with it.
n_states = 100 ** 6
env = CloudBalancerEnvironment()
# NOTE(review): the meaning of the 3 actions is not defined anywhere in this
# notebook — TODO document them.
agent = LoadBalancerAgent(n_states, n_actions=3)


# Training the Agent

In [None]:
# Train the agent
N_BINS = 10  # number of buckets each continuous state variable is discretized into


def encode_state(state, n_bins=N_BINS):
    """Map an environment state dict to a single non-negative integer index.

    The state is a (nested) dict, so iterating over it directly yields its
    string keys rather than numbers. Instead, each continuous metric is scaled
    to the 0-1 range (percent-like values are divided by 100), bucketed into
    ``n_bins`` bins, and the buckets are combined with the failure flag as the
    digits of a mixed-radix number. With the default 10 bins the largest index
    is 2 * 10**6 - 1.
    """
    cpu = state['cpu_usage_distribution']
    fractions = [
        cpu['average_usage'] / 100,
        cpu['maximum_usage'] / 100,
        state['assigned_memory'] / 100,
        state['page_cache_memory'] / 100,
        state['cycles_per_instruction'],
        state['memory_accesses_per_instruction'],
    ]
    index = int(bool(state['failed']))
    for fraction in fractions:
        # Clamp so that out-of-range values still land in a valid bucket
        bucket = min(max(int(fraction * n_bins), 0), n_bins - 1)
        index = index * n_bins + bucket
    return index


for episode in range(100):
    state = env.reset()
    if state is None:
        # reset() may re-initialize the environment in place without returning
        # the state; in that case read it from the environment itself.
        state = getattr(env, 'state', None)
    if state is None:
        raise ValueError("The environment provided no initial state (reset() returned None and env.state is unset).")
    total_reward = 0
    state_idx = encode_state(state)

    for t in range(1000):
        action = agent.select_action(state_idx)
        next_state, reward = env.step(action)
        next_state_idx = encode_state(next_state)

        agent.update(state_idx, action, reward, next_state_idx)

        # The next state's index is reused as the current index of the following step
        state, state_idx = next_state, next_state_idx
        total_reward += reward

    print(f"Episode: {episode+1}, Total Reward: {total_reward}")


# Evaluation