In [1]:
from q_table_agent import QTableAgent
from itertools import combinations

In [2]:
# Restore the CartPole agent from its saved Q-table file, then query the table
# with an empty filter list (no filtering conditions supplied) and show the rows.
q_table_path = './experiments/cartpole/q_table_agent.csv'
agent = QTableAgent.load_q_table(q_table_path)

no_filters = []
result = agent.query_q_table(no_filters)
print(result)

Loading sparse Q-Table and configuration from ./experiments/cartpole/q_table_agent.csv...
Sparse Q-Table Details:
 - State Space Dimensions: 4
   Sizes: [16, 16, 16, 16]
 - Action Space Dimensions: 1
   Sizes: 2
 - Non-Zero Entries in Sparse Q-Table: 0
 - Total Possible Entries (Dense): 131072
 - Sparsity: 100.00%
Sparse Q-Table successfully loaded.
Sparse Q-Table Details:
 - State Space Dimensions: 4
   Sizes: [16, 16, 16, 16]
 - Action Space Dimensions: 1
   Sizes: 2
 - Non-Zero Entries in Sparse Q-Table: 12021
 - Total Possible Entries (Dense): 131072
 - Sparsity: 90.83%
       State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                  7              7              8              7   
1                  7              6              8              8   
2                  7              6              8              9   
3                  7              5              8             10   
4                  7              6              9              9   
...  

In [3]:
# Two conditions: State_0_Value greater than 1 and State_1_Value less than 0.
# No logic argument is passed, so query_q_table uses whatever its default
# combination rule is (presumably AND — confirm against QTableAgent).
state_0_above_one = dict(field="State_0_Value", operator=">", value=1)
state_1_negative = dict(field="State_1_Value", operator="<", value=0)
filters = [state_0_above_one, state_1_negative]

result = agent.query_q_table(filters)
print(result)


       State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
5815              11              7              3              8   
5816              11              6              4              9   
5817              11              6              4             10   
5818              11              5              5             11   
5819              11              4              5             12   
...              ...            ...            ...            ...   
11995             11              1              7             12   
12000             12              5             10             15   
12008             12              4              6             11   
12010             12              5             10             11   
12015             12              6             11             13   

       State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
5815            1.12      -0.133333      -0.150000       0.133333   
5816            1.12      -0.4000

In [4]:
# Unfiltered query result used as input for both probability computations.
result = agent.query_q_table([])

# First pass: "greedy" strategy with epsilon=0.1 (printed only; not reused).
greedy_df = QTableAgent.compute_action_probabilities(
    result,
    strategy="greedy",
    epsilon=0.1,
)
print(greedy_df)

# Second pass: "softmax" strategy with temperature=1.0. This frame is bound to
# `df`, which is the name the following cell reads.
df = QTableAgent.compute_action_probabilities(
    result,
    strategy="softmax",
    temperature=1.0,
)
print(df)

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 2              0             11             15   
1                 2              0             12             15   
2                 2              0             13             15   
3                 3              0              5             10   
4                 3              0              5             11   
...             ...            ...            ...            ...   
7043             12             11              9              6   
7044             12             11             10              5   
7045             12             11             10              6   
7046             12             11             10              7   
7047             12             11             13              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.76      -2.000000       0.116667       2.000000   
1             -1.76      -2.000000       0.1500

In [5]:
# Index columns for the four state dimensions.
state_features = [f"State_{i}_Index" for i in range(4)]

# Walk every non-empty subset of the index columns, smallest subsets first,
# and report its mutual information with the action column.
for subset_size in range(1, len(state_features) + 1):
    for feature_subset in combinations(state_features, subset_size):
        subset_mi = QTableAgent.compute_mutual_information(
            df,
            list(feature_subset),
            "Action_0_Index",
            use_visit_count=True,
        )
        print(f"Mutual Information for Features {feature_subset} with Action_0_Index: {subset_mi}")

# Same measurement using the four value columns jointly instead of the indices.
state_value_columns = [f"State_{i}_Value" for i in range(4)]
mi = QTableAgent.compute_mutual_information(
    df, state_value_columns, "Action_0_Index", use_visit_count=True
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

Mutual Information for Features ('State_0_Index',) with Action_0_Index: 0.0015781349136027743
Mutual Information for Features ('State_1_Index',) with Action_0_Index: 0.0061206395377381975
Mutual Information for Features ('State_2_Index',) with Action_0_Index: 0.008478292707414528
Mutual Information for Features ('State_3_Index',) with Action_0_Index: 0.015466927268595831
Mutual Information for Features ('State_0_Index', 'State_1_Index') with Action_0_Index: 0.014946486074669736
Mutual Information for Features ('State_0_Index', 'State_2_Index') with Action_0_Index: 0.017116901331863568
Mutual Information for Features ('State_0_Index', 'State_3_Index') with Action_0_Index: 0.02928140611937543
Mutual Information for Features ('State_1_Index', 'State_2_Index') with Action_0_Index: 0.021983948277050813
Mutual Information for Features ('State_1_Index', 'State_3_Index') with Action_0_Index: 0.023859106475867174
Mutual Information for Features ('State_2_Index', 'State_3_Index') with Action_0_I

In [6]:
# Keep entries with State_0_Value > -0.5 and State_0_Value < +0.5.
# No logic argument is passed, so the query's default combination applies
# (presumably AND — confirm against QTableAgent).
lower_bound = dict(field="State_0_Value", operator=">", value=-0.5)
upper_bound = dict(field="State_0_Value", operator="<", value=+0.5)
filters = [lower_bound, upper_bound]
result = agent.query_q_table(filters)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = ["State_%d_Index" % dim for dim in range(4)]

# Every non-empty subset of the index columns, ordered by subset size.
feature_subsets = (
    subset
    for size in range(1, len(state_features) + 1)
    for subset in combinations(state_features, size)
)
for subset in feature_subsets:
    # Mutual information between this subset and the action column.
    mi = QTableAgent.compute_mutual_information(
        df, list(subset), "Action_0_Index", use_visit_count=True
    )
    print(f"Mutual Information for Features {subset} with Action_0_Index: {mi}")

# Joint mutual information of the four value columns with the action column.
value_columns = ["State_%d_Value" % dim for dim in range(4)]
mi = QTableAgent.compute_mutual_information(
    df, value_columns, "Action_0_Index", use_visit_count=True
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 6              0              1              8   
1                 6              0              1              9   
2                 6              0              1             10   
3                 6              0              2              9   
4                 6              0              2             10   
...             ...            ...            ...            ...   
4100              9             14              4              1   
4101              9             14              5              2   
4102              9             14              6              1   
4103              9             14              7              2   
4104              9             15              6              1   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -0.48      -2.000000      -0.216667       0.133333   
1             -0.48      -2.000000      -0.2166

In [7]:
# Keep entries with State_0_Value < -0.5 or State_0_Value > +0.5; the two
# conditions are combined by passing 'OR' as the query's second argument.
filters = [
    dict(field="State_0_Value", operator="<", value=-0.5),
    dict(field="State_0_Value", operator=">", value=+0.5),
]
logic = 'OR'
result = agent.query_q_table(filters, logic)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = [
    "State_0_Index",
    "State_1_Index",
    "State_2_Index",
    "State_3_Index",
]
target_column = "Action_0_Index"

# Subset sizes run from 1 up to the full feature count.
for n_chosen in range(1, len(state_features) + 1):
    for chosen in combinations(state_features, n_chosen):
        # Mutual information between the chosen index columns and the action.
        mi = QTableAgent.compute_mutual_information(
            df, list(chosen), target_column, use_visit_count=True
        )
        print(f"Mutual Information for Features {chosen} with Action_0_Index: {mi}")

# Joint mutual information of the four value columns with the action column.
value_columns = ["State_0_Value", "State_1_Value", "State_2_Value", "State_3_Value"]
mi = QTableAgent.compute_mutual_information(
    df, value_columns, target_column, use_visit_count=True
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 2              0             11             15   
1                 2              0             12             15   
2                 2              0             13             15   
3                 3              0              5             10   
4                 3              0              5             11   
...             ...            ...            ...            ...   
2938             12             11              9              6   
2939             12             11             10              5   
2940             12             11             10              6   
2941             12             11             10              7   
2942             12             11             13              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.76      -2.000000       0.116667       2.000000   
1             -1.76      -2.000000       0.1500

In [8]:
# Keep entries with State_3_Value > -0.25 and State_3_Value < +0.25.
# No logic argument is passed, so the query's default combination applies
# (presumably AND — confirm against QTableAgent).
filters = [
    dict(field="State_3_Value", operator=op, value=bound)
    for op, bound in ((">", -0.25), ("<", +0.25))
]
result = agent.query_q_table(filters)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = [f"State_{dim}_Index" for dim in range(4)]

# Materialise every non-empty subset first (ordered by size), then score each.
all_subsets = [
    combo
    for width in range(1, len(state_features) + 1)
    for combo in combinations(state_features, width)
]
for combo in all_subsets:
    # Mutual information between this subset and the action column.
    mi = QTableAgent.compute_mutual_information(
        df, list(combo), "Action_0_Index", use_visit_count=True
    )
    print(f"Mutual Information for Features {combo} with Action_0_Index: {mi}")

# Joint mutual information of the four value columns with the action column.
mi = QTableAgent.compute_mutual_information(
    df,
    [f"State_{dim}_Value" for dim in range(4)],
    "Action_0_Index",
    use_visit_count=True,
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 4              0              3              8   
1                 4              0              4              8   
2                 4              1              2              7   
3                 4              1              2              8   
4                 4              1              3              7   
...             ...            ...            ...            ...   
1243             12             10             11              8   
1244             12             10             12              7   
1245             12             10             12              8   
1246             12             11             10              7   
1247             12             11             13              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.12      -2.000000      -0.150000       0.133333   
1             -1.12      -2.000000      -0.1166

In [9]:
# Keep entries with State_3_Value < -0.25 or State_3_Value > +0.25; the two
# conditions are combined by passing 'OR' as the query's second argument.
below_band = dict(field="State_3_Value", operator="<", value=-0.25)
above_band = dict(field="State_3_Value", operator=">", value=+0.25)
filters = [below_band, above_band]
logic = 'OR'
result = agent.query_q_table(filters, logic)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = ["State_" + str(dim) + "_Index" for dim in range(4)]

# Keyword options shared by every mutual-information call in this cell.
mi_options = {"use_visit_count": True}

# Score each non-empty subset of index columns against the action column.
for group_size in range(1, len(state_features) + 1):
    for group in combinations(state_features, group_size):
        mi = QTableAgent.compute_mutual_information(
            df, list(group), "Action_0_Index", **mi_options
        )
        print(f"Mutual Information for Features {group} with Action_0_Index: {mi}")

# Joint mutual information of the four value columns with the action column.
value_columns = ["State_" + str(dim) + "_Value" for dim in range(4)]
mi = QTableAgent.compute_mutual_information(
    df, value_columns, "Action_0_Index", **mi_options
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 2              0             11             15   
1                 2              0             12             15   
2                 2              0             13             15   
3                 3              0              5             10   
4                 3              0              5             11   
...             ...            ...            ...            ...   
5795             12             11              8              6   
5796             12             11              9              5   
5797             12             11              9              6   
5798             12             11             10              5   
5799             12             11             10              6   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.76      -2.000000       0.116667       2.000000   
1             -1.76      -2.000000       0.1500

In [10]:
# Keep entries with State_2_Value > -0.1 and State_2_Value < +0.1.
# No logic argument is passed, so the query's default combination applies
# (presumably AND — confirm against QTableAgent).
filters = [
    dict(field="State_2_Value", operator=">", value=-0.1),
    dict(field="State_2_Value", operator="<", value=+0.1),
]
result = agent.query_q_table(filters)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = [
    "State_0_Index",
    "State_1_Index",
    "State_2_Index",
    "State_3_Index",
]

# Iterate subset sizes 1..len(state_features) via a zero-based counter.
for extra in range(len(state_features)):
    for picked in combinations(state_features, extra + 1):
        # Mutual information between the picked index columns and the action.
        picked_mi = QTableAgent.compute_mutual_information(
            df, list(picked), "Action_0_Index", use_visit_count=True
        )
        print(f"Mutual Information for Features {picked} with Action_0_Index: {picked_mi}")

# Joint mutual information of the four value columns with the action column.
mi = QTableAgent.compute_mutual_information(
    df,
    ["State_0_Value", "State_1_Value", "State_2_Value", "State_3_Value"],
    "Action_0_Index",
    use_visit_count=True,
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 3              0              5             10   
1                 3              0              5             11   
2                 3              0              5             13   
3                 3              0              6             12   
4                 3              0              6             13   
...             ...            ...            ...            ...   
3689             12             11              9              5   
3690             12             11              9              6   
3691             12             11             10              5   
3692             12             11             10              6   
3693             12             11             10              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.44      -2.000000      -0.083333       0.666667   
1             -1.44      -2.000000      -0.0833

In [11]:
# Keep entries with State_2_Value < -0.1 or State_2_Value > +0.1; the two
# conditions are combined by passing 'OR' as the query's second argument.
filters = [
    dict(field="State_2_Value", operator=op, value=bound)
    for op, bound in (("<", -0.1), (">", +0.1))
]
logic = 'OR'
result = agent.query_q_table(filters, logic)
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Index columns for the four state dimensions.
state_features = ["State_{}_Index".format(dim) for dim in range(4)]
action_column = "Action_0_Index"

# Lazily enumerate every non-empty subset of index columns, ordered by size.
subsets_by_size = (
    members
    for count in range(1, len(state_features) + 1)
    for members in combinations(state_features, count)
)
for members in subsets_by_size:
    # Mutual information between this subset and the action column.
    mi = QTableAgent.compute_mutual_information(
        df, list(members), action_column, use_visit_count=True
    )
    print(f"Mutual Information for Features {members} with Action_0_Index: {mi}")

# Joint mutual information of the four value columns with the action column.
value_columns = ["State_{}_Value".format(dim) for dim in range(4)]
mi = QTableAgent.compute_mutual_information(
    df, value_columns, action_column, use_visit_count=True
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 2              0             11             15   
1                 2              0             12             15   
2                 2              0             13             15   
3                 3              0             11             13   
4                 3              0             11             14   
...             ...            ...            ...            ...   
3349             12             10             11              8   
3350             12             10             12              7   
3351             12             10             12              8   
3352             12             11              4              5   
3353             12             11             13              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.76      -2.000000       0.116667       2.000000   
1             -1.76      -2.000000       0.1500

In [12]:
# Unfiltered query, converted to softmax action probabilities (temperature 1.0).
result = agent.query_q_table([])
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# Define the feature list in this cell instead of relying on whatever value
# `state_features` was left holding by a previously executed cell. A later cell
# rebinds it to a different column set, so re-running this cell out of order
# would otherwise silently change what is measured.
state_features = ["State_0_Index", "State_1_Index", "State_2_Index", "State_3_Index"]

# For every subset of two or more features, measure the mutual information
# between the FIRST feature of the subset and the REMAINING features taken
# together. Size-2 subsets give plain pairwise MI; larger subsets compare one
# feature against a group of features.
print("\nCalculating Mutual Information Between Features")
for r in range(2, len(state_features) + 1):  # r starts at 2: at least one feature on each side
    for feature_combination in combinations(state_features, r):
        feature_combination_list = list(feature_combination)
        first_feature = feature_combination_list[:1]      # single-element list
        other_features = feature_combination_list[1:]     # one or more features
        mi_between_features = QTableAgent.compute_mutual_information(
            df, first_feature, other_features, use_visit_count=False,
        )
        print(f"Mutual Information between Features {first_feature} and {other_features}: {mi_between_features:.4f}")

      State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                 2              0             11             15   
1                 2              0             12             15   
2                 2              0             13             15   
3                 3              0              5             10   
4                 3              0              5             11   
...             ...            ...            ...            ...   
7043             12             11              9              6   
7044             12             11             10              5   
7045             12             11             10              6   
7046             12             11             10              7   
7047             12             11             13              7   

      State_0_Value  State_1_Value  State_2_Value  State_3_Value  \
0             -1.76      -2.000000       0.116667       2.000000   
1             -1.76      -2.000000       0.1500

In [13]:
# Switch to the Acrobot agent and compute softmax action probabilities
# (temperature 1.0) over the unfiltered Q-table query.
agent = QTableAgent.load_q_table('./experiments/acrobot/q_table_agent.csv')
result = agent.query_q_table([])
df = QTableAgent.compute_action_probabilities(result, strategy="softmax", temperature=1.0)
print(df)

# The Acrobot table has six state dimensions, so both column lists below must
# cover State_0 .. State_5.
n_state_dims = 6

# List of all state features (index columns)
state_features = [f"State_{i}_Index" for i in range(n_state_dims)]

# Iterate over all possible combinations of features
for r in range(1, len(state_features) + 1):  # r is the number of features in each combination
    for feature_combination in combinations(state_features, r):
        # Compute mutual information for the current feature combination
        mi = QTableAgent.compute_mutual_information(
            df, list(feature_combination), "Action_0_Index", use_visit_count=True
        )
        # Print the results with detailed explanation
        print(f"Mutual Information for Features {feature_combination} with Action_0_Index: {mi}")

# Compute mutual information for all features as values.
# Fix: the original passed only State_0_Value..State_3_Value (copied from the
# four-dimensional CartPole cells), so the "All Features" label was wrong for
# this six-dimensional agent.
# NOTE(review): assumes State_4_Value and State_5_Value columns exist, following
# the State_<i>_Value naming seen for dimensions 0-3 — confirm against df.columns.
state_value_features = [f"State_{i}_Value" for i in range(n_state_dims)]
mi = QTableAgent.compute_mutual_information(
    df, state_value_features, "Action_0_Index", use_visit_count=True
)
print(f"Mutual Information for All Features as Values with Action_0_Index: {mi}")

Loading sparse Q-Table and configuration from ./experiments/acrobot/q_table_agent.csv...
Sparse Q-Table Details:
 - State Space Dimensions: 6
   Sizes: [16, 16, 16, 16, 16, 16]
 - Action Space Dimensions: 1
   Sizes: 3
 - Non-Zero Entries in Sparse Q-Table: 0
 - Total Possible Entries (Dense): 50331648
 - Sparsity: 100.00%
Sparse Q-Table successfully loaded.
Sparse Q-Table Details:
 - State Space Dimensions: 6
   Sizes: [16, 16, 16, 16, 16, 16]
 - Action Space Dimensions: 1
   Sizes: 3
 - Non-Zero Entries in Sparse Q-Table: 724594
 - Total Possible Entries (Dense): 50331648
 - Sparsity: 98.56%
        State_0_Index  State_1_Index  State_2_Index  State_3_Index  \
0                   0              3              0              3   
1                   0              3              0              3   
2                   0              3              0              3   
3                   0              3              0              3   
4                   0              3             