In [1]:

import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go


# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Default matplotlib figure size and style sheet used by the plots in this notebook
plt.rcParams['figure.figsize'] = (10, 6)
plt.style.use('ggplot')
import plotly.express as px

ModuleNotFoundError: No module named 'seaborn'

# Import data

- Merge Dataframes into 1 
- Make new columns corresponding to the parameters: object, sampling function, sphere radius, distance, time

In [None]:
def get_sampling_function_name(function):
    """Return the third single-space-separated token of ``function``, stripped.

    The string is split on the literal ``" "`` separator (so consecutive
    spaces produce empty tokens) and the token at index 2 is returned with
    surrounding whitespace removed.

    Raises:
        IndexError: if the string contains fewer than three tokens.
    """
    tokens = function.split(" ")
    third_token = tokens[2]
    return third_token.strip()

In [None]:
folder_path = 'LoCoMoExperimentsSamplingFunction/'

# Collect one frame per CSV and concatenate once at the end: calling pd.concat
# inside the loop re-copies every previously loaded row on each iteration.
frames = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the merged table reproducible between runs and machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path, index_col=0).dropna()

    # The experiment parameters are encoded in the file name (extension
    # removed), comma-separated, in this order:
    # object, sampling function, sphere radius, positions sampled, distance, duration
    params = os.path.splitext(filename)[0].split(",")

    df.insert(0, 'object', params[0])
    df.insert(1, 'sample_function', get_sampling_function_name(params[1]))
    df.insert(2, 'sphere_radius', params[2])
    df.insert(3, 'positions_sampled', params[3])
    df.insert(4, 'distance', params[4])
    df.insert(5, 'duration', params[5])
    frames.append(df)

if not frames:
    # Fail with an explicit message instead of an obscure KeyError further down.
    raise FileNotFoundError(f"No .csv files found in {folder_path!r}")

# NOTE: the name `all` shadows the built-in of the same name; it is kept
# because the rest of the notebook refers to the merged table by this name.
all = pd.concat(frames, ignore_index=True)

# The parameters were parsed from the file name as strings; cast the numeric ones.
all['sphere_radius'] = all['sphere_radius'].astype(int)
all['distance'] = all['distance'].astype(int)
all['duration'] = all['duration'].astype(float)
all['Probabilities'] = all['Probabilities'].astype(float)
 


In [None]:
# Derived column: duration divided by 3600 (presumably seconds -> hours; confirm the unit of `duration`)
all['hours'] = all['duration'].div(3600)

In [None]:
# Preview the first rows of the merged results table
all.head()



In [None]:
# Highest LoCoMo score in the merged table. Series.max() is vectorised and
# skips NaN by default, unlike the Python built-in max() applied element-wise.
all['Probabilities'].max()

# Exploratory Data Analysis

In [None]:
# Optional filter restricting the scores to [0, 0.1] (currently disabled):
# all = all[(all['Probabilities'] >= 0) & (all['Probabilities'] <= 0.1)]

# Histogram of every LoCoMo score in the merged table, 100 bins
scores = all['Probabilities']
plt.hist(scores, bins=100)

# Label the current axes in one call
plt.gca().set(
    xlabel='LoCoMo Probability Scores',
    ylabel='Frequency',
    title='Distribution of Scores',
)

plt.show()


## Sampling method  
Divide all the dataframes depending on the method used 
- Random sampling
- Opposite normal sampling

In [None]:
# Partition the merged results by the sampling strategy recorded in `sample_function`
is_random = all['sample_function'] == 'sample_finger_poses_random'
is_opposite = all['sample_function'] == 'sample_finger_poses_opposite'

random_sampling = all.loc[is_random]
opposite_normal_sampling = all.loc[is_opposite]

In [None]:
cmap = 'viridis'  # colormap name; the histograms in this cell use fixed colours instead

# Overlay the score distributions of both strategies (semi-transparent, 100 bins each)
for scores, label, colour in (
    (random_sampling['Probabilities'], 'random', 'red'),
    (opposite_normal_sampling['Probabilities'], 'opposite', 'blue'),
):
    plt.hist(scores, bins=100, label=label, alpha=0.6, color=colour)

plt.gca().set(
    xlabel='LoCoMo Probability Scores',
    ylabel='Frequency',
    title='Distribution of Scores',
)
plt.legend(loc='best')
plt.show()


In [None]:
# NOTE: despite the "top_10" names, these keep the 500 highest-scoring rows of each strategy.
top_10_random = random_sampling.nlargest(n=500, columns='Probabilities')
top_10_opposite = opposite_normal_sampling.nlargest(n=500, columns='Probabilities')


In [None]:
# Overlay the score distributions of the top rows of each strategy,
# coloured from two positions of the `cmap` colormap.
palette = plt.get_cmap(cmap)
for frame, label, shade in (
    (top_10_random, 'random', .5),
    (top_10_opposite, 'opposite', .8),
):
    plt.hist(frame['Probabilities'], label=label, alpha=0.6, color=palette(shade))

plt.gca().set(
    xlabel='LoCoMo Probability Scores',
    ylabel='Frequency',
    title='Distribution of Scores',
)
plt.legend(loc='best')
plt.show()


## Summary Statistics

In [None]:
# Descriptive statistics of the LoCoMo score, one row per
# (sample_function, sphere_radius, distance) combination.
score_aggregations = {
    'Mean_Score': ('Probabilities', 'mean'),
    'Median_Score': ('Probabilities', 'median'),
    'Std_Dev': ('Probabilities', 'std'),
    'Min_Score': ('Probabilities', 'min'),
    'Max_Score': ('Probabilities', 'max'),
    'Q1': ('Probabilities', lambda x: x.quantile(0.25)),
    'Q3': ('Probabilities', lambda x: x.quantile(0.75)),
    'IQR': ('Probabilities', lambda x: x.quantile(0.75) - x.quantile(0.25)),
}

grouped_scores = all.groupby(['sample_function', 'sphere_radius', 'distance'])
summary_stats = grouped_scores.agg(**score_aggregations).reset_index()

In [None]:
# Display the per-configuration summary statistics table
summary_stats

### Violin plot

In [None]:
# Overlaid violins of the score distribution, one per sampling function
fig = px.violin(
    data_frame=all,
    y='Probabilities',
    color='sample_function',
    violinmode='overlay',
)
fig.show()

**Sphere Radius**

In [None]:

fig = go.Figure()

# Split violins of the score per sphere radius: one half-violin per sampling
# strategy, described as (sample function, violin side, line colour).
half_violins = (
    ('sample_finger_poses_opposite', 'negative', 'lightseagreen'),
    ('sample_finger_poses_random', 'positive', 'mediumpurple'),
)

for function_name, side, line_color in half_violins:
    selected = all['sample_function'] == function_name
    fig.add_trace(go.Violin(
        x=all['sphere_radius'][selected],
        y=all['Probabilities'][selected],
        legendgroup=function_name,
        scalegroup=function_name,
        name=function_name,
        side=side,
        line_color=line_color,
    ))

fig.update_traces(meanline_visible=True, points=False)
fig.update_layout(violingap=0, violinmode='overlay')
fig.show()

**Distance**  
~to fix later~

In [None]:
fig = go.Figure()

# Split violins of the score per distance: one half-violin per sampling
# strategy, described as (sample function, violin side, line colour).
half_violins = (
    ('sample_finger_poses_opposite', 'negative', 'lightseagreen'),
    ('sample_finger_poses_random', 'positive', 'mediumpurple'),
)

for function_name, side, line_color in half_violins:
    selected = all['sample_function'] == function_name
    fig.add_trace(go.Violin(
        x=all['distance'][selected],
        y=all['Probabilities'][selected],
        legendgroup=function_name,
        scalegroup=function_name,
        name=function_name,
        side=side,
        line_color=line_color,
    ))

fig.update_traces(meanline_visible=True, points=False)
fig.update_layout(violingap=0, violinmode='overlay')
fig.show()

**All**

In [None]:
fig = go.Figure()

distance_values = np.sort(all['distance'].unique())

# One [opposite, random] colour pair per distance value
colors = [['mediumpurple', 'gold'], ['deepskyblue', 'orange'], ['palegreen', 'deeppink']]

# (sample function, legend label, violin side, index into the colour pair)
strategies = (
    ('sample_finger_poses_opposite', 'opposite', 'positive', 0),
    ('sample_finger_poses_random', 'random', 'negative', 1),
)

for position, distance in enumerate(distance_values):
    # Cycle through the palette: zipping with `colors` would silently drop
    # every distance beyond the number of colour pairs.
    color = colors[position % len(colors)]
    at_distance = all['distance'] == distance

    for function_name, label, side, color_index in strategies:
        selected = at_distance & (all['sample_function'] == function_name)
        fig.add_trace(go.Violin(
            x=all['sphere_radius'][selected],
            y=all['Probabilities'][selected],
            # Both halves share a legend/scale group so they toggle and scale together
            legendgroup=str(distance),
            scalegroup=str(distance),
            # The strategy is part of the name so the two legend entries of a
            # distance can be told apart.
            name='Distance ' + str(distance) + ' (' + label + ')',
            side=side,
            line_color=color[color_index],
        ))

fig.update_traces(meanline_visible=True, points=False)
fig.update_layout(violingap=0, violinmode='overlay')

fig.show()

In [None]:
fig = go.Figure()

distance_values = np.sort(all['distance'].unique())

# Colour pairs per distance value; only the first colour of each pair is used
# here because this view shows the opposite-sampling strategy alone.
colors = [['mediumpurple', 'gold'], ['deepskyblue', 'orange'], ['palegreen', 'deeppink']]

for position, distance in enumerate(distance_values):
    # Cycle through the palette: zipping with `colors` would silently drop
    # every distance beyond the number of colour pairs.
    color = colors[position % len(colors)]
    selected = (all['distance'] == distance) & (all['sample_function'] == 'sample_finger_poses_opposite')

    fig.add_trace(go.Violin(
        x=all['sphere_radius'][selected],
        y=all['Probabilities'][selected],
        legendgroup=str(distance),
        scalegroup=str(distance),
        name='Distance ' + str(distance),
        side='positive',
        line_color=color[0],
    ))

fig.update_traces(meanline_visible=True, points=False)
fig.update_layout(violingap=0, violinmode='overlay')

fig.show()

**For time**

In [None]:
fig = go.Figure()

distance_values = np.sort(all['distance'].unique())

# One [opposite, random] colour pair per distance value
colors = [['mediumpurple', 'gold'], ['deepskyblue', 'orange'], ['palegreen', 'deeppink']]

# (sample function, legend label, violin side, index into the colour pair)
strategies = (
    ('sample_finger_poses_opposite', 'opposite', 'positive', 0),
    ('sample_finger_poses_random', 'random', 'negative', 1),
)

for position, distance in enumerate(distance_values):
    # Cycle through the palette: zipping with `colors` would silently drop
    # every distance beyond the number of colour pairs.
    color = colors[position % len(colors)]
    at_distance = all['distance'] == distance

    for function_name, label, side, color_index in strategies:
        selected = at_distance & (all['sample_function'] == function_name)
        fig.add_trace(go.Violin(
            x=all['sphere_radius'][selected],
            y=all['hours'][selected],
            # Both halves share a legend/scale group so they toggle and scale together
            legendgroup=str(distance),
            scalegroup=str(distance),
            # The strategy is part of the name so the two legend entries of a
            # distance can be told apart.
            name='Distance ' + str(distance) + ' (' + label + ')',
            side=side,
            line_color=color[color_index],
        ))

fig.update_traces(meanline_visible=True, points=False)
fig.update_layout(violingap=0, violinmode='overlay')

fig.show()

**First conclusion:**
- Mean most of the time better for the Opposite sampling strategy
- Distance is best when smaller - good for computation because loops over a lower amount of points in the point cloud
- Sphere radius higher - best for opposite strategy -> increasing

# Randomization tests - question mark

# Best Parameter Combination

Combination of parameter leading to: 
- **_Probability results_**
- **_Timing_**   

Best trade-off for:
- Probability results (to maximise) and timing (to minimise)
- Object type

Investigate the following:
- Mean
- Median
- Max
- Best 100

## Probability Results

In [None]:
# Display the per-configuration summary statistics table
summary_stats

### Mean

In [None]:
# Split the summary table by sampling strategy
df_value_random = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_random']
df_value_opposite = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the mean score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Mean_Score')
pivot_df_value_random = df_value_random.pivot(**pivot_kwargs)
pivot_df_value_opposite = df_value_opposite.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Median

In [None]:
# Split the summary table by sampling strategy
df_value_random = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_random']
df_value_opposite = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the median score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Median_Score')
pivot_df_value_random = df_value_random.pivot(**pivot_kwargs)
pivot_df_value_opposite = df_value_opposite.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Max

In [None]:
# Split the summary table by sampling strategy
df_value_random = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_random']
df_value_opposite = summary_stats.loc[summary_stats['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the maximum score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Max_Score')
pivot_df_value_random = df_value_random.pivot(**pivot_kwargs)
pivot_df_value_opposite = df_value_opposite.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Best 100

In [None]:
# Keep the 100 highest-scoring rows of each sampling strategy
best_100_random = all.loc[all['sample_function'] == 'sample_finger_poses_random'].nlargest(n=100, columns='Probabilities')
best_100_opposite = all.loc[all['sample_function'] == 'sample_finger_poses_opposite'].nlargest(n=100, columns='Probabilities')

# Descriptive statistics of the score, applied identically to both subsets
best_100_group_keys = ['sample_function', 'sphere_radius', 'distance']
best_100_aggregations = {
    'Mean_Score': ('Probabilities', 'mean'),
    'Median_Score': ('Probabilities', 'median'),
    'Std_Dev': ('Probabilities', 'std'),
    'Min_Score': ('Probabilities', 'min'),
    'Max_Score': ('Probabilities', 'max'),
    'Q1': ('Probabilities', lambda x: x.quantile(0.25)),
    'Q3': ('Probabilities', lambda x: x.quantile(0.75)),
    'IQR': ('Probabilities', lambda x: x.quantile(0.75) - x.quantile(0.25)),
}

summary_stats_best_100_random = (
    best_100_random
    .groupby(best_100_group_keys)
    .agg(**best_100_aggregations)
    .reset_index()
)

summary_stats_best_100_opposite = (
    best_100_opposite
    .groupby(best_100_group_keys)
    .agg(**best_100_aggregations)
    .reset_index()
)

**1. Mean**

In [None]:
# Select each strategy's rows from its best-100 summary table
df_value_random_100 = summary_stats_best_100_random.loc[summary_stats_best_100_random['sample_function'] == 'sample_finger_poses_random']
df_value_opposite_100 = summary_stats_best_100_opposite.loc[summary_stats_best_100_opposite['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the mean score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Mean_Score')
pivot_df_value_random_100 = df_value_random_100.pivot(**pivot_kwargs)
pivot_df_value_opposite_100 = df_value_opposite_100.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random_100, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite_100, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

**2. Median**

In [None]:
# Select each strategy's rows from its best-100 summary table
df_value_random_100 = summary_stats_best_100_random.loc[summary_stats_best_100_random['sample_function'] == 'sample_finger_poses_random']
df_value_opposite_100 = summary_stats_best_100_opposite.loc[summary_stats_best_100_opposite['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the median score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Median_Score')
pivot_df_value_random_100 = df_value_random_100.pivot(**pivot_kwargs)
pivot_df_value_opposite_100 = df_value_opposite_100.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random_100, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite_100, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

**3. Max**

In [None]:
# Select each strategy's rows from its best-100 summary table
df_value_random_100 = summary_stats_best_100_random.loc[summary_stats_best_100_random['sample_function'] == 'sample_finger_poses_random']
df_value_opposite_100 = summary_stats_best_100_opposite.loc[summary_stats_best_100_opposite['sample_function'] == 'sample_finger_poses_opposite']

# distance (rows) x sphere_radius (columns) grids of the maximum score
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Max_Score')
pivot_df_value_random_100 = df_value_random_100.pivot(**pivot_kwargs)
pivot_df_value_opposite_100 = df_value_opposite_100.pivot(**pivot_kwargs)

# Side-by-side heatmaps on a fixed 0-1 colour scale
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (pivot_df_value_random_100, 'Heatmap for sample function = Random'),
    (pivot_df_value_opposite_100, 'Heatmap for sample function = Opposite'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(data=grid, annot=True, cmap='GnBu', ax=ax, vmin=0, vmax=1)
    ax.set_title(title)

plt.tight_layout()
plt.show()

## Time 

In [None]:
# Descriptive statistics of the `hours` column, one row per
# (sample_function, sphere_radius, distance) combination. The output columns
# keep the "*_Score" names even though the values are hours.
hours_aggregations = {
    'Mean_Score': ('hours', 'mean'),
    'Median_Score': ('hours', 'median'),
    'Std_Dev': ('hours', 'std'),
    'Min_Score': ('hours', 'min'),
    'Max_Score': ('hours', 'max'),
    'Q1': ('hours', lambda x: x.quantile(0.25)),
    'Q3': ('hours', lambda x: x.quantile(0.75)),
    'IQR': ('hours', lambda x: x.quantile(0.75) - x.quantile(0.25)),
}

time_stats = (
    all
    .groupby(['sample_function', 'sphere_radius', 'distance'])
    .agg(**hours_aggregations)
    .reset_index()
)

# Split by sampling strategy for the heatmaps that follow
time_value_random = time_stats.loc[time_stats['sample_function'] == 'sample_finger_poses_random']
time_value_opposite = time_stats.loc[time_stats['sample_function'] == 'sample_finger_poses_opposite']

time_stats.head(5)

### Mean

In [None]:


# distance (rows, descending) x sphere_radius (columns) grids of the mean time
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Mean_Score')
pivot_time_value_random = time_value_random.pivot(**pivot_kwargs).sort_index(ascending=False)
pivot_time_value_opposite = time_value_opposite.pivot(**pivot_kwargs).sort_index(ascending=False)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Shared colour range so both panels are directly comparable
time_grids = (pivot_time_value_random, pivot_time_value_opposite)
vmin = min(grid.min().min() for grid in time_grids)
vmax = max(grid.max().max() for grid in time_grids)

panel_titles = ('Time for sample function = Random', 'Time for sample function = Opposite')
for ax, grid, title in zip(axes, time_grids, panel_titles):
    sns.heatmap(data=grid, annot=True, cmap='plasma', ax=ax, vmin=vmin, vmax=vmax)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Median

In [None]:
# distance (rows, descending) x sphere_radius (columns) grids of the median time
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Median_Score')
pivot_time_value_random = time_value_random.pivot(**pivot_kwargs).sort_index(ascending=False)
pivot_time_value_opposite = time_value_opposite.pivot(**pivot_kwargs).sort_index(ascending=False)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Shared colour range so both panels are directly comparable
time_grids = (pivot_time_value_random, pivot_time_value_opposite)
vmin = min(grid.min().min() for grid in time_grids)
vmax = max(grid.max().max() for grid in time_grids)

panel_titles = ('Time for sample function = Random', 'Time for sample function = Opposite')
for ax, grid, title in zip(axes, time_grids, panel_titles):
    sns.heatmap(data=grid, annot=True, cmap='plasma', ax=ax, vmin=vmin, vmax=vmax)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Max

In [None]:
# distance (rows, descending) x sphere_radius (columns) grids of the maximum time
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Max_Score')
pivot_time_value_random = time_value_random.pivot(**pivot_kwargs).sort_index(ascending=False)
pivot_time_value_opposite = time_value_opposite.pivot(**pivot_kwargs).sort_index(ascending=False)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Shared colour range so both panels are directly comparable
time_grids = (pivot_time_value_random, pivot_time_value_opposite)
vmin = min(grid.min().min() for grid in time_grids)
vmax = max(grid.max().max() for grid in time_grids)

panel_titles = ('Time for sample function = Random', 'Time for sample function = Opposite')
for ax, grid, title in zip(axes, time_grids, panel_titles):
    sns.heatmap(data=grid, annot=True, cmap='plasma', ax=ax, vmin=vmin, vmax=vmax)
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Min

In [None]:
# distance (rows, descending) x sphere_radius (columns) grids of the minimum time
pivot_kwargs = dict(index='distance', columns='sphere_radius', values='Min_Score')
pivot_time_value_random = time_value_random.pivot(**pivot_kwargs).sort_index(ascending=False)
pivot_time_value_opposite = time_value_opposite.pivot(**pivot_kwargs).sort_index(ascending=False)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Shared colour range so both panels are directly comparable
time_grids = (pivot_time_value_random, pivot_time_value_opposite)
vmin = min(grid.min().min() for grid in time_grids)
vmax = max(grid.max().max() for grid in time_grids)

panel_titles = ('Time for sample function = Random', 'Time for sample function = Opposite')
for ax, grid, title in zip(axes, time_grids, panel_titles):
    sns.heatmap(data=grid, annot=True, cmap='plasma', ax=ax, vmin=vmin, vmax=vmax)
    ax.set_title(title)

plt.tight_layout()
plt.show()

# Object specific performance

In [None]:
# Distinct values of the `object` column
all['object'].unique()

## Probability Results

### Mean

In [None]:
# One row of heatmaps per object: mean score for random (left) and opposite (right) sampling
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single
# object, so axes[row][col] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Score statistics for this object, per (sample_function, sphere_radius, distance)
    object_stats = all[all.object == object_name].groupby(['sample_function', 'sphere_radius', 'distance']).agg(
        Mean_Score=('Probabilities', 'mean'),
        Median_Score=('Probabilities', 'median'),
        Std_Dev=('Probabilities', 'std'),
        Min_Score=('Probabilities', 'min'),
        Max_Score=('Probabilities', 'max'),
        Q1=('Probabilities', lambda x: x.quantile(0.25)),
        Q3=('Probabilities', lambda x: x.quantile(0.75)),
        IQR=('Probabilities', lambda x: x.quantile(0.75) - x.quantile(0.25))
    ).reset_index()

    object_value_random = object_stats[object_stats['sample_function'] == 'sample_finger_poses_random']
    object_value_opposite = object_stats[object_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance (rows) x sphere_radius (columns) grids of the mean score
    pivot_df_value_random = object_value_random.pivot(index='distance', columns='sphere_radius', values='Mean_Score')
    pivot_df_value_opposite = object_value_opposite.pivot(index='distance', columns='sphere_radius', values='Mean_Score')

    sns.heatmap(data=pivot_df_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=0, vmax=1)
    axes[object_index][0].set_title(str(object_name) + ' Mean - sample function = Random')

    sns.heatmap(data=pivot_df_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=0, vmax=1)
    # Same title format as the Random panel (the separator after the object name was missing)
    axes[object_index][1].set_title(str(object_name) + ' Mean - sample function = Opposite')

# Lay out once, after every panel has been drawn
plt.tight_layout()
plt.show()

### Median

In [None]:
# One row of heatmaps per object: median score for random (left) and opposite (right) sampling
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single
# object, so axes[row][col] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Score statistics for this object, per (sample_function, sphere_radius, distance)
    object_stats = all[all.object == object_name].groupby(['sample_function', 'sphere_radius', 'distance']).agg(
        Mean_Score=('Probabilities', 'mean'),
        Median_Score=('Probabilities', 'median'),
        Std_Dev=('Probabilities', 'std'),
        Min_Score=('Probabilities', 'min'),
        Max_Score=('Probabilities', 'max'),
        Q1=('Probabilities', lambda x: x.quantile(0.25)),
        Q3=('Probabilities', lambda x: x.quantile(0.75)),
        IQR=('Probabilities', lambda x: x.quantile(0.75) - x.quantile(0.25))
    ).reset_index()

    object_value_random = object_stats[object_stats['sample_function'] == 'sample_finger_poses_random']
    object_value_opposite = object_stats[object_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance (rows) x sphere_radius (columns) grids of the median score
    pivot_df_value_random = object_value_random.pivot(index='distance', columns='sphere_radius', values='Median_Score')
    pivot_df_value_opposite = object_value_opposite.pivot(index='distance', columns='sphere_radius', values='Median_Score')

    sns.heatmap(data=pivot_df_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=0, vmax=1)
    axes[object_index][0].set_title(str(object_name) + ' Median - sample function = Random')

    sns.heatmap(data=pivot_df_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=0, vmax=1)
    # Same title format as the Random panel (the separator after the object name was missing)
    axes[object_index][1].set_title(str(object_name) + ' Median - sample function = Opposite')

# Lay out once, after every panel has been drawn
plt.tight_layout()
plt.show()

### Max

In [None]:
# One row of heatmaps per object: maximum score for random (left) and opposite (right) sampling
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single
# object, so axes[row][col] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Score statistics for this object, per (sample_function, sphere_radius, distance)
    object_stats = all[all.object == object_name].groupby(['sample_function', 'sphere_radius', 'distance']).agg(
        Mean_Score=('Probabilities', 'mean'),
        Median_Score=('Probabilities', 'median'),
        Std_Dev=('Probabilities', 'std'),
        Min_Score=('Probabilities', 'min'),
        Max_Score=('Probabilities', 'max'),
        Q1=('Probabilities', lambda x: x.quantile(0.25)),
        Q3=('Probabilities', lambda x: x.quantile(0.75)),
        IQR=('Probabilities', lambda x: x.quantile(0.75) - x.quantile(0.25))
    ).reset_index()

    object_value_random = object_stats[object_stats['sample_function'] == 'sample_finger_poses_random']
    object_value_opposite = object_stats[object_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance (rows) x sphere_radius (columns) grids of the maximum score
    pivot_df_value_random = object_value_random.pivot(index='distance', columns='sphere_radius', values='Max_Score')
    pivot_df_value_opposite = object_value_opposite.pivot(index='distance', columns='sphere_radius', values='Max_Score')

    sns.heatmap(data=pivot_df_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=0, vmax=1)
    axes[object_index][0].set_title(str(object_name) + ' Max - sample function = Random')

    sns.heatmap(data=pivot_df_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=0, vmax=1)
    # Same title format as the Random panel (the separator after the object name was missing)
    axes[object_index][1].set_title(str(object_name) + ' Max - sample function = Opposite')

# Lay out once, after every panel has been drawn
plt.tight_layout()
plt.show()

## Time

### Mean

In [None]:
# One row of heatmaps per object: mean time (hours) for random (left) and opposite (right) sampling
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single
# object, so axes[row][col] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # NOTE: this rebinds time_stats / time_value_* (also assigned by the
    # notebook-wide "Time" section) to the current object's statistics.
    time_stats = all[all.object == object_name].groupby(['sample_function', 'sphere_radius', 'distance']).agg(
        Mean_Score=('hours', 'mean'),
        Median_Score=('hours', 'median'),
        Std_Dev=('hours', 'std'),
        Min_Score=('hours', 'min'),
        Max_Score=('hours', 'max'),
        Q1=('hours', lambda x: x.quantile(0.25)),
        Q3=('hours', lambda x: x.quantile(0.75)),
        IQR=('hours', lambda x: x.quantile(0.75) - x.quantile(0.25))
    ).reset_index()

    time_value_random = time_stats[time_stats['sample_function'] == 'sample_finger_poses_random']
    time_value_opposite = time_stats[time_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance (rows, descending) x sphere_radius (columns) grids of the mean time
    pivot_time_value_random = time_value_random.pivot(index='distance', columns='sphere_radius', values='Mean_Score')
    pivot_time_value_opposite = time_value_opposite.pivot(index='distance', columns='sphere_radius', values='Mean_Score')
    pivot_time_value_random = pivot_time_value_random.sort_index(ascending=False)
    pivot_time_value_opposite = pivot_time_value_opposite.sort_index(ascending=False)

    # Colour range shared by the two panels of this object's row
    vmin = min(pivot_time_value_random.min().min(), pivot_time_value_opposite.min().min())
    vmax = max(pivot_time_value_random.max().max(), pivot_time_value_opposite.max().max())

    sns.heatmap(data=pivot_time_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=vmin, vmax=vmax)
    axes[object_index][0].set_title(str(object_name) + ' Mean Time - sample function = Random')

    sns.heatmap(data=pivot_time_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=vmin, vmax=vmax)
    axes[object_index][1].set_title(str(object_name) + ' Mean Time - sample function = Opposite')

# Lay out once, after every panel has been drawn
plt.tight_layout()
plt.show()




### Median

In [None]:
# Median run time (the `hours` column) for every (distance, sphere_radius) combination,
# drawn as one row of heatmaps per object: left = random sampling, right = opposite sampling.

# Computed once; the loop previously re-evaluated `all.object.unique()` several times per iteration.
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single object,
# so the axes[row][column] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Summary statistics of `hours` for this object, one row per parameter combination.
    time_stats = (
        all[all.object == object_name]
        .groupby(['sample_function', 'sphere_radius', 'distance'])
        .agg(
            Mean_Score=('hours', 'mean'),
            Median_Score=('hours', 'median'),
            Std_Dev=('hours', 'std'),
            Min_Score=('hours', 'min'),
            Max_Score=('hours', 'max'),
            Q1=('hours', lambda x: x.quantile(0.25)),
            Q3=('hours', lambda x: x.quantile(0.75)),
            IQR=('hours', lambda x: x.quantile(0.75) - x.quantile(0.25)),
        )
        .reset_index()
    )

    # Split the statistics by sampling function.
    time_value_random = time_stats[time_stats['sample_function'] == 'sample_finger_poses_random']
    time_value_opposite = time_stats[time_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance x sphere_radius grids, largest distance in the top row.
    pivot_time_value_random = time_value_random.pivot(
        index='distance', columns='sphere_radius', values='Median_Score'
    ).sort_index(ascending=False)
    pivot_time_value_opposite = time_value_opposite.pivot(
        index='distance', columns='sphere_radius', values='Median_Score'
    ).sort_index(ascending=False)

    # Shared colour range so both heatmaps of one object use the same scale.
    vmin = min(pivot_time_value_random.min().min(), pivot_time_value_opposite.min().min())
    vmax = max(pivot_time_value_random.max().max(), pivot_time_value_opposite.max().max())

    # Left panel: random sampling function.
    sns.heatmap(data=pivot_time_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=vmin, vmax=vmax)
    axes[object_index][0].set_title(str(object_name) + ' Median Time - sample function = Random')

    # Right panel: opposite sampling function.
    sns.heatmap(data=pivot_time_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=vmin, vmax=vmax)
    axes[object_index][1].set_title(str(object_name) + ' Median Time - sample function = Opposite')

# Adjust the layout once, after every panel has been drawn.
plt.tight_layout()

# Show the plot
plt.show()




### Max

In [None]:
# Maximum run time (the `hours` column) for every (distance, sphere_radius) combination,
# drawn as one row of heatmaps per object: left = random sampling, right = opposite sampling.

# Computed once; the loop previously re-evaluated `all.object.unique()` several times per iteration.
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single object,
# so the axes[row][column] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Summary statistics of `hours` for this object, one row per parameter combination.
    time_stats = (
        all[all.object == object_name]
        .groupby(['sample_function', 'sphere_radius', 'distance'])
        .agg(
            Mean_Score=('hours', 'mean'),
            Median_Score=('hours', 'median'),
            Std_Dev=('hours', 'std'),
            Min_Score=('hours', 'min'),
            Max_Score=('hours', 'max'),
            Q1=('hours', lambda x: x.quantile(0.25)),
            Q3=('hours', lambda x: x.quantile(0.75)),
            IQR=('hours', lambda x: x.quantile(0.75) - x.quantile(0.25)),
        )
        .reset_index()
    )

    # Split the statistics by sampling function.
    time_value_random = time_stats[time_stats['sample_function'] == 'sample_finger_poses_random']
    time_value_opposite = time_stats[time_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance x sphere_radius grids, largest distance in the top row.
    pivot_time_value_random = time_value_random.pivot(
        index='distance', columns='sphere_radius', values='Max_Score'
    ).sort_index(ascending=False)
    pivot_time_value_opposite = time_value_opposite.pivot(
        index='distance', columns='sphere_radius', values='Max_Score'
    ).sort_index(ascending=False)

    # Shared colour range so both heatmaps of one object use the same scale.
    vmin = min(pivot_time_value_random.min().min(), pivot_time_value_opposite.min().min())
    vmax = max(pivot_time_value_random.max().max(), pivot_time_value_opposite.max().max())

    # Left panel: random sampling function.
    sns.heatmap(data=pivot_time_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=vmin, vmax=vmax)
    axes[object_index][0].set_title(str(object_name) + ' Max Time - sample function = Random')

    # Right panel: opposite sampling function.
    sns.heatmap(data=pivot_time_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=vmin, vmax=vmax)
    axes[object_index][1].set_title(str(object_name) + ' Max Time - sample function = Opposite')

# Adjust the layout once, after every panel has been drawn.
plt.tight_layout()

# Show the plot
plt.show()




### Min

In [None]:
# Minimum run time (the `hours` column) for every (distance, sphere_radius) combination,
# drawn as one row of heatmaps per object: left = random sampling, right = opposite sampling.

# Computed once; the loop previously re-evaluated `all.object.unique()` several times per iteration.
object_names = all.object.unique()

# squeeze=False keeps `axes` two-dimensional even when there is a single object,
# so the axes[row][column] indexing below always works.
fig, axes = plt.subplots(len(object_names), 2, figsize=(18, 10), squeeze=False)

for object_index, object_name in enumerate(object_names):
    # Summary statistics of `hours` for this object, one row per parameter combination.
    time_stats = (
        all[all.object == object_name]
        .groupby(['sample_function', 'sphere_radius', 'distance'])
        .agg(
            Mean_Score=('hours', 'mean'),
            Median_Score=('hours', 'median'),
            Std_Dev=('hours', 'std'),
            Min_Score=('hours', 'min'),
            Max_Score=('hours', 'max'),
            Q1=('hours', lambda x: x.quantile(0.25)),
            Q3=('hours', lambda x: x.quantile(0.75)),
            IQR=('hours', lambda x: x.quantile(0.75) - x.quantile(0.25)),
        )
        .reset_index()
    )

    # Split the statistics by sampling function.
    time_value_random = time_stats[time_stats['sample_function'] == 'sample_finger_poses_random']
    time_value_opposite = time_stats[time_stats['sample_function'] == 'sample_finger_poses_opposite']

    # distance x sphere_radius grids, largest distance in the top row.
    pivot_time_value_random = time_value_random.pivot(
        index='distance', columns='sphere_radius', values='Min_Score'
    ).sort_index(ascending=False)
    pivot_time_value_opposite = time_value_opposite.pivot(
        index='distance', columns='sphere_radius', values='Min_Score'
    ).sort_index(ascending=False)

    # Shared colour range so both heatmaps of one object use the same scale.
    vmin = min(pivot_time_value_random.min().min(), pivot_time_value_opposite.min().min())
    vmax = max(pivot_time_value_random.max().max(), pivot_time_value_opposite.max().max())

    # Left panel: random sampling function.
    sns.heatmap(data=pivot_time_value_random, annot=True, cmap='PuRd', ax=axes[object_index][0], vmin=vmin, vmax=vmax)
    axes[object_index][0].set_title(str(object_name) + ' Min Time - sample function = Random')

    # Right panel: opposite sampling function (title typo "Minn" corrected to "Min").
    sns.heatmap(data=pivot_time_value_opposite, annot=True, cmap='PuRd', ax=axes[object_index][1], vmin=vmin, vmax=vmax)
    axes[object_index][1].set_title(str(object_name) + ' Min Time - sample function = Opposite')

# Adjust the layout once, after every panel has been drawn.
plt.tight_layout()

# Show the plot
plt.show()




In [None]:
# Largest entry of the "opposite" pivot table left behind by the last heatmap cell that ran:
# per-column maxima first, then the maximum of those.
column_maxima = pivot_time_value_opposite.max()
column_maxima.max()

# Hypothesis testing - Statistical Tests

**Sampling strategy**  
Is there significant evidence of a difference in the `mean`/`median`/`max` (?) probability score between the **random** and **opposite** sampling strategies in the LoCoMo algorithm?

Hypotheses:
$$H_0: \mu_R=\mu_O\\
H_A:\mu_R\neq\mu_O$$

Statistical test: two-sample t-test (Welch's variant, which does not assume equal variances).    

The variable `Probabilities` is quantitative, and its values fall into 2 groups: results from the LoCoMo algorithm using the **Random** sampling strategy and results from the algorithm using the **Opposite** sampling strategy.

In [None]:
def _top_probabilities(function_name, count=10):
    """Return the `count` largest `Probabilities` recorded for one sampling function, highest first."""
    scores = all.loc[all.sample_function == function_name, 'Probabilities']
    return sorted(scores.tolist(), reverse=True)[:count]


# NOTE(review): only the ten best scores of each strategy are kept, so every statistic
# computed from these lists describes the top of each distribution, not the full sample.
random_prob = _top_probabilities('sample_finger_poses_random')
opposite_prob = _top_probabilities('sample_finger_poses_opposite')

In [None]:
from statistics import stdev

# Size, mean and sample standard deviation of the random-strategy scores.
random_mean = np.mean(random_prob)
random_stdev = stdev(random_prob)
print(len(random_prob), 'probability results with random strategy with mean=', random_mean, ' and stdev = ', random_stdev)

# Same summary for the opposite-strategy scores.
opposite_mean = np.mean(opposite_prob)
opposite_stdev = stdev(opposite_prob)
print(len(opposite_prob), 'probability results with opposite strategy with mean=', opposite_mean, ' and stdev = ', opposite_stdev)

Run the $t$-test

In [None]:
from scipy.stats import ttest_ind

# Two-sample t-test without the equal-variance assumption (equal_var=False).
# The first sample is `opposite_prob`, the second is `random_prob`.
welch_result = ttest_ind(opposite_prob, random_prob, equal_var=False)
t_statistic = welch_result.statistic
p_value = welch_result.pvalue

print('t = ', t_statistic)
print('p_value = ', p_value)

In [None]:
# Side-by-side view of the two sample means (random first, opposite second).
random_mean = np.mean(random_prob)
opposite_mean = np.mean(opposite_prob)
print(random_mean, ' vs. ', opposite_mean)

In [None]:
cmap = 'viridis'

# Overlay one semi-transparent histogram per sampling strategy.
for scores, strategy, colour in (
    (random_prob, 'random', 'red'),
    (opposite_prob, 'opposite', 'blue'),
):
    plt.hist(scores, bins=100, label=strategy, alpha=0.6, color=colour)

# Axis labels, title and legend.
plt.xlabel('LoCoMo Probability Scores')
plt.ylabel('Frequency')
plt.title('Distribution of Scores')
plt.legend(loc='best')

# Render the figure.
plt.show()


Math check...

In [None]:
# Numerator of the hand-computed t statistic.
# Ordered opposite - random to match the argument order of
# ttest_ind(opposite_prob, random_prob, ...), so the manual t carries the same sign as scipy's.
mean_diff = np.mean(opposite_prob) - np.mean(random_prob)
mean_diff

In [None]:
# Sample standard deviations (ddof=1): std1 for the random scores, std2 for the opposite scores.
std1, std2 = (np.std(sample, ddof=1) for sample in (random_prob, opposite_prob))
print('std1 =', std1, ', std2 = ', std2)

In [None]:
# Sample sizes: n1 for the random scores, n2 for the opposite scores.
n1, n2 = len(random_prob), len(opposite_prob)
print('n1 =', n1, ', n2 = ', n2)

In [None]:
# Standard error of the difference in means: sqrt(s1^2/n1 + s2^2/n2).
variance_of_mean_1 = (std1**2) / n1
variance_of_mean_2 = std2**2 / n2
denominator = np.sqrt(variance_of_mean_1 + variance_of_mean_2)
denominator

In [None]:
# Hand-computed t statistic: difference of the sample means divided by its standard error.
# Compare it with the `t_statistic` reported by `ttest_ind` above.
t = mean_diff/denominator
t

In [None]:
# Highest probability score among the selected opposite-strategy results.
max(opposite_prob)

In [None]:
# Highest probability score among the selected random-strategy results.
max(random_prob)

In [None]:
# Rows whose probability score is exactly 1.
all.loc[all['Probabilities'] == 1]

## Distance

In [None]:
from scipy.stats import f_oneway

# Distinct distance values present in the data.
distances = all['distance'].unique()

In [None]:

# One-way ANOVA of the probability scores across every distance level.
# The groups are built from `distances` instead of hand-written indices [0], [1], [2],
# so no level is silently left out if the data holds more than three distances.
f_oneway(*[all.loc[all.distance == distance, 'Probabilities'] for distance in distances])

In [None]:
cmap = 'viridis'

# Overlay one semi-transparent histogram for each of the first three distance levels.
for position, colour in enumerate(('red', 'blue', 'green')):
    level = distances[position]
    plt.hist(all[all.distance == level].Probabilities, bins=100, label=level, alpha=0.6, color=colour)

# Axis labels, title and legend.
plt.xlabel('LoCoMo Probability Scores')
plt.ylabel('Frequency')
plt.title('Distribution of Scores')
plt.legend(loc='best')

# Render the figure.
plt.show()


## Sphere Radius

In [None]:
# Distinct sphere radii present in the data.
radius = all['sphere_radius'].unique()
radius

In [None]:

# One-way ANOVA of the probability scores across every sphere radius.
# The groups are built from `radius` instead of hand-written indices [0], [1], [2],
# so no level is silently left out if the data holds more than three radii.
f_oneway(*[all.loc[all.sphere_radius == value, 'Probabilities'] for value in radius])

In [None]:
cmap = 'viridis'

# Overlay one semi-transparent histogram for each of the first three sphere radii.
for position, colour in enumerate(('mediumpurple', 'blue', 'lightseagreen')):
    level = radius[position]
    plt.hist(all[all.sphere_radius == level].Probabilities, bins=100, label=level, alpha=0.6, color=colour)

# Axis labels, title and legend.
plt.xlabel('LoCoMo Probability Scores')
plt.ylabel('Frequency')
plt.title('Distribution of Scores')
plt.legend(loc='best')

# Render the figure.
plt.show()


## Test whether the object has a significant effect

In [None]:
# Distinct object names present in the data.
objects = all['object'].unique()
objects


In [None]:
# One-way ANOVA of the probability scores across all objects.
# Fixes the first group, which read the non-existent attribute `all.objects`
# (the column is `object`) and therefore raised AttributeError.
# The groups are built from `objects`, so the test covers however many objects the data holds.
f_oneway(*[all.loc[all.object == object_name, 'Probabilities'] for object_name in objects])

### Influence of parameters on objects

**Sampling strategy**

In [None]:
# Per-object t-test (equal_var=False) between the ten highest scores of each sampling strategy.
# Note: this overwrites the global `random_prob` / `opposite_prob` lists on every iteration.
for object_name in objects:
    # Row masks must be combined with the element-wise `&`; Python's `and` tries to
    # collapse each boolean Series to a single truth value and raises ValueError.
    is_object = all.object == object_name
    is_random = all.sample_function == 'sample_finger_poses_random'
    is_opposite = all.sample_function == 'sample_finger_poses_opposite'

    random_prob = sorted(all[is_random & is_object].Probabilities.tolist(), reverse=True)[:10]
    opposite_prob = sorted(all[is_opposite & is_object].Probabilities.tolist(), reverse=True)[:10]

    t_statistic, p_value = ttest_ind(opposite_prob, random_prob, equal_var=False)
    print('Object = ', object_name)
    print(' t = ', t_statistic)
    print(' p_value = ', p_value)

**Distance**

In [None]:
# Per-object one-way ANOVA of the probability scores across the distance levels.
for object_name in objects:
    # Filter by object first, then by distance. The original combined the two conditions
    # with `and`, which raises ValueError on boolean Series.
    object_rows = all[all.object == object_name]

    # One group per distance level, however many levels `distances` holds.
    f, p_value = f_oneway(*[
        object_rows.loc[object_rows.distance == distance, 'Probabilities']
        for distance in distances
    ])

    print('Object = ', object_name)
    # Report the F statistic computed above (the original printed the stale `t_statistic`).
    print(' f = ', f)
    print(' p_value = ', p_value)

**Sphere radius**

In [None]:
# Per-object one-way ANOVA of the probability scores across the sphere radii.
for object_name in objects:
    # Filter by object first, then by radius. The original combined the two conditions
    # with `and`, which raises ValueError on boolean Series.
    object_rows = all[all.object == object_name]

    # One group per sphere radius, however many values `radius` holds.
    f, p_value = f_oneway(*[
        object_rows.loc[object_rows.sphere_radius == value, 'Probabilities']
        for value in radius
    ])

    print('Object = ', object_name)
    # Report the F statistic computed above (the original printed the stale `t_statistic`).
    print(' f = ', f)
    print(' p_value = ', p_value)