<a href="https://colab.research.google.com/github/M-Colley/colley-dissertation/blob/main/Vis_prior_work_dissertation_Colley.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Supplementary Material for the Dissertation of [Mark Colley](https://scholar.google.de/citations?user=Kt5I7wYAAAAJ&hl=de&oi=ao)
Source Code for some visualizations of the dissertation of Mark Colley entitled "Calibrating Trust in Automated Vehicles - Theoretical, Design, and Empirical Insights into Effects of Visualizations on Trust" (tbd)

In [1]:
#!pip install tikzplotlib --upgrade
!pip install pandas
!pip install --upgrade -q gspread
!pip install pdflatex
!pip install adjustText
!pip install openpyxl
!pip install matplotlib==3.7.1 #--upgrade #with version higher than 3.7.1, LaTeX (version TeXLive 2023) has issues



In [2]:
!sudo apt-get install texlive-latex-extra texlive-fonts-recommended dvipng cm-super

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cm-super is already the newest version (0.3.4-17).
dvipng is already the newest version (1.15-1.1).
texlive-fonts-recommended is already the newest version (2021.20220204-1).
texlive-latex-extra is already the newest version (2021.20220204-1).
0 upgraded, 0 newly installed, 0 to remove and 19 not upgraded.


In [3]:
import pandas as pd

# setup matplotlib to use LaTeX
import matplotlib
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
    'legend.fontsize': 'x-large',
    'figure.figsize': (15, 5),
    'axes.labelsize': 'x-large',
    'axes.titlesize':'x-large',
    'xtick.labelsize':'x-large',
    'ytick.labelsize':'x-large'
})

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from matplotlib.patches import Patch
from matplotlib.collections import PatchCollection
from matplotlib.colors import ListedColormap

# Import necessary modules
from matplotlib.ticker import FuncFormatter
from matplotlib.dates import YearLocator, DateFormatter

import pdflatex

import numpy as np
from adjustText import adjust_text

import seaborn as sns

from sklearn.metrics import r2_score

#Get Data from Google Drive

In [4]:
#from google.colab import auth
#auth.authenticate_user()

#import gspread
#from google.auth import default
#creds, _ = default()

#gc = gspread.authorize(creds)

In [5]:
#worksheet = gc.open('Dissertation-Evaluation-Prior-Work').sheet1

# Get data from public [GitHub](https://github.com/M-Colley/colley-dissertation/tree/main/data) repository

In [6]:
# get_all_values gives a list of rows.
#rows = worksheet.get_all_values()
#print(rows)

#main_df = pd.DataFrame.from_records(rows)


#url1 = 'https://github.com/M-Colley/colley-dissertation/blob/main/data/Dissertation-Evaluation-Prior-Work.csv'
url1 = 'https://github.com/M-Colley/colley-dissertation/blob/main/data/Dissertation-Evaluation-Prior-Work.xlsx?raw=true'
main_df = pd.read_excel(url1,  engine='openpyxl')
main_df


Unnamed: 0,Paper,URL,Country,Mean,SD,scale-min,scale-max,year,N,Visualization-level,SAE,Uncertainty,Own,Exclude,Reason,Remarks,Unnamed: 16,Unnamed: 17
0,locken2016autoambicar,https://dl.acm.org/doi/10.1145/3379336.3381502,Germany,,,,,,,,,,No,True,No mean provided,,,
1,wilbrink2020reflecting,http://dx.doi.org/10.1145/3004323.3004329,Germany,,,,,,,,,,No,True,No mean provided,,,
2,lindemann2018catch,https://www.mdpi.com/349948,Germany,4.62,1.41,1.0,7.0,2018.0,32.0,,5.0,No,No,False,,Took high visibility,,
3,lindemann2018catch,https://www.mdpi.com/349948,Germany,5.53,1.08,1.0,7.0,2018.0,32.0,Multiple,5.0,No,No,False,,Took high visibility,,
4,currano2021little,,USA,,,,,,,,,,No,True,No Trust,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,schneider2023dont,https://dl.acm.org/doi/pdf/10.1145/3581641.358...,Germany,,,1.0,5.0,2023.0,113.0,,4.0,No,No,True,,between-subject; trust not reported,,
77,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,2.50,0.50,1.0,5.0,2021.0,101.0,Multiple,4.0,No,No,False,,What,values are guessed; post,
78,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,2.90,0.40,1.0,5.0,2021.0,101.0,Multiple,4.0,No,No,False,,What if,values are guessed; post,
79,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,3.00,0.50,1.0,5.0,2021.0,101.0,Multiple,4.0,No,No,False,,Why,values are guessed; post,


# Define own functions

In [7]:
# Function to normalize a value
def normalize(x_vector, old_min, old_max, new_min, new_max):
    return new_min + ((x_vector - old_min) / (old_max - old_min)) * (new_max - new_min)

# Function to adjust standard deviation
#def adjust_sd(sd_vector, old_range, new_range):
#    return sd_vector * (new_range / old_range)

def adjust_sd(sd_vector, old_range, new_range, omit_na=False, make_na_to_one=False):
    #sd_vector = sd_vector.copy()
    if omit_na:
        # If any sd is NA, replace with NA
        sd_vector = sd_vector.where(pd.notna(sd_vector), np.nan)
    if make_na_to_one:
        # If any sd is NA, replace with 1.0
        sd_vector = sd_vector.where(pd.notna(sd_vector), 1.0)
    return sd_vector * (new_range / old_range)

# Set color palette
cavalcanti1 = mcolors.LinearSegmentedColormap.from_list("Cavalcanti1", ["#F2D7D5", "#D4AC0D"], N=2)
ownPalette = mcolors.LinearSegmentedColormap.from_list("Cavalcanti1", ["#000000", "#E69F00"], N=2)
visualization_levels = ["None", "Sit. Detection", "Sit. Prediction", "Man. Planning", "Multiple"]

# Make first row to header (not necessary from `.csv` but for Google Drive)

In [8]:
#main_df.rename(columns=main_df.iloc[0])

#headers = main_df.iloc[0]
#main_df  = pd.DataFrame(main_df.values[1:], columns=headers)
#main_df

In [9]:
# Convert data types
#main_df['Mean'] = pd.to_numeric(main_df['Mean'].str.replace(',', '.'), errors='coerce')
#main_df['SD'] = pd.to_numeric(main_df['SD'].str.replace(',', '.'), errors='coerce')

main_df['scale-min'] = pd.to_numeric(main_df['scale-min'], errors='coerce')
main_df['scale-max'] = pd.to_numeric(main_df['scale-max'], errors='coerce')


# Filter out rows with Exclude is True
#main_df = main_df[main_df['Exclude'] != "TRUE"]
# Filter out rows with Exclude is True and create an explicit copy
main_df = main_df[main_df['Exclude'] != "TRUE"].copy()


# show data?
#main_df

In [10]:
# Normalize trust and adjust SD
main_df['normalized_trust'] = normalize(main_df['Mean'], main_df['scale-min'], main_df['scale-max'], new_min=1, new_max=5)
main_df['normalized_sd'] = adjust_sd(main_df['SD'], old_range=main_df['scale-max'] - main_df['scale-min'], new_range=4, make_na_to_one=True)

# Reorder the 'Visualization-level' column
main_df['Visualization-level'] = pd.Categorical(main_df['Visualization-level'], categories=visualization_levels, ordered=True)

# Convert columns to factor
main_df['Uncertainty'] = main_df['Uncertainty'].astype('category')
main_df['Own'] = main_df['Own'].astype('category')

# Creating the citation labels
main_df['Paper_adj'] = "\\cite{" + main_df['Paper'] + "}"


main_df
#main_df['Paper_adj_2'] = "[" + main_df['Paper'] + "]"


Unnamed: 0,Paper,URL,Country,Mean,SD,scale-min,scale-max,year,N,Visualization-level,...,Uncertainty,Own,Exclude,Reason,Remarks,Unnamed: 16,Unnamed: 17,normalized_trust,normalized_sd,Paper_adj
0,locken2016autoambicar,https://dl.acm.org/doi/10.1145/3379336.3381502,Germany,,,,,,,,...,,No,True,No mean provided,,,,,,\cite{locken2016autoambicar}
1,wilbrink2020reflecting,http://dx.doi.org/10.1145/3004323.3004329,Germany,,,,,,,,...,,No,True,No mean provided,,,,,,\cite{wilbrink2020reflecting}
2,lindemann2018catch,https://www.mdpi.com/349948,Germany,4.62,1.41,1.0,7.0,2018.0,32.0,,...,No,No,False,,Took high visibility,,,3.413333,0.94,\cite{lindemann2018catch}
3,lindemann2018catch,https://www.mdpi.com/349948,Germany,5.53,1.08,1.0,7.0,2018.0,32.0,Multiple,...,No,No,False,,Took high visibility,,,4.020000,0.72,\cite{lindemann2018catch}
4,currano2021little,,USA,,,,,,,,...,,No,True,No Trust,,,,,,\cite{currano2021little}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,schneider2023dont,https://dl.acm.org/doi/pdf/10.1145/3581641.358...,Germany,,,1.0,5.0,2023.0,113.0,,...,No,No,True,,between-subject; trust not reported,,,,1.00,\cite{schneider2023dont}
77,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,2.50,0.50,1.0,5.0,2021.0,101.0,Multiple,...,No,No,False,,What,values are guessed; post,,2.500000,0.50,\cite{omeiza2021not}
78,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,2.90,0.40,1.0,5.0,2021.0,101.0,Multiple,...,No,No,False,,What if,values are guessed; post,,2.900000,0.40,\cite{omeiza2021not}
79,omeiza2021not,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,USA,3.00,0.50,1.0,5.0,2021.0,101.0,Multiple,...,No,No,False,,Why,values are guessed; post,,3.000000,0.50,\cite{omeiza2021not}


In [11]:
# ordering for the x-axis
main_df = main_df.sort_values(by="Visualization-level", ascending=True)

# Creating a color map for the 'Own' column
unique_owns = main_df['Own'].unique()
colors = {own: ownPalette(i) for i, own in enumerate(unique_owns)}

# Define the mapping from visualization levels to numbers
vis_level_mapping = {level: i for i, level in enumerate(main_df['Visualization-level'].unique())}

# This would be the correct one, for the thesis this was done in R

Do not execute, is buggy

In [12]:
"""
fig, ax = plt.subplots(figsize=(7,9))

# Define the mapping from visualization levels to numbers
vis_level_mapping = {level: i for i, level in enumerate(main_df['Visualization-level'].unique())}

plt.style.use('seaborn-v0_8-colorblind')

texts = []  # Create an empty list to hold the text objects
points = []  # Create an empty list to hold the points

for i, row in main_df.iterrows():

    x = vis_level_mapping[row['Visualization-level']]
    y = row['normalized_trust']

    ax.scatter(x, y, s=row['normalized_sd']*100, color=colors[row['Own']], alpha=0.8)
    text = ax.text(x, y, row['Paper_adj'])

    texts.append(text)  # Add the text object to the list
    points.append((x, y))  # Add the point to the list

# Use adjust_text to iterate through the text objects and adjust their position to minimize overlaps
adjust_text(texts)

# Draw arrows from points to their corresponding adjusted texts
for text, (x, y) in zip(texts, points):
    x_text, y_text = text.get_position()
    ax.annotate('', xy=(x, y), xytext=(x_text, y_text),
                arrowprops=dict(arrowstyle='-', color='black'))

# Set the x-ticks to be the names of the visualization levels
ax.set_xticks(range(len(vis_level_mapping)))
ax.set_xticklabels(vis_level_mapping.keys())

ax.set_ylim(1.5,4.5)
ax.set_ylabel('Trust in Automation (normalized)')
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')
ax.grid(True)

#plt.savefig('prior_work_categorized.pgf')
"""

"\nfig, ax = plt.subplots(figsize=(7,9))\n\n# Define the mapping from visualization levels to numbers\nvis_level_mapping = {level: i for i, level in enumerate(main_df['Visualization-level'].unique())}\n\nplt.style.use('seaborn-v0_8-colorblind')\n\ntexts = []  # Create an empty list to hold the text objects\npoints = []  # Create an empty list to hold the points\n\nfor i, row in main_df.iterrows():\n\n    x = vis_level_mapping[row['Visualization-level']]\n    y = row['normalized_trust']\n\n    ax.scatter(x, y, s=row['normalized_sd']*100, color=colors[row['Own']], alpha=0.8)\n    text = ax.text(x, y, row['Paper_adj'])\n\n    texts.append(text)  # Add the text object to the list\n    points.append((x, y))  # Add the point to the list\n\n# Use adjust_text to iterate through the text objects and adjust their position to minimize overlaps\nadjust_text(texts)\n\n# Draw arrows from points to their corresponding adjusted texts\nfor text, (x, y) in zip(texts, points):\n    x_text, y_text = text.

# Now visualize the own work in their relation to the Levels of Capability

##Prepare Data

In [13]:
#worksheet2 = gc.open('Dissertation-Own-Work').sheet1

# get_all_values gives a list of rows.
#rows = worksheet2.get_all_values()
#print(rows)

#main_df_2 = pd.DataFrame.from_records(rows)


url2 = 'https://github.com/M-Colley/colley-dissertation/blob/main/data/Dissertation-Own-Work.xlsx?raw=true'
main_df_2 = pd.read_excel(url2)

#headers = main_df_2.iloc[0]
#main_df_2  = pd.DataFrame(main_df_2.values[1:], columns=headers)

# Convert data types
#main_df_2['Mean'] = pd.to_numeric(main_df_2['Mean'].str.replace(',', '.'), errors='coerce')
#main_df_2['SD'] = pd.to_numeric(main_df_2['SD'].str.replace(',', '.'), errors='coerce')

#main_df_2['scale-min'] = pd.to_numeric(main_df_2['scale-min'], errors='coerce')
#main_df_2['scale-max'] = pd.to_numeric(main_df_2['scale-max'], errors='coerce')

#main_df_2['metric'] = pd.to_numeric(main_df_2['metric'].str.replace(',', '.'), errors='coerce')

# Filter out rows with Exclude is True
main_df_2 = main_df_2[main_df_2['Exclude'] != "TRUE"]



# Normalize trust and adjust SD
main_df_2['normalized_trust'] = normalize(main_df_2['Mean'], main_df_2['scale-min'], main_df_2['scale-max'], new_min=1, new_max=5)
main_df_2['normalized_sd'] = adjust_sd(main_df_2['SD'], old_range=main_df_2['scale-max'] - main_df_2['scale-min'], new_range=4, make_na_to_one=True)

main_df_2['Visualization-level'] = pd.Categorical(main_df_2['Visualization-level'], categories=visualization_levels, ordered=True)

# Convert columns to factor
main_df_2['Uncertainty'] = main_df_2['Uncertainty'].astype('category')
main_df_2['Own'] = main_df_2['Own'].astype('category')


# Creating the citation labels
main_df_2['Paper_adj'] = "\\cite{" + main_df_2['Paper'] + "}"
#main_df_2['publication_adj'] = "\\cite{" + main_df_2['publication'] + "}"
#main_df_2['text_combined'] = main_df_2.apply(lambda row: str(row['remarks_display']) + "; " + str(row['publication']) if row['publication'] else str(row['remarks_display']), axis=1)

main_df_2['text_combined'] = main_df_2.apply(
    lambda row: "; ".join(filter(pd.notna, [row['remarks_display'], row['publication']])),
    axis=1
)

# ordering for the x-axis
main_df_2 = main_df_2.sort_values(by="Visualization-level", ascending=True)

# Creating a color map for the 'Own' column
unique_owns = main_df_2['Own'].unique()
colors = {own: ownPalette(i) for i, own in enumerate(unique_owns)}

# Define the mapping from visualization levels to numbers
vis_level_mapping = {level: i for i, level in enumerate(main_df_2['Visualization-level'].unique())}

#print(main_df_2)

## Try with matplotlib

Do not run, is currently not needed

In [14]:
"""
# available palettes: https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html
plt.style.use('seaborn-v0_8-colorblind')

# Define the mapping from visualization levels to numbers
vis_level_mapping_2 = {level: i for i, level in enumerate(main_df_2['metric'].unique())}

# Get the unique visualization levels
vis_levels = main_df_2['Visualization-level'].unique()

# Determine the number of rows for the subplot grid
n = len(vis_levels)

fig, axs = plt.subplots(n, 1, figsize=(10, 5*n), sharex=True, sharey=True)

for ax, level in zip(axs, vis_levels):
    data = main_df_2[main_df_2['Visualization-level'] == level]

    for i, row in data.iterrows():
        ax.scatter(vis_level_mapping_2[row['metric']], row['normalized_trust'], s=row['normalized_sd']*100, alpha=0.8)
        ax.text(vis_level_mapping_2[row['metric']], row['normalized_trust'], row['Paper_adj'])
        ax.text(vis_level_mapping_2[row['metric']] + 0.8, row['normalized_trust'], row['text_combined'])

    ax.set_title('Visualization-level: ' + str(level))
    ax.set_ylim(1,5)
    ax.set_ylabel('Trust in Automation')
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.grid(True)

plt.xlabel('Metric')
plt.tight_layout()

plt.savefig('prior_work_categorized_only_own.pgf')
"""

"\n# available palettes: https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html\nplt.style.use('seaborn-v0_8-colorblind')\n\n# Define the mapping from visualization levels to numbers\nvis_level_mapping_2 = {level: i for i, level in enumerate(main_df_2['metric'].unique())}\n\n# Get the unique visualization levels\nvis_levels = main_df_2['Visualization-level'].unique()\n\n# Determine the number of rows for the subplot grid\nn = len(vis_levels)\n\nfig, axs = plt.subplots(n, 1, figsize=(10, 5*n), sharex=True, sharey=True)\n\nfor ax, level in zip(axs, vis_levels):\n    data = main_df_2[main_df_2['Visualization-level'] == level]\n\n    for i, row in data.iterrows():\n        ax.scatter(vis_level_mapping_2[row['metric']], row['normalized_trust'], s=row['normalized_sd']*100, alpha=0.8)\n        ax.text(vis_level_mapping_2[row['metric']], row['normalized_trust'], row['Paper_adj'])\n        ax.text(vis_level_mapping_2[row['metric']] + 0.8, row['normalized_trust'], row['te

## Try with Seaborn

This generates one plot with 3 subplots

In [15]:
"""
# Define the mapping from visualization levels to numbers
vis_level_mapping_2 = {level: i for i, level in enumerate(main_df_2['metric'].unique())}

# Create a facet grid
g = sns.FacetGrid(main_df_2, col='Visualization-level', col_wrap=2, height=5)


texts = []  # Create an empty list to hold the text objects


# Define the function to create a scatterplot on an Axes
def scatterplot(data, color, label=None):
    ax = plt.gca()
    for _, row in data.iterrows():
        x = row['metric']#vis_level_mapping_2.get(row['metric'], 1)
        y = row['normalized_trust']
        ax.scatter(x, y, s=row['normalized_sd']*100, alpha=0.8)

        text = ax.text(x, y, row['Paper_adj'])
        texts.append(text)  # Add the text object to the list


        ax.text(x, y, row['Paper_adj'])
        ax.text(x + 14, y, row['text_combined'])

        # Add x value of the data point as a text label on the x-axis
        #ax.text(x, 0, row['metric'], ha='center', va='top')

        # Add a vertical line from the data point down to the x-axis
        ax.axvline(x, 0, y, color=color, alpha=0.15)


    # Use adjust_text to iterate through the text objects and adjust their position to minimize overlaps
    #adjust_text(texts)

    # Draw arrows from points to their corresponding adjusted texts
    #for text, (x, y) in zip(texts, points):
    #    x_text, y_text = text.get_position()
    #    ax.annotate('', xy=(x, y), xytext=(x_text, y_text),
    #                arrowprops=dict(arrowstyle='-', color='black'))
    ax.set_ylim(1, 5)
    ax.set_ylabel('Trust in Automation')
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.grid(True)

    ax.set_xlim(0, 100)


# Apply this function to each Axes in the grid
g.map_dataframe(scatterplot)

# Automatically adjust subplot params so that the subplots fits in to the figure area.
plt.tight_layout()

plt.savefig('prior_work_categorized_only_own_test_sns.pgf')
"""

"\n# Define the mapping from visualization levels to numbers\nvis_level_mapping_2 = {level: i for i, level in enumerate(main_df_2['metric'].unique())}\n\n# Create a facet grid\ng = sns.FacetGrid(main_df_2, col='Visualization-level', col_wrap=2, height=5)\n\n\ntexts = []  # Create an empty list to hold the text objects\n\n\n# Define the function to create a scatterplot on an Axes\ndef scatterplot(data, color, label=None):\n    ax = plt.gca()\n    for _, row in data.iterrows():\n        x = row['metric']#vis_level_mapping_2.get(row['metric'], 1)\n        y = row['normalized_trust']\n        ax.scatter(x, y, s=row['normalized_sd']*100, alpha=0.8)\n\n        text = ax.text(x, y, row['Paper_adj'])\n        texts.append(text)  # Add the text object to the list\n\n\n        ax.text(x, y, row['Paper_adj'])\n        ax.text(x + 14, y, row['text_combined'])\n\n        # Add x value of the data point as a text label on the x-axis\n        #ax.text(x, 0, row['metric'], ha='center', va='top')\n\n  

# Visualization of Trust in Automation Based on Different Visualization Levels

This script is designed to visualize trust in automation based on various visualization levels, using a dataset that includes specific metrics and normalized values. The process includes:

1. **Exclusion of Specific Values:**
   - The data is filtered to exclude specific values such as 'None' and 'Multiple', resulting in a DataFrame that contains only the desired visualization levels.

2. **Categorization and Ordering:**
   - The visualization levels are categorized into specific groups like "Sit. Detection," "Sit. Prediction," and "Man. Planning," and are ordered accordingly.

3. **Creation of FacetGrid:**
   - A Seaborn FacetGrid is created, dividing the data into different facets based on the 'Visualization-level'. The grid is configured with specific dimensions and settings.

4. **Custom Scatterplot Function:**
   - A custom function, `scatterplot_1`, is defined to create scatterplots for each facet.
   - Each scatterplot represents different metrics and normalized trust values, with point sizes based on the normalized standard deviation.
   - Additional annotations, texts, and vertical lines are added to enhance readability and provide context for each data point.

5. **Mapping Scatterplot Function to FacetGrid:**
   - The custom scatterplot function is applied to each facet in the grid, resulting in a multi-faceted visualization.

6. **Final Adjustments and Saving:**
   - The layout is automatically adjusted to ensure that the subplots fit properly.
   - The final plot is saved as a PGF file named 'prior_work_categorized_only_own_test_sns_1.pgf'.

The resulting visualization provides an insightful representation of trust in automation, categorized by different visualization levels. It offers a comprehensive view of the relationship between various metrics and trust levels, facilitating further analysis and interpretation.


---



# This makes singular plots



In [16]:
# List of values to exclude
exclude_values = ['None', 'Multiple']

# Create a filtered DataFrame
filtered_df = main_df_2[~main_df_2['Visualization-level'].isin(exclude_values)].copy()

# Remove rows where 'normalized_trust' is NA
filtered_df = filtered_df.dropna(subset=['normalized_trust'])

# Specify the order for 'Visualization-level'
visualization_levels_1 = ["Sit. Detection", "Sit. Prediction", "Man. Planning"]
filtered_df['Visualization-level'] = pd.Categorical(filtered_df['Visualization-level'], categories=visualization_levels_1, ordered=True)

# Define a dictionary to hold xlim values for each level
xlim_dict = {
    "Sit. Detection": (67, 88),
    "Sit. Prediction": (0, 85),
    "Man. Planning": (69, 85)
}

color_dict = {
    "Sit. Detection": 'blue',
    "Sit. Prediction": 'orange',
    "Man. Planning": 'purple'
}

xname_dict = {
    "Sit. Detection": "mIoU",
    "Sit. Prediction": "AP or minFDE",
    "Man. Planning": "Manuever Planning capability"
}


# Define the function to create a scatterplot on an Axes
def scatterplot_1(data, level):
    # Create a new figure and axes
    fig, ax = plt.subplots()

    # Loop through each row in the DataFrame
    for _, row in data.iterrows():
        x = row['metric']
        y = row['normalized_trust']

        # Plot the scatter point
        ax.scatter(x, y, s=row['normalized_sd']*100, alpha=0.8, color=color_dict[level])

        # Add text labels
        fontsize = 13
        ax.text(x, y, row['Paper_adj'], fontsize=fontsize)
        # Only add this line when it's not 'Sit. Detection'
        if row['Visualization-level'] == 'Sit. Prediction':
            ax.text(x + 5, y, row['text_combined'], fontsize=fontsize)
        if row['Visualization-level'] == 'Man. Planning':
            ax.text(x + 1, y, row['text_combined'], fontsize=fontsize)

        # Add vertical line from data point to x-axis
        # removed bc of visual clutter
        # ax.axvline(x, 0, y, color=color_dict[level], alpha=0.1, zorder=0, linestyle='dashed')

    # Additional plot settings
    ax.set_ylim(2, 5)
    ax.set_ylabel('Trust in Automation (Low to High)')
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.grid(True)
    # Set axis labels and limits
    ax.set_xlabel(xname_dict[level])
    ax.set_xlim(xlim_dict[level])

    # Save the figure
    plt.savefig(f'prior_work_{data.iloc[0]["Visualization-level"]}.pgf')

# Loop through unique 'Visualization-level' categories and plot each one
for level in visualization_levels_1:
    sub_df = filtered_df[filtered_df['Visualization-level'] == level]
    scatterplot_1(sub_df, level)


# This is the one I use in the disserstation

In [51]:
import matplotlib.lines as mlines


# List of values to exclude
exclude_values = ['None', 'Multiple']

# Create a filtered DataFrame
filtered_df = main_df_2[~main_df_2['Visualization-level'].isin(exclude_values)].copy()

# Remove rows where 'normalized_trust' is NA
filtered_df = filtered_df.dropna(subset=['normalized_trust'])

# Specify the order for 'Visualization-level'
visualization_levels_1 = ["Sit. Detection", "Sit. Prediction", "Man. Planning"]
filtered_df['Visualization-level'] = pd.Categorical(filtered_df['Visualization-level'], categories=visualization_levels_1, ordered=True)

# Define a dictionary to hold xlim values for each level
xlim_dict = {
    "Sit. Detection": (67, 88),
    "Sit. Prediction": (0, 85),
    "Man. Planning": (69, 85)
}

color_dict = {
    "Sit. Detection": 'blue',
    "Sit. Prediction": 'orange',
    "Man. Planning": 'purple'
}

xname_dict = {
    "Sit. Detection": "mIoU",
    "Sit. Prediction": "AP or minFDE",
    "Man. Planning": "Manuever Planning capability"
}


# Define a list of distinct colors
distinct_colors = [
    '#e6194B', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#46f0f0',
    '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8',
    '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#000000'
]


# Generate a color dictionary for each unique 'Paper_adj'
unique_papers = filtered_df['Paper_adj'].unique()
#color_dict = {paper: plt.cm.tab20(i) for i, paper in enumerate(unique_papers)}
# Generate a color dictionary for each unique 'Paper_adj'
color_dict = {paper: distinct_colors[i % len(distinct_colors)] for i, paper in enumerate(unique_papers)}


# Function to create a scatter plot on a given Axes
def scatterplot_1(ax, data, level, is_middle):
    # Loop through each unique paper to plot
    for paper in unique_papers:
        subset = data[data['Paper_adj'] == paper]

        ax.scatter(subset['metric'], subset['normalized_trust'],
                   s=np.pi * ((subset['normalized_sd']*10) ** 2), alpha=0.8,
                   color=color_dict[paper], label=paper)



    # subset['normalized_sd']*100


    # Optional: Add text labels for Sit. Prediction and Man. Planning based on 'text_combined'
    # ... (your existing logic for adding text labels based on 'Visualization-level')

    # Set y-axis label only for the middle subplot
    if is_middle:
        ax.set_ylabel('Trust in Automation (Low to High)', labelpad=20, fontsize=21)  # Increased labelpad for distance

    # Set x-axis labels and limits
    ax.set_xlabel(xname_dict[level])
    ax.set_xlim(xlim_dict[level])

    # Additional plot settings
    ax.set_ylim(2, 5)
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.grid(True)

# Create a new figure with subplots
fig, axes = plt.subplots(3, 1, figsize=(15, 15), sharey=True)  # Share y-axis

# Plot each level on a separate subplot
for i, level in enumerate(visualization_levels_1):
    sub_df = filtered_df[filtered_df['Visualization-level'] == level]
    is_middle = i == 1  # Only the middle subplot
    scatterplot_1(axes[i], sub_df, level, is_middle)

# Create a legend with all unique papers from all subplots
handles, labels = [], []
for ax in axes:
    for handle, label in zip(*ax.get_legend_handles_labels()):
        if label not in labels:
            handles.append(handle)
            labels.append(label)


# Create custom legend handles with the desired marker size
custom_handles = [mlines.Line2D([], [], color=color_dict[paper], marker='o', linestyle='None',
                                markersize=np.sqrt(np.pi*7**2), # Size of the legend marker
                                label=paper) for paper in unique_papers]

# Place the legend above the subplots and store the legend object in a variable
legend = fig.legend(handles=custom_handles,
                    loc='upper center', ncol=len(labels),
                    title='Paper Reference', fontsize='large',
                    labelspacing=1.0, columnspacing=5.5,
                    title_fontsize=20)
#legend = fig.legend(handles, labels, loc='upper center', ncol=len(labels), title='Paper Reference', fontsize='large')

# Remove the border of the legend by setting the edge color to 'none'
legend.get_frame().set_edgecolor('none')  # This will work now

# Adjust the layout to make room for the legend and prevent overlap
# Modify the rect parameter to leave more space at the top for the legend
plt.tight_layout(rect=[0, 0.03, 1, 0.925])  # Adjust the bottom and top

# Place the legend above the subplots and further away from the plot
# Adjust the 'bbox_to_anchor' to move the legend up
#fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, 1.02),
#           ncol=len(labels), title='Paper Reference', fontsize='small')

# Save the figure
plt.savefig('combined_prior_work.pgf', bbox_inches='tight')

plt.show()  # Display the combined plot


# This is the one to have one single figure

In [19]:
# List of values to exclude
exclude_values = ['None', 'Multiple']

# Create a filtered DataFrame
filtered_df = main_df_2[~main_df_2['Visualization-level'].isin(exclude_values)].copy()

visualization_levels_1 = ["Sit. Detection", "Sit. Prediction", "Man. Planning"]
filtered_df['Visualization-level'] = pd.Categorical(filtered_df['Visualization-level'], categories=visualization_levels_1, ordered=True)

# Now use the filtered DataFrame to create your FacetGrid
# TODO adjust col_wrap
g = sns.FacetGrid(filtered_df, col='Visualization-level', col_wrap=2, height=5)


texts = []  # Create an empty list to hold the text objects


# Define the function to create a scatterplot on an Axes
def scatterplot_1(data, color, label=None):
    ax = plt.gca()
    for _, row in data.iterrows():
        x = row['metric']#vis_level_mapping_2.get(row['metric'], 1)
        y = row['normalized_trust']
        ax.scatter(x, y, s=row['normalized_sd']*100, alpha=0.8)

        fontsize = 13  # Adjust this value to set the desired font size

        text = ax.text(x, y, row['Paper_adj'], fontsize=fontsize)
        texts.append(text)  # Add the text object to the list


        ax.text(x, y, row['Paper_adj'], fontsize=fontsize)
        #ax.text(x + 20, y, row['text_combined'], fontsize=fontsize)

        # Add x value of the data point as a text label on the x-axis
        #ax.text(x, 0, row['metric'], ha='center', va='top')

        # Add a vertical line from the data point down to the x-axis
        ax.axvline(x, 0, y, color=color, alpha=0.1, zorder=0, linestyle='dashed')


    # Use adjust_text to iterate through the text objects and adjust their position to minimize overlaps
    #adjust_text(texts)

    # Draw arrows from points to their corresponding adjusted texts
    #for text, (x, y) in zip(texts, points):
    #    x_text, y_text = text.get_position()
    #    ax.annotate('', xy=(x, y), xytext=(x_text, y_text),
    #                arrowprops=dict(arrowstyle='-', color='black'))
    ax.set_ylim(2, 5)
    #ax.set_ylim(1, 5) # original version
    ax.set_ylabel('Trust in Automation (Low to High)')
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed')
    ax.grid(True)

    ax.set_xlim(0, 100)


# Apply this function to each Axes in the grid
g.map_dataframe(scatterplot_1)

# Automatically adjust subplot params so that the subplots fits in to the figure area.
plt.tight_layout()


plt.savefig('prior_work_categorized_only_own_test_sns_1.pgf')

# Visualization of Interest, Ease, and Expected Reality for Automated Vehicles (AVs)

This script performs an analysis and visualization of the expectation that autonomous vehicles (AVs) will become a reality in the next 10 years, whether it will ease the lives of users, and their interest. The process includes the following key steps:

1. **Data Grouping:** The data is grouped by year, citation, and a core categorization, and the mean and standard deviation of a specified numeric variable are computed for each group.

2. **Color Mapping:** A colormap is generated to represent unique core categorizations, and each unique core value is mapped to a specific color.

3. **Scatter Plot Creation:** A scatter plot is created to visualize the mean expectation reality over different years. The plot includes:
   - Points representing the mean values, with sizes based on the standard deviation.
   - Colors representing different core categorizations.
   - Custom labels, grid lines, and axis ticks.
   - A regression line that fits the trend of the data.

4. **Legend Construction:** A legend is added to the plot, with each color corresponding to a unique core categorization.

5. **Saving the Plot:** The final plot is adjusted for layout and saved as a PGF file, providing a visual representation for different citations over the years.


## The combined dataset (dissertation_colley_combined_interest_ease_reality) over the years is looked at here


In [20]:
#worksheet3 = gc.open('combined_interest_ease_reality').sheet1


# get_all_values gives a list of rows.
#rows = worksheet3.get_all_values()
#print(rows)

#main_df_3 = pd.DataFrame.from_records(rows)


url3 = 'https://github.com/M-Colley/colley-dissertation/blob/main/data/dissertation_colley_combined_interest_ease_reality.csv?raw=true'
main_df_3 = pd.read_csv(url3, sep=';')

#headers = main_df_3.iloc[0]
#main_df_3  = pd.DataFrame(main_df_3.values[1:], columns=headers)

main_df_3['year'] = main_df_3['year'].astype(int)
main_df_3['interest'] = main_df_3['interest'].astype(int)
main_df_3['ease'] = main_df_3['ease'].astype(int)
main_df_3['reality'] = main_df_3['reality'].astype(int)

main_df_3


Unnamed: 0,interest,ease,reality,year,Core,citation,doi
0,5,3,2,2020,Yes,\cite{core-colley2020effect},10.1145/3409120.3410648
1,5,5,4,2020,Yes,\cite{core-colley2020effect},10.1145/3409120.3410648
2,5,4,5,2020,Yes,\cite{core-colley2020effect},10.1145/3409120.3410648
3,3,1,4,2020,Yes,\cite{core-colley2020effect},10.1145/3409120.3410648
4,4,4,4,2020,Yes,\cite{core-colley2020effect},10.1145/3409120.3410648
...,...,...,...,...,...,...,...
1673,5,5,4,2023,Yes,\cite{core-colley2024bo},
1674,4,4,4,2023,Yes,\cite{core-colley2024bo},
1675,4,5,5,2023,Yes,\cite{core-colley2024bo},
1676,5,5,3,2023,Yes,\cite{core-colley2024bo},


In [21]:
# Compute the mean and standard deviation of 'numeric_variable' for each year and citation
grouped_df = main_df_3.groupby(['year', 'citation', 'Core'])['interest'].agg(['mean', 'std']).reset_index()

# uncomment to print
#print(grouped_df)

# Create a colormap with as many colors as there are unique citations
#colormap = ListedColormap(sns.color_palette('husl', n_colors=grouped_df['citation'].nunique()))
colormap = ListedColormap(sns.color_palette('coolwarm', n_colors=grouped_df['Core'].nunique()))


# Create a dictionary mapping each unique citation to a color
color_dict = dict(zip(grouped_df['Core'].unique(), colormap.colors))

# Create a new column in the DataFrame with the color corresponding to each citation
grouped_df['color'] = grouped_df['Core'].map(color_dict)

# Create a scatter plot
fig, ax = plt.subplots(figsize=(10, 6))

# Increase the size multiplier for the standard deviation
scatter = ax.scatter(grouped_df['year'], grouped_df['mean'], s=grouped_df['std']*100, color=grouped_df['color'], alpha=0.8)

# Adding citation as text annotation for each point
for i in range(len(grouped_df)):
    ax.text(grouped_df.loc[i, 'year']+0.05, grouped_df.loc[i, 'mean'], grouped_df.loc[i, 'citation'], fontsize='large')

plt.xlabel('Year', fontsize='x-large')
plt.ylabel('Mean of interest', fontsize='x-large')
plt.title('Mean of interest for each citation over years', fontsize='x-large')
plt.grid(True, linestyle='dashed')

# Dynamically set y-ticks based on data and round to two decimal places
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2f}'.format(y)))
plt.yticks(np.linspace(grouped_df['mean'].min(), grouped_df['mean'].max(), 5))

# Adjust the x-axis to only show the values 2019, 2020, and 2021
plt.xticks([2019, 2020, 2021, 2023])
# Adjust the x-axis limits to add padding on the left and right
ax.set_xlim(2018.5, 2023.5)

plt.savefig('interest.pgf')


In [22]:
# Compute the mean and standard deviation of 'numeric_variable' for each year and citation
grouped_df = main_df_3.groupby(['year', 'citation', 'Core'])['interest'].agg(['mean', 'std']).reset_index()

# Create a colormap with as many colors as there are unique citations
#colormap = ListedColormap(sns.color_palette('husl', n_colors=grouped_df['citation'].nunique()))
colormap = ListedColormap(sns.color_palette('coolwarm', n_colors=grouped_df['Core'].nunique()))


# Create a dictionary mapping each unique citation to a color
color_dict = dict(zip(grouped_df['Core'].unique(), colormap.colors))

# Create a new column in the DataFrame with the color corresponding to each citation
grouped_df['color'] = grouped_df['Core'].map(color_dict)


# Create a scatter plot
fig, ax = plt.subplots(figsize=(10, 6))

scatter = ax.scatter(grouped_df['year'], grouped_df['mean'], s=grouped_df['std']*100, color=grouped_df['color'], alpha=0.8)

# Adding citation as text annotation for each point
#for i in range(len(grouped_df)):
#    ax.text(grouped_df.loc[i, 'year']+0.05, grouped_df.loc[i, 'mean'], grouped_df.loc[i, 'citation'], fontsize='x-large')

plt.xlabel('Year', fontsize='x-large')
plt.ylabel('Mean of interest', fontsize='x-large')
plt.title('Mean of interest for each citation over years', fontsize='x-large')


plt.grid(True, linestyle='dashed')

# Dynamically set y-ticks based on data and round to two decimal places
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2f}'.format(y)))
plt.yticks(np.linspace(grouped_df['mean'].min(), grouped_df['mean'].max(), 5))

# Adjust the x-axis to only show the values 2019, 2020, and 2021
plt.xticks([2019, 2020, 2021, 2023])
# Adjust the x-axis limits to add padding on the left and right
ax.set_xlim(2018.5, 2023.5)

# Compute the parameters of the regression line
slope, intercept = np.polyfit(grouped_df['year'], grouped_df['mean'], 1)

# Add the regression line to the plot
plt.plot(grouped_df['year'], slope*grouped_df['year'] + intercept, color='red')

# Create legend handles manually
legend_elements = [Patch(facecolor=color_dict[c], edgecolor=color_dict[c], label=c) for c in grouped_df['Core'].unique()]

# Create the legend
ax.legend(handles=legend_elements, title='Core')

plt.tight_layout()
plt.savefig('interest_2.pgf')


In [23]:
# Compute the mean and standard deviation of 'numeric_variable' for each year and citation
grouped_df = main_df_3.groupby(['year', 'citation', 'Core'])['ease'].agg(['mean', 'std']).reset_index()

# Create a colormap with as many colors as there are unique citations
#colormap = ListedColormap(sns.color_palette('husl', n_colors=grouped_df['citation'].nunique()))
colormap = ListedColormap(sns.color_palette('coolwarm', n_colors=grouped_df['Core'].nunique()))


# Create a dictionary mapping each unique citation to a color
color_dict = dict(zip(grouped_df['Core'].unique(), colormap.colors))

# Create a new column in the DataFrame with the color corresponding to each citation
grouped_df['color'] = grouped_df['Core'].map(color_dict)

# Create a scatter plot
fig, ax = plt.subplots(figsize=(10, 6))

# Increase the size multiplier for the standard deviation
scatter = ax.scatter(grouped_df['year'], grouped_df['mean'], s=grouped_df['std']*100, color=grouped_df['color'], alpha=0.8)

# Adding citation as text annotation for each point
#for i in range(len(grouped_df)):
#    ax.text(grouped_df.loc[i, 'year'], grouped_df.loc[i, 'mean'], grouped_df.loc[i, 'citation'], fontsize='x-large')

plt.xlabel('Year', fontsize='x-large')
plt.ylabel('Mean of easing of life', fontsize='x-large')
plt.title('Mean of easing of life for each citation over years', fontsize='x-large')
plt.grid(True, linestyle='dashed')
#plt.yticks([3, 4, 5])

# Dynamically set y-ticks based on data and round to two decimal places
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2f}'.format(y)))
plt.yticks(np.linspace(grouped_df['mean'].min(), grouped_df['mean'].max(), 5))

# Adjust the x-axis to only show the values 2019, 2020, and 2021
plt.xticks([2019, 2020, 2021, 2023])
# Adjust the x-axis limits to add padding on the left and right
ax.set_xlim(2018.5, 2023.5)


# Compute the parameters of the regression line
slope, intercept = np.polyfit(grouped_df['year'], grouped_df['mean'], 1)

# Add the regression line to the plot
plt.plot(grouped_df['year'], slope*grouped_df['year'] + intercept, color='red')


# Create legend handles manually
legend_elements = [Patch(facecolor=color_dict[c], edgecolor=color_dict[c], label=c) for c in grouped_df['Core'].unique()]

# Create the legend
ax.legend(handles=legend_elements, title='Core')

plt.tight_layout()
plt.savefig('ease.pgf')

In [24]:
# Compute the mean and standard deviation of 'numeric_variable' for each year and citation
grouped_df = main_df_3.groupby(['year', 'citation', 'Core'])['reality'].agg(['mean', 'std']).reset_index()

# Create a colormap with as many colors as there are unique citations
#colormap = ListedColormap(sns.color_palette('husl', n_colors=grouped_df['citation'].nunique()))
colormap = ListedColormap(sns.color_palette('coolwarm', n_colors=grouped_df['Core'].nunique()))


# Create a dictionary mapping each unique citation to a color
color_dict = dict(zip(grouped_df['Core'].unique(), colormap.colors))

# Create a new column in the DataFrame with the color corresponding to each citation
grouped_df['color'] = grouped_df['Core'].map(color_dict)

# Create a scatter plot
fig, ax = plt.subplots(figsize=(10, 6))

# Increase the size multiplier for the standard deviation
scatter = ax.scatter(grouped_df['year'], grouped_df['mean'], s=grouped_df['std']*100, color=grouped_df['color'], alpha=0.8)

# Adding citation as text annotation for each point
#for i in range(len(grouped_df)):
    #ax.text(grouped_df.loc[i, 'year'], grouped_df.loc[i, 'mean'], grouped_df.loc[i, 'citation'], fontsize='x-large')

plt.xlabel('Year', fontsize='x-large')
plt.ylabel('Mean of expectation reality in 10 years', fontsize='x-large')
plt.title('Mean of expectation that AVs are reality in 10 years for each citation over years', fontsize='x-large')
plt.grid(True, linestyle='dashed')

# Dynamically set y-ticks based on data and round to two decimal places
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2f}'.format(y)))
plt.yticks(np.linspace(grouped_df['mean'].min(), grouped_df['mean'].max(), 5))

# Adjust the x-axis to only show the values 2019, 2020, and 2021
plt.xticks([2019, 2020, 2021, 2023])
# Adjust the x-axis limits to add padding on the left and right
ax.set_xlim(2018.5, 2023.5)


# Compute the parameters of the regression line
slope, intercept = np.polyfit(grouped_df['year'], grouped_df['mean'], 1)

# Compute the predicted values and the R-squared score
predicted = slope*grouped_df['year'] + intercept
r_squared = r2_score(grouped_df['mean'], predicted)

# Add the regression line to the plot
plt.plot(grouped_df['year'], predicted, color='red')

# Create legend handles manually
legend_elements = [Patch(facecolor=color_dict[c], edgecolor=color_dict[c], label=c) for c in grouped_df['Core'].unique()]

# Create the legend
ax.legend(handles=legend_elements, title='Core')


# Add textual details about the regression
#regression_text = f'y = {slope:.2f}x + {intercept:.2f}, R^2 = {r_squared:.2f}'
#ax.text(0.05, 0.95, regression_text, transform=ax.transAxes, fontsize='large')


plt.tight_layout()
plt.savefig('reality.pgf')