In [None]:
import pandas as pd
import os
import plotly.express as px

# Input Dataset

In [None]:
# Folder that holds one CSV file per recording
path = os.path.join(os.getcwd(), "innovaid_hackathon_anima", "input")

# Load every CSV in the folder into a dict keyed by file name.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make positional lookups into this dict (e.g. by patient index) reproducible.
dataframes = {
    file_name: pd.read_csv(os.path.join(path, file_name))
    for file_name in sorted(os.listdir(path))
    if file_name.endswith(".csv")
}
len(dataframes)

In [None]:
# File names of the loaded CSVs, in the order they were inserted into `dataframes`
keys_list = [*dataframes]
keys_list[:2]

In [None]:
# Position (in keys_list) of the patient whose DataFrame is used by the plots below
patient = 1
# Work on a copy so the DataFrame stored in `dataframes` is left untouched
df = dataframes[keys_list[patient]].copy(deep=True)

In [None]:
# Compute velocity of gaze (module) for each timestamp in the DataFrame
def compute_velocity(df: pd.DataFrame) -> pd.Series:
    """Return the gaze displacement between chronologically consecutive samples.

    Rows are ordered by ``TIMESTAMP``; for each row the Euclidean distance
    between its (``RX``, ``RY``) position and the position of the previous row
    is computed. The chronologically first row gets 0. The distance is not
    divided by the elapsed time, so the value is a per-sample displacement.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with the columns ``TIMESTAMP``, ``RX`` and ``RY``. It is not
        modified and may be empty, unsorted, or carry any index.

    Returns
    -------
    pd.Series
        One value per row of ``df``, in the same row order and with the same
        index as ``df``, so ``.values`` can be assigned directly as a column.
    """
    # Use positional labels so the original row order can be restored later,
    # then sort chronologically (mergesort is stable for equal timestamps).
    ordered = df.reset_index(drop=True).sort_values(by='TIMESTAMP', kind='mergesort')

    # Change in each coordinate with respect to the previous sample
    rx_delta = ordered['RX'].diff()
    ry_delta = ordered['RY'].diff()

    # Euclidean distance between consecutive gaze points
    velocity = (rx_delta**2 + ry_delta**2)**0.5

    # The first sample has no predecessor: define its velocity as zero
    if len(velocity) > 0:
        velocity.iloc[0] = 0.0

    # Back to the caller's row order and index
    velocity = velocity.sort_index()
    velocity.index = df.index

    return velocity

# Store the computed gaze velocities in a new 'velocity' column (assigned by position)
df['velocity'] = compute_velocity(df).to_numpy()


In [None]:
# Function to calculate and return average gaze velocities for each image type in a CSV file
def get_velocities(csv_path):
    """Return the mean gaze velocity per image type for one CSV file.

    The file is read, ordered by ``TIMESTAMP``, and the Euclidean distance
    between each (``RX``, ``RY``) sample and the previous one is used as the
    velocity of that sample (0 for the first sample; the distance is not
    divided by the elapsed time). The velocities are then averaged per
    ``IMAGE_TYPE``.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file with the columns ``TIMESTAMP``, ``RX``, ``RY`` and ``IMAGE_TYPE``.

    Returns
    -------
    dict
        Maps each ``IMAGE_TYPE`` value to its mean velocity.
    """
    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_path)

    # Sort chronologically and renumber the rows: every later step pairs each
    # row with the one before it, so labels must follow the sorted order.
    df = df.sort_values(by='TIMESTAMP', kind='mergesort').reset_index(drop=True)

    # Euclidean distance between consecutive gaze points
    velocity = (df['RX'].diff()**2 + df['RY'].diff()**2)**0.5

    # The first sample has no predecessor: define its velocity as zero
    if len(velocity) > 0:
        velocity.iloc[0] = 0.0

    df['velocity'] = velocity

    # Group by 'IMAGE_TYPE' and calculate the mean gaze velocity for each image type
    img_velocities = df.groupby('IMAGE_TYPE')['velocity'].mean()

    # Convert the result to a dictionary and return it
    return dict(img_velocities)

# Example usage: average gaze velocities for the third CSV file that was loaded.
# keys_list only holds the ".csv" file names, whereas os.listdir(path) may also
# return other entries that get_velocities cannot read.
get_velocities(os.path.join(path, keys_list[2]))


In [None]:
# Plot gaze behaviour through the test: RX against RY, coloured by TIMESTAMP
fig = px.scatter(data_frame=df, x="RX", y="RY", color="TIMESTAMP")
fig.show()

In [None]:
# Encode IMAGE_TYPE as a number so it can be used as a plot dimension
category_mapping = dict(positive=1, neutral=0, negative=-1)
df['IMAGE_TYPE_enc'] = df['IMAGE_TYPE'].astype(str).map(category_mapping)
df.head(5)

In [None]:
# Parallel-coordinates view of the multi-dimensional gaze data, coloured by scene
fig = px.parallel_coordinates(
    data_frame=df,
    dimensions=["SCENE_INDEX", "TIMESTAMP", "RY", "RX", "IMAGE_TYPE_enc"],
    color="SCENE_INDEX",
)
fig.show()

In [None]:
# Histogram of the IMAGE_TYPE column
fig = px.histogram(data_frame=df, x="IMAGE_TYPE")
fig.show()

In [None]:
# Histogram of the IMAGE_POSITION column
fig = px.histogram(data_frame=df, x="IMAGE_POSITION")
fig.show()

# Output Dataset

In [None]:
# Location of output.csv inside the project folder
path = os.path.join(os.getcwd() + "/innovaid_hackathon_anima", 'output.csv')

In [None]:
# Load the output dataset and preview its first rows
df_out = pd.read_csv(filepath_or_buffer=path)
df_out.head(5)

In [None]:
# Histogram of the BDI column
fig = px.histogram(data_frame=df_out, x="BDI")
fig.show()

In [None]:
def mapping(el):
    """Bin a BDI score into 'low', 'mild', 'med' or 'sev'.

    Scores below 9 are 'low', below 18 'mild', below 29 'med'; everything
    else (including values that compare false against every bound) is 'sev'.
    """
    # Exclusive upper bounds, checked in ascending order
    for upper_bound, label in ((9, 'low'), (18, 'mild'), (29, 'med')):
        if el < upper_bound:
            return label
    return 'sev'

# Bin every BDI score into its range label and store it as a new column
df_out['BDI_range'] = [mapping(score) for score in df_out['BDI']]
df_out.head(5)

In [None]:
# Histogram of the binned BDI ranges
fig = px.histogram(data_frame=df_out, x="BDI_range")
fig.show()

In [None]:
# Create a pie chart of the binned BDI ranges
fig = px.pie(df_out, names='BDI_range', title='BDI Range Distribution')

# Customize the layout and style
fig.update_layout(
    title_text='Distribution of BDI Ranges',  # Replaces the title passed to px.pie above
    title_x=0.5,  # Center the title
    legend_title='BDI Ranges',  # Update the legend title
    template='seaborn',  # Set the template style (you can choose other templates)
    uniformtext_minsize=12,  # Set minimum text size for labels
    uniformtext_mode='hide',  # Hide labels if they don't fit
)

# Specify the full path along with the filename and extension
path = os.path.join(os.getcwd(), 'innovaid_hackathon_anima', 'images', 'BDI_distribution_pie_chart.png')

# Make sure the target folder exists; writing the image fails otherwise
os.makedirs(os.path.dirname(path), exist_ok=True)

# Save the plot to the specified path
fig.write_image(path)

# Show the plot (optional)
fig.show()

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

# Range label of every row, as produced by the binning step above
result_list = list(df_out['BDI_range'])

# Count occurrences using Counter
counted_values = Counter(result_list)

# Calculate percentages
total_values = len(result_list)
percentage_values = {key: value / total_values * 100 for key, value in counted_values.items()}

# Create a bar chart
keys = list(percentage_values.keys())
values = list(percentage_values.values())

plt.bar(keys, values)
plt.xlabel('Score')
plt.ylabel('Percentage')
plt.title('Percentage Distribution of BDI scores')
path = os.path.join(os.getcwd(), 'innovaid_hackathon_anima', 'images', 'bdi_distribution_bar_chart.png')
print(path)

# Make sure the target folder exists; saving the figure fails otherwise
os.makedirs(os.path.dirname(path), exist_ok=True)

# Save before plt.show(), which is called last
plt.savefig(path)

plt.show()