In [35]:
# Load the image information in a list
import glob
import os

# Root folder that is searched (recursively) for image files
IMAGE_ROOT = 'images'

# Glob suffixes for common image extensions; '**' also matches nested folders
patterns = ['/**/*.jpg', '/**/*.jpeg', '/**/*.png', '/**/*.gif', '/**/*.bmp', '/**/*.tiff']

# List to store image paths (grouped by extension, in the order of `patterns`)
image_paths = []

for pattern in patterns:
    # glob gives no ordering guarantee (it depends on the file system), so each
    # batch is sorted to keep the row order, and anything that selects rows by
    # position, reproducible across machines.
    image_paths.extend(sorted(glob.glob(IMAGE_ROOT + pattern, recursive=True)))

In [36]:
from PIL import Image
import pandas as pd
import datetime

def get_image_metadata(image_path):
    """Collect image and file-system metadata for a single image file.

    Parameters
    ----------
    image_path : str
        Path of the image file to inspect.

    Returns
    -------
    dict
        'path'          -- the path that was passed in
        'mode'          -- ``img.mode`` of the opened image
        'dimensions'    -- ``img.size`` of the opened image
        'palette'       -- ``img.palette`` of the opened image
        'info'          -- ``img.info`` of the opened image
        'file_size'     -- ``os.path.getsize(image_path)``
        'creation_date' -- ``os.path.getctime(image_path)`` formatted as
                           '%d-%m-%YT%H:%M:%S'

    Raises
    ------
    Whatever ``Image.open`` / ``os.path`` raise for a missing or unreadable file.
    """
    with Image.open(image_path) as img:
        # The timestamp must come from the file passed in as `image_path`,
        # not from a module-level variable that happens to be called `path`.
        # NOTE: getctime is the creation time on Windows, but the time of the
        # last metadata change on Unix.
        created = datetime.datetime.fromtimestamp(os.path.getctime(image_path))

        # Get metadata
        metadata = {
            'path': image_path,
            'mode': img.mode,
            'dimensions': img.size,
            'palette': img.palette,
            'info': img.info,
            'file_size': os.path.getsize(image_path),
            'creation_date': created.strftime('%d-%m-%YT%H:%M:%S')
        }
    return metadata

# Get the metadata of each image: one dict per path in `image_paths`
metadata_list = []
# NOTE(review): `path` and `metadata` stay defined at module level after this
# loop; check whether any other cell relies on these names before renaming them.
for path in image_paths:
    metadata = get_image_metadata(path)
    metadata_list.append(metadata)

# Create dataframe from the image data (one row per image, one column per metadata key)
df = pd.DataFrame(metadata_list)
# Show the first rows as a quick sanity check
df.head()

Unnamed: 0,path,mode,dimensions,palette,info,file_size,creation_date
0,images\demo-1.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",623475,07-05-2024T14:56:09
1,images\demo-10.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",815962,07-05-2024T14:56:09
2,images\demo-2.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",472271,07-05-2024T14:56:09
3,images\demo-3.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",658977,07-05-2024T14:56:09
4,images\demo-4.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",781426,07-05-2024T14:56:09


In [37]:
# Remove first 10 images, since they are old
# (this relies on the old images being the first rows of the metadata table).
N_OLD_IMAGES = 10

# Slice from the full metadata table rather than from `df` itself, so that
# re-running this cell does not drop another N_OLD_IMAGES rows each time.
# Cells that add columns to `df` must be re-run after this one.
df = pd.DataFrame(metadata_list).iloc[N_OLD_IMAGES:]
df.head()

Unnamed: 0,path,mode,dimensions,palette,info,file_size,creation_date
10,images\user1\images\a_profile_picture\0.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",623475,13-06-2024T12:06:40
11,images\user1\images\a_profile_picture\1.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",658977,13-06-2024T12:06:40
12,images\user1\images\a_profile_picture\2.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",652117,13-06-2024T12:06:40
13,images\user1\images\a_profile_picture\3.jpg,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",879054,13-06-2024T12:06:40
14,images\user1\images\a_profile_picture\variatio...,RGB,"(1024, 1024)",,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",472271,13-06-2024T12:06:40


In [47]:
# Add a user and prompt column

# Function to extract user from the path
def extract_user_from_path(path):
    """Return the second component of `path` when split on ``os.sep``.

    For a path shaped like ``images<sep>user1<sep>...`` this is ``'user1'``.
    Raises IndexError when the path has fewer than two components.
    """
    return path.split(os.sep)[1]

def extract_prompt_from_path(path):
    """Return the fourth component of `path` with underscores turned into spaces.

    The path is split on ``os.sep``; for a path shaped like
    ``images<sep>user1<sep>images<sep>a_profile_picture<sep>0.jpg`` the result
    is ``'a profile picture'``.
    Raises IndexError when the path has fewer than four components.
    """
    prompt_folder = path.split(os.sep)[3]
    # Splitting on '_' and re-joining with ' ' swaps every underscore for a space
    return ' '.join(prompt_folder.split('_'))

# Final column layout: identifying columns first, raw image details last
column_order = ['user', 'prompt', 'path', 'creation_date', 'file_size', 'mode', 'info', 'palette', 'dimensions']

# Derive both columns with .assign() so a new frame is built instead of writing
# columns into `df`, which is an iloc slice of the metadata table; in-place
# column assignment on a slice can raise SettingWithCopyWarning.
df = df.assign(
    user=df['path'].apply(extract_user_from_path),
    prompt=df['path'].apply(extract_prompt_from_path),
).reindex(columns=column_order)

df.head()

Unnamed: 0,user,prompt,path,creation_date,file_size,mode,info,palette,dimensions
10,user1,a profile picture,images\user1\images\a_profile_picture\0.jpg,2024-06-13 12:06:40,623475,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
11,user1,a profile picture,images\user1\images\a_profile_picture\1.jpg,2024-06-13 12:06:40,658977,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
12,user1,a profile picture,images\user1\images\a_profile_picture\2.jpg,2024-06-13 12:06:40,652117,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
13,user1,a profile picture,images\user1\images\a_profile_picture\3.jpg,2024-06-13 12:06:40,879054,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
14,user1,a profile picture,images\user1\images\a_profile_picture\variatio...,2024-06-13 12:06:40,472271,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"


In [54]:
# Calculate the time span of each user's session:
# the difference between the user's first and last image timestamp

# Parse the 'creation_date' strings into pandas datetimes
df['creation_date'] = pd.to_datetime(df['creation_date'], format='%d-%m-%YT%H:%M:%S')

# Earliest and latest timestamp per user, and the gap between them in seconds
user_bounds = df.groupby('user')['creation_date'].agg(['min', 'max'])
user_seconds = (user_bounds['max'] - user_bounds['min']).dt.total_seconds()
df_grouped_user = user_seconds.rename('dashboard_interaction_time_(seconds)').reset_index()

# The same duration again, as a Timedelta column for readability
df_grouped_user['dashboard_interaction_time'] = pd.to_timedelta(
    df_grouped_user['dashboard_interaction_time_(seconds)'], unit='s'
)

print(df_grouped_user)

    user  dashboard_interaction_time_(seconds) dashboard_interaction_time
0  user1                                   0.0            0 days 00:00:00
1  user2                                   1.0            0 days 00:00:01


In [58]:
# Time span per (user, prompt): difference between the first and last image
# timestamp within each user/prompt combination

# Earliest and latest timestamp per group, and the gap between them in seconds
prompt_bounds = df.groupby(['user', 'prompt'])['creation_date'].agg(['min', 'max'])
prompt_seconds = (prompt_bounds['max'] - prompt_bounds['min']).dt.total_seconds()
df_grouped_prompt = prompt_seconds.rename('time_per_prompt_(seconds)').reset_index()

# The same duration again, as a Timedelta column for readability
df_grouped_prompt['time_per_prompt'] = pd.to_timedelta(
    df_grouped_prompt['time_per_prompt_(seconds)'], unit='s'
)

print(df_grouped_prompt)

    user             prompt  time_per_prompt_(seconds) time_per_prompt
0  user1  a profile picture                        0.0 0 days 00:00:00
1  user2  a profile picture                        1.0 0 days 00:00:01


In [41]:
# Preview the first rows of the current `df`.
# NOTE(review): the saved output of this cell has no `prompt` column, so it
# appears to predate the cell that adds it; re-run before relying on it.
df.head()

Unnamed: 0,user,path,creation_date,file_size,mode,info,palette,dimensions
10,user1,images\user1\images\a_profile_picture\0.jpg,2024-06-13 12:06:40,623475,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
11,user1,images\user1\images\a_profile_picture\1.jpg,2024-06-13 12:06:40,658977,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
12,user1,images\user1\images\a_profile_picture\2.jpg,2024-06-13 12:06:40,652117,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
13,user1,images\user1\images\a_profile_picture\3.jpg,2024-06-13 12:06:40,879054,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
14,user1,images\user1\images\a_profile_picture\variatio...,2024-06-13 12:06:40,472271,RGB,"{'jfif': 257, 'jfif_version': (1, 1), 'jfif_un...",,"(1024, 1024)"
