In [8]:
from airtable import AirtableInterface
from dotenv import load_dotenv
from os import environ
import pandas as pd
import plotly.express as px

load_dotenv()

airt = AirtableInterface(environ["AIRTABLE_API_KEY"], environ['AIRTABLE_BASE_ID'], environ['AIRTABLE_TABLE_KEY'])

In [9]:
df = airt.get_pd_dataframe()
df['time'] = pd.to_datetime(df['time'])

def extract_website_name(website):
    if '/' in website:
        return website.split('/')[0]
    if "main.google.com" in website:
        return "gmail.com"
    return website

def clean_program_name(program_name):
    cleaned_name = program_name.strip()

    if cleaned_name.startswith('[') and cleaned_name.endswith(']'):
        cleaned_name = cleaned_name[1:-1]
    
    cleaned_name = cleaned_name.replace("'", "")

    return cleaned_name

# Apply the function to the 'program_name' column
df['program_name'] = df['program_name'].astype(str)
df['program_name'] = df['program_name'].apply(clean_program_name)

# Apply the function to the website column, and use program name where website is NaN
df['website_or_program'] = df['website'].apply(lambda x: extract_website_name(x) if pd.notna(x) else x)
df['website_or_program'].fillna(df['program_name'], inplace=True)
df['project'] = df['project'].astype(str)
df['website_or_program'] = df['website_or_program'].astype(str)

Unnamed: 0,time,program_name,website,category,project,description,website_or_program
0,2023-12-09 22:12:06,Google Chrome,mail.google.com,light work,goofing off,Typing the recipient's email address in a draf...,mail.google.com
1,2023-12-09 22:04:46,Google Chrome,google.com,light work,goofing off,Possibly taking a break with a Google search p...,google.com
2,2023-12-09 22:10:41,Google Chrome,google.com,entertainment,ski trip,Looking at different ski resorts options in Re...,google.com
3,2023-12-09 22:05:06,Google Chrome,arxiv.org,deep work,machine learning project,Continuing to investigate a scientific paper o...,arxiv.org
4,2023-12-09 22:12:01,Google Chrome,mail.google.com,light work,goofing off,Composing an email in Gmail,mail.google.com
...,...,...,...,...,...,...,...
113,2023-12-09 22:13:21,Google Chrome,mail.google.com,deep work,machine learning project,Composing an email update regarding progress o...,mail.google.com
114,2023-12-09 22:06:31,Google Chrome,stack overflow,deep work,machine learning project,Scrolling through a Stack Overflow thread for ...,stack overflow
115,2023-12-09 22:06:11,"Google Chrome, Visual Studio Code",stack overflow,deep work,machine learning project,Looking up information or solutions on Stack O...,stack overflow
116,2023-12-09 22:13:06,Google Chrome,mail.google.com,deep work,machine learning project,Writing an email regarding progress on a machi...,mail.google.com


In [10]:
category_counts = df['category'].value_counts()

# Create a bar chart using Plotly for interactive visualization
fig = px.bar(category_counts, title="Time Spent on Different Categories", labels={'value':'Time Spent', 'index':'Category'})
fig.show()

In [11]:
grouped_data = df.groupby(['category', 'project']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='category', y='time_spent', color='project', title='Time Spent on Different Categories by Project', labels={'time_spent':'Time Spent'})
fig.show()

In [12]:
grouped_data = df.groupby(['category', 'project']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='project', y='time_spent', color='category', title='Time Spent on Different Projects by Category', labels={'time_spent':'Time Spent'})
fig.show()

In [13]:
grouped_data = df.groupby(['project', 'website_or_program']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='project', y='time_spent', color='website_or_program', 
             title='Time Spent on Different Projects by Website/Program',
             labels={'time_spent':'Time Spent', 'website_or_program':'Website/Program'})
fig.show()

In [14]:
# Group by 'website_or_program' and count the occurrences
website_program_counts = df['website_or_program'].value_counts()

# Create a plotly figure for a list-style chart
fig = px.bar(website_program_counts, title="Time Spent on Different Websites/Programs", labels={'value':'Time Spent', 'index':'Website/Program'})
fig.show()