In [1]:
from airtable import AirtableInterface
from dotenv import load_dotenv
from os import environ
import pandas as pd
import plotly.express as px

load_dotenv()

airt = AirtableInterface(environ["AIRTABLE_API_KEY"], environ['AIRTABLE_BASE_ID'], environ['AIRTABLE_TABLE_KEY'])

In [2]:
df = airt.get_pd_dataframe()
df['time'] = pd.to_datetime(df['time'])
df['end_time'] = df['time'].shift(-1)

def extract_website_name(website):
    if '/' in website:
        return website.split('/')[0]
    if "mail.google.com" in website:
        return "gmail.com"
    return website

def clean_program_name(program_name):
    cleaned_name = program_name.strip()

    if cleaned_name.startswith('[') and cleaned_name.endswith(']'):
        cleaned_name = cleaned_name[1:-1]
    
    cleaned_name = cleaned_name.replace("'", "")

    return cleaned_name

# Apply the function to the 'program_name' column
df['program_name'] = df['program_name'].astype(str)
df['program_name'] = df['program_name'].apply(clean_program_name)

# Apply the function to the website column, and use program name where website is NaN
df['website_or_program'] = df['website'].apply(lambda x: extract_website_name(x) if pd.notna(x) else x)
df['website_or_program'].fillna(df['program_name'], inplace=True)
df['project'] = df['project'].astype(str)
df['website_or_program'] = df['website_or_program'].astype(str)

In [3]:
category_counts = df['category'].value_counts()

# Create a bar chart using Plotly for interactive visualization
fig = px.bar(category_counts, title="Time Spent on Different Categories", labels={'value':'Time Spent', 'index':'Category'})
fig.show()

In [4]:
grouped_data = df.groupby(['category', 'project']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='category', y='time_spent', color='project', title='Time Spent on Different Categories by Project', labels={'time_spent':'Time Spent'})
fig.show()

In [5]:
grouped_data = df.groupby(['category', 'project']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='project', y='time_spent', color='category', title='Time Spent on Different Projects by Category', labels={'time_spent':'Time Spent'})
fig.show()

In [6]:
grouped_data = df.groupby(['project', 'website_or_program']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='project', y='time_spent', color='website_or_program', 
             title='Time Spent on Different Projects by Website/Program',
             labels={'time_spent':'Time Spent', 'website_or_program':'Website/Program'})
fig.show()

In [7]:
grouped_data = df.groupby(['category', 'website_or_program']).size().reset_index(name='time_spent')

# Create a stacked bar chart using Plotly for interactive visualization
fig = px.bar(grouped_data, x='category', y='time_spent', color='website_or_program', 
             title='Time Spent on Different Categories by Website/Program',
             labels={'time_spent':'Time Spent', 'website_or_program':'Website/Program'})
fig.show()

In [8]:
# Group by 'website_or_program' and count the occurrences
website_program_counts = df['website_or_program'].value_counts()

# Create a plotly figure for a list-style chart
fig = px.bar(website_program_counts, title="Time Spent on Different Websites/Programs", labels={'value':'Time Spent', 'index':'Website/Program'})
fig.show()

In [9]:
website_program_counts = df['website_or_program'].value_counts()

# Create a plotly figure for a horizontal (list-style) bar chart
fig = px.bar(website_program_counts, orientation='h', 
             title="Time Spent on Different Websites/Programs",
             labels={'value':'Time Spent', 'index':'Website/Program'},
             height=900)  # Adjust height based on the number of items
fig.update_layout(yaxis={'categoryorder':'total ascending'})  # Sort items
fig.show()

In [10]:
df['interval'] = df['time'].dt.floor('30S')  # Grouping time into 30-second intervals

# Group by interval and website_or_program, then count the occurrences
grouped_data = df.groupby(['interval', 'website_or_program']).size().reset_index(name='frequency')

# Convert the frequency to a time duration (30 seconds for each count)
grouped_data['time_spent_seconds'] = grouped_data['frequency'] * 30

# Create a stacked bar chart
fig = px.bar(grouped_data, x='interval', y='time_spent_seconds', color='website_or_program', 
             title='Time Spent on Each Website/Program in 30-Second Intervals',
             labels={'time_spent_seconds':'Time Spent (seconds)', 'interval':'Interval', 'website_or_program':'Website/Program'})
fig.show()

In [11]:
# grouped_data = df.groupby(['category', 'website_or_program']).size().reset_index(name='time_spent')

# # Create a stacked bar chart using Plotly for interactive visualization
# fig = px.bar(grouped_data, x='category', y='time_spent', color='website_or_program', 
#              title='Time Spent on Different Categories by Website/Program',
#              labels={'time_spent':'Time Spent', 'website_or_program':'Website/Program'})
# from json import dumps
# from plotly.utils import PlotlyJSONEncoder
# from plotly import jsonify
# graphJSON = dumps(fig, cls=PlotlyJSONEncoder)
# print(jsonify(graphJSON))

ImportError: cannot import name 'jsonify' from 'plotly' (/Users/jfeibs/.pyenv/versions/3.11.6/envs/aiml/lib/python3.11/site-packages/plotly/__init__.py)

In [13]:
df['end_time'] = df['time'].shift(-1)  # Assuming each record's end time is the next record's start time
fig = px.timeline(df, x_start="time", x_end="end_time", y="website_or_program", color="category")
fig.update_layout(title="Continuous Screen Usage Timeline", xaxis_title="Time", yaxis_title="Website/Program")
fig.show()

In [14]:
usage_counts = df.groupby(['website_or_program', 'category']).size().reset_index(name='counts')
fig = px.scatter(usage_counts, x="website_or_program", y="category", size="counts", color="category",
                 title="Scatter Plot of Program/Website Usage by Category")
fig.show()

In [15]:
project_time = df.groupby('project').size().reset_index(name='time_spent')
fig = px.scatter(project_time, x="project", y="time_spent", size="time_spent", color="project",
                 title="Bubble Chart of Time Spent on Different Projects")
fig.show()

In [19]:
project_time = df.groupby('website_or_program').size().reset_index(name='time_spent')
fig = px.scatter(project_time, x="website_or_program", y="time_spent", size="time_spent", color="website_or_program",
                 title="Bubble Chart of Time Spent on Different Websites or Programs")
fig.show()

In [16]:
df['hour'] = df['time'].dt.hour
heatmap_data = df.groupby(['hour', 'website_or_program']).size().unstack(fill_value=0)
fig = px.imshow(heatmap_data, aspect="auto", title="Heatmap of Program Usage Over Hours of the Day")
fig.show()

In [21]:
df['end_time'] = df['time'].shift(-1)
# x = df[:-1]  # Removing the last row as it won't have a valid end time

# Plotting
fig = px.timeline(df, x_start='time', x_end='end_time', y=[1]*len(df), color='website_or_program',
                  labels={'y': '', 'color': 'Website/Program'})
fig.update_layout(title='Screen Usage Timeline', xaxis_title='Time', yaxis_title='',
                  showlegend=True, yaxis_showticklabels=False)
fig.show()

In [None]:
# Give me new types of charts showing program or project usage over time as a continuous line. Also give me additional charts, perhaps around 5 new types. Maybe also include a sunburst chart, a dot plot, a timeline graph, a time series 

In [22]:
# Assuming each record represents a fixed interval of time (e.g., 30 seconds)
# df['end_time'] = df['time'].shift(-1)

# Creating the timeline chart
fig = px.timeline(df, x_start='time', x_end='end_time', y='project', color='website_or_program',
                  labels={'color': 'Website/Program'})
fig.update_layout(title='Screen Usage Timeline by Project', xaxis_title='Time', yaxis_title='Project',
                  showlegend=True)
fig.show()

In [23]:
# Creating the horizontal bar chart
fig = px.bar(df, x='time', y='website_or_program', orientation='h',
             labels={'time': 'Time', 'website_or_program': 'Website/Program'},
             title='Screen Usage by Website/Program')
fig.show()

In [26]:
# df['diff'] = df.groupby('website_or_program')['time'].diff().ne(pd.Timedelta(minutes=1)).cumsum()
grouped = df.groupby(['website_or_program', 'time']).reset_index()

# Plotting a horizontal bar chart
fig = px.bar(grouped, x='time', y='website_or_program', orientation='h', title='Continuous Usage by Website/Program',
             labels={'time': 'Time', 'website_or_program': 'Website/Program'})
fig.show()

AttributeError: 'DataFrameGroupBy' object has no attribute 'reset_index'

In [32]:
fig = px.timeline(df, x_start="time", x_end="end_time", y=[1]*len(df), color="website_or_program")
fig.update_layout(title="Continuous Screen Usage Timeline", xaxis_title="Time")
fig.show()