Processing Data

In [13]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Open the local SQLite database file used by every query in this notebook.
connection = sqlite3.connect("focus.db")

# Pull every row of the raw focus log into a DataFrame and display it.
# In the displayed sample, the session is closed by a "Session End" row
# whose session_end flag is 1.
df = pd.read_sql(sql="SELECT * FROM focus_logs", con=connection)
df

Unnamed: 0,id,date,time,focused,program,session_end
0,1,09/09/2024,20:59:11,focus.py - Time-Manager - Visual Studio Code,Code.exe,0
1,2,09/09/2024,20:59:13,ChatGPT - Google Chrome,chrome.exe,0
2,3,09/09/2024,20:59:14,focus.py - Time-Manager - Visual Studio Code,Code.exe,0
3,4,09/09/2024,20:59:15,Chris Chan's Horrifying History (Full Document...,chrome.exe,0
4,5,09/09/2024,20:59:25,focus.py - Time-Manager - Visual Studio Code,Code.exe,0
5,6,09/09/2024,20:59:28,Session End,,1


In [14]:
# Load the stored per-program aggregates (total and average time, per the
# displayed columns) and show them.
programInsights = pd.read_sql(sql="SELECT * FROM program_insights", con=connection)
programInsights


Unnamed: 0,id,program,total_time,average_time,last_updated
0,1,Code.exe,0 days 00:00:06,0 days 00:00:02,2024-09-10 00:59:28
1,2,,0 days 00:00:05,0 days 00:00:05,2024-09-10 00:59:28
2,3,chrome.exe,0 days 00:00:11,0 days 00:00:05.500000,2024-09-10 00:59:28


In [9]:
# Load the stored session-wide aggregates and show them.
generalInsights = pd.read_sql(sql="SELECT * FROM general_insights", con=connection)
generalInsights

Unnamed: 0,id,total_context_switches,last_updated


In [10]:
# Re-read the raw log so this cell starts from unmodified data each time it runs.
df = pd.read_sql("SELECT * FROM focus_logs", connection)

# Join the separate 'date' and 'time' text columns into one timestamp column.
# NOTE(review): no explicit format is passed, so pandas infers the day/month
# order from the data -- confirm which order the logger writes.
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])

# Order rows chronologically. The displayed timestamps have one-second
# resolution, so ties are possible; 'id' is used as a tie-breaker because a
# single-column sort_values is not stable by default and could swap rows that
# share a timestamp, crediting the following interval to the wrong row.
# Assumes 'id' increases in insertion order -- TODO confirm.
df = df.sort_values(by=['datetime', 'id'])

# Time spent on a row = start of the next row - start of this row.
df['timespent'] = df['datetime'].shift(-1) - df['datetime']

# The last row has no successor, so give it a default of 5 seconds.
# Guarded so that an empty log does not raise IndexError on iloc[-1].
if not df.empty:
    df.iloc[-1, df.columns.get_loc('timespent')] = pd.Timedelta(seconds=5)

# NOTE(review): "Session End" marker rows are not excluded here, so they also
# receive a timespent value (the 5-second default in the displayed output).
# Confirm whether they should be filtered out before aggregating.
total_time_per_program = df.groupby('program')['timespent'].sum()
df

Unnamed: 0,id,date,time,focused,program,session_end,datetime,timespent
0,1,09/09/2024,20:59:11,focus.py - Time-Manager - Visual Studio Code,Code.exe,0,2024-09-09 20:59:11,0 days 00:00:02
1,2,09/09/2024,20:59:13,ChatGPT - Google Chrome,chrome.exe,0,2024-09-09 20:59:13,0 days 00:00:01
2,3,09/09/2024,20:59:14,focus.py - Time-Manager - Visual Studio Code,Code.exe,0,2024-09-09 20:59:14,0 days 00:00:01
3,4,09/09/2024,20:59:15,Chris Chan's Horrifying History (Full Document...,chrome.exe,0,2024-09-09 20:59:15,0 days 00:00:10
4,5,09/09/2024,20:59:25,focus.py - Time-Manager - Visual Studio Code,Code.exe,0,2024-09-09 20:59:25,0 days 00:00:03
5,6,09/09/2024,20:59:28,Session End,,1,2024-09-09 20:59:28,0 days 00:00:05


In [12]:
# Overarching insight section
#
# 1. Context switching
# Planned metric, "Average Time Spent on a Task": take the per-row time
# computed from focus_logs, accumulate it until the focus moves to a different
# program, repeat for every such stretch, and average the stretches.
# NOTE(review): the line below averages individual rows per program; it does
# not merge consecutive rows of the same program first.
average_time_per_program = df['timespent'].groupby(df['program']).mean()
print(average_time_per_program)

program
Code.exe            0 days 00:00:02
None                0 days 00:00:05
chrome.exe   0 days 00:00:05.500000
Name: timespent, dtype: timedelta64[ns]


In [8]:
# # #First DF

# # # At the beginning of all of this, in the raw csv, add a entry signifying the Tracking Ended. This is so we don't have
# # # crazy gaps between ending and starting the next session. Add an ignorance for it.
# # from datetime import datetime
# # # Now by entry, I want to get a total time/add up datetimes. Join "Date" and "Time" then just sum up?
# df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
# #Makes a column w full datetime object

# # Sort by 'DateTime' to ensure correct time difference calculation
# df = df.sort_values(by=['datetime'])

# # Current datetime - next datetime
# df['timespent'] = df['datetime'].shift(-1) - df['datetime']
# # For the last row, set a default time (e.g., 5 seconds), as there's no next row
# df.iloc[-1, df.columns.get_loc('timespent')] = pd.Timedelta(seconds=5)

# # Group by 'Program' and sum the total time spent for each program
# total_time_per_program = df.groupby('program')['timespent'].sum()
# print(total_time_per_program.sort_values(ascending=False))


program
chrome.exe     0 days 00:00:13
Code.exe       0 days 00:00:08
explorer.exe   0 days 00:00:02
Name: timespent, dtype: timedelta64[ns]


In [10]:
# #Task: Storing insights into the database
# insights_data = {
#     'total_time': total_time_per_program
# }
# cursor = connection.cursor()

# # Insert or update insights for each program
# for program, total_time in total_time_per_program.items():

#     # Insert or update the row for the program
#     cursor.execute('''
#         INSERT INTO program_insights (program, total_time)
#         VALUES (?, ?)
#         ON CONFLICT(program) DO UPDATE SET 
#         total_time = excluded.total_time,
#         last_updated = CURRENT_TIMESTAMP
#     ''', (program, str(total_time)))
