Processing Data

In [11]:
import pandas as pd
import numpy as np
import sqlite3

# Open the focus.db SQLite database; this connection is reused by the queries that follow.
connection = sqlite3.connect("focus.db")

# Read every row of the focus_logs table into a DataFrame.
# TODO: the first processing step is to add a "tracking ended" entry to these logs.
df = pd.read_sql(sql="SELECT * FROM focus_logs", con=connection)
df

Unnamed: 0,id,date,time,focused,program
0,1,09/06/2024,17:45:22,focus.py - Time-Manager - Visual Studio Code,Code.exe
1,2,09/06/2024,17:57:03,Efficient Time Calculation - Google Chrome,chrome.exe
2,3,09/06/2024,17:57:05,00:22 - Time to focus! - Google Chrome,chrome.exe
3,4,09/06/2024,17:57:18,Pomodoro Timer Online - Pomofocus - Google Chrome,chrome.exe
4,5,09/06/2024,17:57:19,25:00 - Time to focus! - Google Chrome,chrome.exe
5,6,09/06/2024,17:57:21,focus.py - Time-Manager - Visual Studio Code,Code.exe
6,7,09/06/2024,18:00:43,,explorer.exe
7,8,09/06/2024,18:00:44,"Soft Jazz Music for Study, Work, Focus☕Relaxin...",chrome.exe
8,9,09/06/2024,18:00:46,25:00 - Time to focus! - Google Chrome,chrome.exe
9,10,09/06/2024,18:00:48,focus.py - Time-Manager - Visual Studio Code,Code.exe


In [12]:
# Read every row of the program_insights table (program, total_time, last_updated) for inspection.
programInsights = pd.read_sql(sql="SELECT * FROM program_insights", con=connection)
programInsights


Unnamed: 0,id,program,total_time,last_updated
0,1,Code.exe,0 days 00:15:08,2024-09-06 22:00:49
1,3,chrome.exe,0 days 00:00:22,2024-09-06 22:00:49
2,8,explorer.exe,0 days 00:00:01,2024-09-06 22:00:49


In [13]:
# Read every row of the general_insights table (total_context_switches, last_updated) for inspection.
generalInsights = pd.read_sql(sql="SELECT * FROM general_insights", con=connection)
generalInsights

Unnamed: 0,id,total_context_switches,last_updated


In [8]:
#First DF
# Estimate how long each program held focus: the time spent on a log entry is
# the gap until the next entry, and those gaps are summed per program.

# TODO: at the beginning of all of this, in the raw csv, add an entry signifying that tracking ended.
# This is so we don't get crazy gaps between ending one session and starting the next.
# That entry then needs to be ignored in the totals.
from datetime import datetime

# Duration credited to the final log entry, which has no following entry to diff against.
LAST_ENTRY_DURATION = pd.Timedelta(seconds=5)

# Join "date" and "time" into one column holding full datetime values.
# NOTE(review): pandas reads dates such as 09/06/2024 month-first by default —
# confirm the logger really writes MM/DD/YYYY.
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])

# Sort chronologically so each gap is measured against the true next entry.
# A stable sort keeps entries that share a timestamp in their logged order; the
# default sort may swap them and credit the following gap to the wrong program.
df = df.sort_values(by=['datetime'], kind='stable')

# Next entry's datetime minus this entry's datetime.
df['timespent'] = df['datetime'].shift(-1) - df['datetime']
# The last row has no next row (its gap is NaT), so give it the default duration.
# Skipped for an empty log, where iloc[-1] would raise IndexError.
if not df.empty:
    df.iloc[-1, df.columns.get_loc('timespent')] = LAST_ENTRY_DURATION

# Group by 'program' and sum the total time spent for each program.
total_time_per_program = df.groupby('program')['timespent'].sum()
print(total_time_per_program.sort_values(ascending=False))


program
chrome.exe     0 days 00:00:13
Code.exe       0 days 00:00:08
explorer.exe   0 days 00:00:02
Name: timespent, dtype: timedelta64[ns]


In [10]:
#Task: Storing insights into the database
# Writes one program_insights row per program: a new row is inserted, or the
# existing row for that program gets its total_time and last_updated replaced.
insights_data = {
    'total_time': total_time_per_program
}
cursor = connection.cursor()

# Upsert statement, defined once instead of on every loop iteration.
# NOTE(review): ON CONFLICT(program) presumably relies on a UNIQUE constraint on
# program_insights.program — confirm against the table schema.
UPSERT_PROGRAM_INSIGHT_SQL = '''
        INSERT INTO program_insights (program, total_time)
        VALUES (?, ?)
        ON CONFLICT(program) DO UPDATE SET 
        total_time = excluded.total_time,
        last_updated = CURRENT_TIMESTAMP
    '''

# Insert or update insights for each program.
# Using the connection as a context manager commits when every statement
# succeeds and rolls back if one raises. Without a commit the writes stay in an
# open transaction and are discarded when the connection is closed.
with connection:
    for program, total_time in total_time_per_program.items():
        # total_time is a Timedelta; it is stored as text (e.g. "0 days 00:15:08").
        cursor.execute(UPSERT_PROGRAM_INSIGHT_SQL, (program, str(total_time)))
