## Coding...

In [333]:
# importing libraries


# for data manipulation

import pandas as pd 
import numpy as np 
import datetime as dt


# for file reading 

import glob 
import os 


# for visualizations 

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [297]:
# read every CSV in a directory and join them into one dataframe

def read_files(directory):
    """Load all ``*.csv`` files in ``directory`` into a single DataFrame.

    Every row is tagged with a "Type" column holding the part of its source
    file's name before the first underscore, extension removed
    (``Study_jan.csv`` -> ``"Study"``). The combined rows are sorted by the
    "Start" column and given a fresh 0..n-1 index.

    Parameters
    ----------
    directory : str or path-like
        Folder searched (non-recursively) for files ending in ``.csv``.

    Returns
    -------
    pandas.DataFrame
        Rows of all files combined; an empty DataFrame if no CSV is found.

    Raises
    ------
    KeyError
        If the combined data has no "Start" column.

    Notes
    -----
    "Start" is sorted as ``pd.read_csv`` returned it. If the column is still
    text at this point the order is lexicographic, not chronological.
    """
    frames = []
    # sorted() makes the result independent of the OS directory order
    for file in sorted(glob.glob(os.path.join(directory, '*.csv'))):
        df = pd.read_csv(file)
        df["Type"] = os.path.splitext(os.path.basename(file))[0].split('_')[0]
        frames.append(df)

    # pd.concat raises on an empty list, so handle "no files" explicitly
    if not frames:
        return pd.DataFrame()

    # concatenate and sort once instead of on every loop iteration;
    # drop=True discards the per-file row numbers instead of adding a column
    return (
        pd.concat(frames)
        .sort_values(by=["Start"], kind="stable")
        .reset_index(drop=True)
    )

In [298]:
# load every CSV found under "Files" into one combined dataframe

df = read_files(directory="Files")

# 1. Working on the January Study Data 

In [304]:
# keep only the study sessions; "Type", "End" and "Title" are not used below
study_df = df[df["Type"] == "Study"].drop(columns=["Type", "End", "Title"])

# parse the session start ("dd.mm.YYYY HH:MM"); values that do not match
# the format become NaT instead of raising
study_df["Start"] = pd.to_datetime(
    study_df["Start"], format="%d.%m.%Y %H:%M", errors="coerce"
)

# calendar day of each session (time of day reset to midnight);
# "Start" itself is left in place
study_df["date_var"] = study_df["Start"].dt.normalize()

# one row per day: number of sessions and summed hours, ordered by date.
# dropna=False keeps sessions whose start could not be parsed (NaT) as a
# final group so their hours still count towards the running total.
# Missing "Hours" values are skipped by the sum.
jan_study_data = (
    study_df
    .groupby("date_var", dropna=False)
    .agg(**{
        "# of study sessions": ("Hours", "size"),
        "Total hours completed": ("Hours", "sum"),
    })
    .rename_axis("Date")
    .reset_index()
)

# running total of hours, rounded to hide floating point noise
jan_study_data["Cum Sum"] = jan_study_data["Total hours completed"].cumsum().round(5)

# add "<column> Date" and "<column> Month" columns for datetime columns
def day_month(df2, columns):
    """Split datetime columns into a calendar-date and a month column.

    For each requested column ``c`` that exists in ``df2`` and has a datetime
    dtype, two columns are added: ``"<c> Date"`` (``datetime.date`` objects)
    and ``"<c> Month"`` (month number 1-12). Columns that are missing or not
    datetime are reported with ``print`` and skipped.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame to extend. It is modified in place.
    columns : str or iterable
        Name, or names, of the datetime columns to split.

    Returns
    -------
    pandas.DataFrame
        The same ``df2`` object, with the new columns added.
    """
    # a bare string would otherwise be iterated character by character
    if isinstance(columns, str):
        columns = [columns]

    for column in columns:
        if column not in df2.columns:
            print(str(column) + " not in DataFrame columns")
            continue
        # the .dt accessor only exists on datetime-like data
        if not pd.api.types.is_datetime64_any_dtype(df2[column]):
            print(str(column) + " is not a datetime column")
            continue
        # .dt.date and .dt.month are properties, not methods
        df2[str(column) + " Date"] = df2[column].dt.date
        df2[str(column) + " Month"] = df2[column].dt.month
    return df2

## 1.0 Visualizations

### 1.0.1 Cumulative study hours by day 

In [356]:
# Daily hours (line), session counts (bars) and running total (line) per day.
# Traces are listed in drawing order, which is also the legend order.
fig = go.Figure(
    data=[
        go.Scatter(
            x=jan_study_data["Date"],
            y=jan_study_data["Total hours completed"],
            name="Total studied",
        ),
        go.Bar(
            x=jan_study_data["Date"],
            y=jan_study_data["# of study sessions"],
            name="# of study sessions",
        ),
        go.Scatter(
            x=jan_study_data["Date"],
            y=jan_study_data["Cum Sum"],
            name="Cumulative hours",
        ),
    ]
)
# label the chart so it can be read without the surrounding notebook text;
# hours and session counts share the single y axis
fig.update_layout(
    title="January study sessions: daily and cumulative hours",
    xaxis_title="Date",
    yaxis_title="Hours / number of sessions",
)
fig.show()