In [1]:
import pandas as pd
import json
import pymongo
import matplotlib.dates as mdates
from datetime import datetime

In [2]:
def get_db(write=False):
    """Return a handle to the "PlusWord" MongoDB database.

    The connection string is read from ``local/pass.json``. The key looked up
    is ``"admin_connection_string"`` when ``write`` is true and
    ``"connection_string"`` otherwise.

    Args:
        write: Use the admin connection string instead of the default one.

    Returns:
        The ``client["PlusWord"]`` database object, or ``None`` when the
        secrets file cannot be read, the key is missing/empty, or the client
        cannot be created. In those cases the error is printed, not raised.
    """
    key = "admin_connection_string" if write else "connection_string"

    try:
        # Read the secrets and close the file before opening the connection.
        with open("local/pass.json") as secrets_file:
            secrets = json.load(secrets_file)

        connection_string = secrets.get(key)
        if not connection_string:
            # Without this check MongoClient(None) would silently fall back to
            # the driver's default host instead of reporting the missing key.
            raise ValueError(f"'{key}' is missing or empty in local/pass.json")

        client = pymongo.MongoClient(connection_string)
        return client["PlusWord"]
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(e)
        return None

In [3]:
def data_import(collection_name='Times'):
    """Load every document of a MongoDB collection into a DataFrame.

    Only loading happens here: column selection, dtype conversion and sorting
    are left to the caller.

    Args:
        collection_name: Name of the collection to read from the database
            returned by ``get_db()``.

    Returns:
        A DataFrame built from all documents returned by
        ``collection.find({})``.

    Raises:
        RuntimeError: If ``get_db()`` returned ``None`` (it prints the
            underlying error and returns ``None`` on failure).
    """
    db = get_db()
    # Compare with None explicitly rather than relying on truthiness of the
    # database handle.
    if db is None:
        raise RuntimeError(
            f"Could not connect to the database; cannot load collection '{collection_name}'"
        )

    collection = db[collection_name]
    return pd.DataFrame(list(collection.find({})))

In [4]:
def time_delta_to_num(time_delta):
    """Convert an iterable of timedeltas into plain numbers for plotting.

    Each timedelta is added to a fixed reference date, the resulting datetimes
    are turned into matplotlib date numbers with ``mdates.date2num``, and the
    reference date's own number is subtracted again, leaving only the offset
    that the timedelta contributes (in ``date2num`` units).

    Args:
        time_delta: Iterable of timedelta-like values that can be added to a
            ``datetime``.

    Returns:
        A list with one number per input element.
    """
    # Any fixed date works as the anchor; its value is subtracted out below.
    anchor = datetime(2022, 6, 20)
    anchor_num = mdates.date2num(anchor)

    # Shift every timedelta onto the anchor so date2num can handle it.
    shifted = []
    for delta in time_delta:
        shifted.append(anchor + delta)

    # Remove the anchor's contribution from each converted value.
    return [value - anchor_num for value in mdates.date2num(shifted)]

In [5]:
# Load the collection (data_import's default) into a DataFrame.
df = data_import()

# Keep only the columns used below. .copy() yields an independent frame so the
# column assignments that follow cannot raise SettingWithCopyWarning.
df = df[['load_ts', 'time', 'user']].copy()

# Values of the exact form "dd:dd" get a "00:" prefix so every value has
# three colon-separated fields before it is parsed as a timedelta.
df['time'] = df['time'].str.replace(r'(^\d\d:\d\d$)', r'00:\1', regex=True)
df['load_ts'] = pd.to_datetime(df['load_ts'], format='%Y-%m-%d %H:%M:%S.%f')
df = df.sort_values(by=['load_ts'])
df = df.rename(columns={'load_ts': 'timestamp'})

# Parse the time strings once and reuse the result for the numeric column.
df['time_delta'] = pd.to_timedelta(df['time'])
df['time_delta_as_num'] = time_delta_to_num(df['time_delta'])

# Time-of-day part of each timestamp, converted the same way for plotting.
df['sub_time_delta_as_num'] = time_delta_to_num(pd.to_timedelta(df['timestamp'].dt.time.astype('string')))

# Capitalise column names, then index by timestamp with the newest rows first.
df.columns = df.columns.str.capitalize()
df = df.set_index('Timestamp')
df = df.sort_index(ascending=False)