In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import env
import warnings
warnings.filterwarnings("ignore")
import os
import re

In [None]:
# Load the curriculum access logs joined to their cohort metadata.
#
# The database is only queried when no local copy exists: the result is cached
# in logs.csv and re-used on later runs, so "Restart & Run All" does not need a
# database round-trip every time. Delete logs.csv to force a fresh pull.
url = f'mysql+pymysql://{env.user}:{env.password}@{env.host}/curriculum_logs'
query = '''
select logs.user_id as user, 
    cohorts.name as cohort_name, 
    logs.date, logs.time, 
    logs.path as endpoint, 
    logs.ip as source_ip, 
    cohorts.start_date,
    cohorts.end_date,
    cohorts.program_id
from 
    logs 
    join cohorts
    on logs.cohort_id = cohorts.id;
'''
if os.path.exists('logs.csv'):
    # index_col=0 restores the unnamed row index that to_csv() wrote out.
    # NOTE(review): columns come back from CSV as plain strings/numbers; the
    # date columns are parsed in the next cell — confirm nothing downstream
    # relies on the SQL-native dtype of `time`.
    df = pd.read_csv('logs.csv', index_col=0)
else:
    df = pd.read_sql(query, url)
    df.to_csv('logs.csv')
df.head()

In [None]:
# Convert the three date-like columns to datetime, then index the frame by the
# access date. `date` is passed as a Series, so it also stays available as a
# regular column after becoming the index.
df = df.assign(
    date=pd.to_datetime(df['date']),
    start_date=pd.to_datetime(df['start_date']),
    end_date=pd.to_datetime(df['end_date']),
)
df = df.set_index(df['date'])

In [None]:
# Make a pages column: for every log row, how many requests that row's user
# made on that row's calendar day.
#
# The count is grouped by user AND day. A plain daily resample of `endpoint`
# would count the requests of all users combined, which is not what the column
# name `pages_one_user` describes.
# The frame is indexed by the access date, so the normalised index is the day
# key. Rows with a missing user or date fall outside every group and get NaN.
df['pages_one_user'] = (
    df.groupby(['user', df.index.normalize()])['endpoint'].transform('count')
)
df.head(10)

In [None]:
# Split each endpoint path on '/' into at most four pieces and append them as
# new columns (labelled 0..3 by pandas; they are renamed in the next step).
# n=3 caps the number of splits, so the last piece keeps any remaining slashes.
# `n` must be passed by keyword: pandas 2.x rejects it as a positional argument.
# The split result shares df's index row-for-row, so the column-wise concat
# lines up even though the date index contains duplicates.
df = pd.concat(
    [df, df['endpoint'].str.split('/', n=3, expand=True)],
    axis=1,
)
df.head(10)

In [None]:
# Give the integer-labelled split columns readable names:
# 0 -> page_1, 1 -> page_2, 2 -> page_3, 3 -> page_4.
df = df.rename(columns={position: f'page_{position + 1}' for position in range(4)})
df.head()

In [None]:
# Sanity check: are there any rows from the 'Staff' cohort under program_id 1
# (the program this notebook treats as web dev)?
wd_df = df.loc[df['program_id'].eq(1) & df['cohort_name'].eq('Staff')]
wd_df.head()

In [None]:
# Sanity check: are there any rows from the 'Staff' cohort under program_id 3
# (the program this notebook treats as data science)?
ds_df = df.loc[df['program_id'].eq(3) & df['cohort_name'].eq('Staff')]
ds_df.head()

In [None]:
# Split post-graduation activity into one frame per program.
# A row counts as "after graduation" when its access date (the index) is later
# than the cohort's end_date; rows with a missing end_date compare False and
# are left out.
# The 'Staff' cohort is excluded from BOTH frames so the two subsets are built
# by the same rule.
after_end = df.index > df['end_date']
not_staff = df['cohort_name'] != 'Staff'

# Every program other than 3 is treated as web dev; program 3 is data science.
web_dev = df[(df['program_id'] != 3) & not_staff & after_end]
DS = df[(df['program_id'] == 3) & not_staff & after_end]
DS.head()

In [None]:
# Preview the first five post-graduation web dev rows.
web_dev.head(n=5)

### What topics are grads continuing to reference after graduation and into their jobs (for each program)?

In [None]:
# Five most frequently requested endpoints among data science rows logged
# after the cohort end date.
# Counting on the `endpoint` Series (not a one-column DataFrame) gives plain
# string tick labels instead of one-element tuples.
fig, ax = plt.subplots(figsize=(20, 10))
DS['endpoint'].value_counts().head().plot(kind='barh', ax=ax)
ax.set(
    title='Data science: top endpoints accessed after cohort end date',
    xlabel='Log entries',
    ylabel='Endpoint',
);

In [None]:
# Five most frequently requested endpoints among web dev rows logged after the
# cohort end date.
# Counting on the `endpoint` Series (not a one-column DataFrame) gives plain
# string tick labels instead of one-element tuples.
fig, ax = plt.subplots(figsize=(20, 10))
web_dev['endpoint'].value_counts().head().plot(kind='barh', ax=ax)
ax.set(
    title='Web dev: top endpoints accessed after cohort end date',
    xlabel='Log entries',
    ylabel='Endpoint',
);

In [None]:
DS.