In [1]:
# from __future__ import division
import itertools
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import math
from sklearn import metrics
from random import randint
from matplotlib import style
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

In [2]:
def acquire_and_wrangle(csv_path='user_logs.csv'):
    """Load the user access log and prepare it for time-based analysis.

    Steps performed:
      * read the CSV at ``csv_path``;
      * use the ``datetime`` column, parsed to timestamps, as the index and
        drop the original column;
      * parse ``end_date``, ``start_date``, ``created_at`` and ``updated_at``
        as datetimes;
      * drop the ``deleted_at`` column, then drop every row that still
        contains a missing value;
      * add ``program`` (name mapped from ``program_id``) and ``ds_student``
        (True when ``program_id`` equals 3).

    Parameters
    ----------
    csv_path : str or path-like, default 'user_logs.csv'
        Location of the log file.

    Returns
    -------
    pandas.DataFrame
        The cleaned log, indexed by a ``DatetimeIndex`` named ``datetime``.
    """
    date_columns = ['end_date', 'start_date', 'created_at', 'updated_at']
    program_names = {
        1.0: 'Full-Stack PHP',
        2.0: 'Full-Stack Java',
        3.0: 'Data Science',
        4.0: 'Front-End'
    }

    raw = pd.read_csv(csv_path)

    # The index must hold real timestamps (not the raw strings read from the
    # CSV): the other functions in this notebook call ``resample`` on it and
    # compare it against ``end_date``.
    userlog = (
        raw
        .set_index(pd.to_datetime(raw['datetime']))
        .drop(columns=['datetime', 'deleted_at'])
    )

    # Convert the remaining date-like columns before dropping missing rows,
    # so rows with a missing date are removed together with the others.
    for column in date_columns:
        userlog[column] = pd.to_datetime(userlog[column])

    return userlog.dropna().assign(
        program=lambda frame: frame.program_id.map(program_names),
        ds_student=lambda frame: frame.program_id == 3,
    )

In [3]:
def _split_curriculum_paths(df):
    """Assign each visited path to the Data Science or Web Development curriculum.

    A path visited by only one group of students belongs to that group's
    curriculum. A path visited by both groups goes to Data Science only when
    Data Science students visited it strictly more often; ties go to
    Web Development.

    Returns a tuple ``(datascience_paths, webdev_paths)`` of sets.
    """
    ds_visits = df.loc[df.ds_student == True, 'path']
    webdev_visits = df.loc[df.ds_student == False, 'path']

    set_datascience = set(ds_visits.unique())
    set_webdev = set(webdev_visits.unique())

    # Count every path once per group instead of re-filtering the whole
    # frame for each shared path.
    ds_counts = ds_visits.value_counts().to_dict()
    webdev_counts = webdev_visits.value_counts().to_dict()

    shared = set_datascience & set_webdev
    ds_majority = {
        path for path in shared
        if ds_counts.get(path, 0) > webdev_counts.get(path, 0)
    }
    webdev_majority = shared - ds_majority

    return set_datascience - webdev_majority, set_webdev - ds_majority


def get_weekly_student_activity(df):
    """Plot weekly page visits per curriculum, one panel per student group.

    The top panel shows Web Development students and the bottom panel shows
    Data Science students. Each panel has one line for visits to Web
    Development curriculum paths and one for visits to Data Science
    curriculum paths, counted in 7-day bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Log with ``path`` and ``ds_student`` columns. ``resample`` is called
        on it, so the index must be datetime-like.

    Returns
    -------
    None
        The figure is drawn with matplotlib as a side effect.
    """
    datascience_paths, webdev_paths = _split_curriculum_paths(df)
    data_science_hits = df[df.path.isin(datascience_paths)]
    webdev_hits = df[df.path.isin(webdev_paths)]

    # One axes per student group: drawing both groups on the implicit current
    # axes stacks all four lines on one chart and overwrites the first title.
    fig, (ax_webdev, ax_ds) = plt.subplots(nrows=2, figsize=(10, 8))
    panels = (
        (ax_webdev, False, 'Weekly Web Development Codeup Activity'),
        (ax_ds, True, 'Weekly Data Science Codeup Activity'),
    )
    curricula = (
        (webdev_hits, 'Webdev Curriculum'),
        (data_science_hits, 'DS Curriculum'),
    )

    for ax, is_ds_student, title in panels:
        for hits, label in curricula:
            group_hits = hits[hits.ds_student == is_ds_student]
            group_hits.resample('7d').path.count().plot(ax=ax, label=label)
        ax.set_title(title)
        ax.set_ylabel('Number of Page Visits')
        ax.legend()

    fig.tight_layout()

In [None]:
def _top_lesson_counts(hits, n):
    """Return the ``n`` most visited paths in ``hits`` as a two-column frame.

    The columns are named explicitly (``lesson``, ``visits``) because the
    names produced by ``value_counts().reset_index()`` differ between pandas
    versions.
    """
    return (
        hits.path.value_counts()
        .head(n)
        .rename_axis('lesson')
        .reset_index(name='visits')
    )


def get_top_lessons_for_graduates(df):
    """Plot the four most visited paths after graduation for each student group.

    Only rows whose index timestamp is later than the row's ``end_date`` are
    kept. The left panel shows Data Science students (``ds_student`` True)
    and the right panel shows all other students.

    Parameters
    ----------
    df : pandas.DataFrame
        Log with ``path``, ``end_date`` and ``ds_student`` columns. The index
        is compared against ``end_date``, so both must hold comparable
        timestamps.

    Returns
    -------
    None
        The figure is drawn with matplotlib/seaborn as a side effect.
    """
    # Visits made after the student's graduation date; computed once and
    # shared by both panels.
    post_graduation = df[df.index > df.end_date]

    fig, axs = plt.subplots(ncols=2, figsize=(14, 5))
    panels = (
        (axs[0], True, 'Top Four(4) Lessons for Data Science Graduates'),
        (axs[1], False, 'Top Four(4) Lessons for Web Development Graduates'),
    )

    for ax, is_ds_student, title in panels:
        group_hits = post_graduation[post_graduation.ds_student == is_ds_student]
        top_lessons = _top_lesson_counts(group_hits, 4)
        # Pass ``ax`` explicitly so each group gets its own panel.
        sns.barplot(x='lesson', y='visits', data=top_lessons, ax=ax)
        ax.tick_params(axis='x', labelrotation=30)
        ax.set_ylabel('Number of Visits')
        ax.set_xlabel('Lesson')
        ax.set_title(title)

    fig.tight_layout()
    
    
    