# LinkedIn Learning Courses Analysis

This notebook analyses LinkedIn Courses taken by CaseWare employees. Created as part of hackathon 2020 by Oliver Phan,
Ekat Zaytseva, Irina Balzamova, and Shweta Chandratre.

In [None]:
# tags: configuration
from cwi_notebook.configuration import notebook, inputs, outputs
from cwi_taxonomy.v0.tags import *
notebook.id="linkedin_learning_courses_analysis"
notebook.version="1.0.0"
notebook.autorun=False

outputs.transaction_scores.columns = [AMOUNT, POSTING_DATE, ACCOUNT_ID, ACCOUNT_MAIN_DESCRIPTION]

In [13]:
import pandas as pd
import requests
import ndjson

AUTH_ENDPOINT = 'http://localhost:3000/auth'
ORGANIZATIONS_ENDPOINT = 'http://localhost:3000/organizations'
ACCOUNTS_ENDPOINT = 'http://localhost:3000/organizations/{0}/accounts/{1}'
TRANSACTIONS_ENDPOINT = 'http://localhost:3000/organizations/{0}/accounts/{1}/transactions{2}'

# TODO: Setup Linkedin endpoints to agree with these ids for filtering
LINKEDIN_ORGID = "123"
COURSE_ID = 123
COURSE_VIEW_ID = 123


# TODO: Setup Linkedin binding with agreed upon user/password & returned access token format
def authenticate_against_p19k():
    provider_bindings_token = requests.post(AUTH_ENDPOINT, json={"user": "linkedin", "password": "linkedin"})
    print(provider_bindings_token.json())
    return provider_bindings_token.json()

def get_organizations():
    token = authenticate_against_p19k()
    response = requests.get(format(ORGANIZATIONS_ENDPOINT),
                                 headers={"x-cwi-binding-token": token['accessToken']})
    print(token)
    organizations = response.json(cls=ndjson.Decoder)
    print(organizations)
    # TODO: Load into dataframe
    
def get_accounts():
    token = authenticate_against_p19k()
    accounts = requests.get(format(ACCOUNTS_ENDPOINT, LINKEDIN_ORGID, COURSE_ID),
                                 headers={"x-cwi-binding-token": token['accessToken']})
    # TODO: Load into dataframe

def get_transactions():
    token = authenticate_against_p19k()
    accounts = requests.get(format(TRANSACTIONS_ENDPOINT, LINKEDIN_ORGID, COURSE_ID, ),
                                 headers={"x-cwi-binding-token": token['accessToken']})

# function to import data from p19k linkedin binding
def import_p19k_data() -> pd.DataFrame:
    pass

get_organizations()

# TODO: Wrap the json into ndjson -> dataframe
# def sample_p19k_request():
#     r = requests.get('http://localhost:3000/health')
#     print(r.status_code)
#     print(r.content)

{'accessToken': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJfIjoiNzg3ODc4Nzg3ODc4Nzg3ODc4Nzg3ODc4Nzg3ODc4NzgiLCJjb2RlIjoiYjc0ZjBkZmYwYjcxYWZlZWY5NDZhOTMyOWI5MGUwN2NiNjU4NjQ0YjYyNDQ3OTQ4OGJiYzExNWUzNDhkOTFkZjMzMzM1ZThhMjczOWNhOGJkYmZjNmQ4OGJiODk3MzFmIiwiaWF0IjoxNjA0MDA3Nzg5fQ.Jzx5L7ZK_UkJeCNwoCw_b5LSRYvmir2mwZzy3gA8HkA', 'refreshToken': None, 'expiresIn': 0, 'expiry': 'no expiration'}
{'accessToken': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJfIjoiNzg3ODc4Nzg3ODc4Nzg3ODc4Nzg3ODc4Nzg3ODc4NzgiLCJjb2RlIjoiYjc0ZjBkZmYwYjcxYWZlZWY5NDZhOTMyOWI5MGUwN2NiNjU4NjQ0YjYyNDQ3OTQ4OGJiYzExNWUzNDhkOTFkZjMzMzM1ZThhMjczOWNhOGJkYmZjNmQ4OGJiODk3MzFmIiwiaWF0IjoxNjA0MDA3Nzg5fQ.Jzx5L7ZK_UkJeCNwoCw_b5LSRYvmir2mwZzy3gA8HkA', 'refreshToken': None, 'expiresIn': 0, 'expiry': 'no expiration'}
[{'state': 'START', 'dataType': 'Organization'}, {'state': 'END', 'dataType': 'Organization'}]


In [None]:
from datetime import date, datetime
from cwi_notebook import data_loader
# tags: evaluate
# function to find most popular courses, returns sorted list of courses by popularity
def evaluate(top, date_after) -> pd.DataFrame:
    data = data_loader.load_data(inputs)
    df = data[['entry_id', 'line_number', 'account_id', 'account_main_description', 'amount', 'posting_date']]
    df = df[df['posting_date'] > datetime.combine(date_after, datetime.min.time())]
    df = df.drop_duplicates('entry_id')
    df = df.sort_values(['amount'], ascending=False)
    return df.head(top)

In [None]:
# tags: visualization
import ipywidgets as widgets
import matplotlib.pyplot as plt
options = [10, 20]
top_selector = widgets.Dropdown(
    options=options,
    description='View top',
    value=options[0],
    layout=widgets.Layout(width='10%',height='40px')
)
date_selector = widgets.DatePicker(
    value=date.today()
)
def execute_score(top_items, date_filter):
    result = evaluate(top_items, date_filter)
    plt.figure(figsize=(top_items, 5 * top_items / 10))
    plt.bar(result['account_id'], result['amount'])
    plt.xlabel('Courses', fontsize=14)
    plt.ylabel('Views', fontsize=14)
    plt.title(f'Top {top_items} LinkedIn Courses since {date_filter}', fontsize=14)
    plt.show()

widgets.interact(execute_score, top_items=top_selector, date_filter=date_selector)
