# LinkedIn Learning Courses Analysis

This notebook analyses LinkedIn Learning courses taken by CaseWare employees. It was created as part of the 2020 hackathon by Oliver Phan,
Ekat Zaytseva, Irina Balzamova, and Shweta Chandratre.

In [None]:
# tags: configuration
from cwi_notebook.configuration import notebook, inputs, outputs
from cwi_taxonomy.v0.tags import *
# Identity and run settings assigned on the imported `notebook` configuration object.
notebook.id="linkedin_learning_courses_analysis"
notebook.version="1.0.0"
notebook.autorun=False

# Columns declared for the `transaction_scores` output. The names are presumably
# tags provided by the star import from cwi_taxonomy.v0.tags -- TODO confirm.
outputs.transaction_scores.columns = [AMOUNT, POSTING_DATE, ACCOUNT_ID, ACCOUNT_MAIN_DESCRIPTION]
# The `transactions` input declares an empty list of required columns.
inputs.transactions.required_columns = []

In [None]:
import pandas as pd
import requests
import ndjson
import dataclasses
from datetime import datetime

# URLs requested by the fetch helpers in this cell; all point at localhost:3000.
# NOTE(review): the '*' path segments look like wildcards over organizations /
# accounts -- confirm against the service that answers these routes.
AUTH_ENDPOINT = 'http://localhost:3000/auth'
ORGANIZATIONS_ENDPOINT = 'http://localhost:3000/organizations'
ACCOUNTS_ENDPOINT = 'http://localhost:3000/organizations/*/accounts'
TRANSACTIONS_ENDPOINT = 'http://localhost:3000/organizations/1/accounts/*/transactions'

# TODO: Setup Linkedin endpoints to agree with these ids for filtering
# Placeholder ids; none of them is referenced by the code visible in this notebook.
LINKEDIN_ORGID = "123"
COURSE_ID = 123
COURSE_VIEW_ID = 123

@dataclasses.dataclass
class Transaction:
    """A single transaction record.

    The field names match the keys read from each decoded record in
    ``get_transactions``.
    """

    entry_id: str
    line_number: str
    account_id: str
    posting_date: datetime
    amount: float
    entry_number: str

    def to_dict(self):
        """Return the record as a plain ``dict`` keyed by field name.

        The mapping is derived from the dataclass field list, so it keeps the
        declaration order and cannot drift out of sync when a field is added
        or renamed. Values are referenced, not copied.
        """
        return {
            field.name: getattr(self, field.name)
            for field in dataclasses.fields(self)
        }

# TODO: Setup Linkedin binding with agreed upon user/password & returned access token format
def authenticate_against_p19k():
    """POST the binding credentials to ``AUTH_ENDPOINT`` and return the decoded JSON body.

    Callers in this notebook read the ``'accessToken'`` key of the result.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer within 30 seconds.
    """
    # NOTE(review): placeholder credentials are hard-coded here (see the TODO
    # above); replace them with a real secret source before pointing this at
    # anything other than a local service.
    response = requests.post(
        AUTH_ENDPOINT,
        json={"user": "linkedin", "password": "linkedin"},
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    # Fail here on an error status instead of handing an error payload to callers.
    response.raise_for_status()
    return response.json()

def get_organizations():
    """Fetch the organizations from ``ORGANIZATIONS_ENDPOINT``, print and return them.

    The response body is decoded with ``ndjson.Decoder``. The decoded records
    are printed (as before) and also returned so the result can be reused.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer within 30 seconds.
    """
    token = authenticate_against_p19k()
    response = requests.get(
        ORGANIZATIONS_ENDPOINT,
        headers={"x-cwi-binding-token": token['accessToken']},
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    response.raise_for_status()
    organizations = response.json(cls=ndjson.Decoder)
    print(organizations)
    # TODO: Load into dataframe
    return organizations
    
def get_accounts():
    """Fetch the accounts from ``ACCOUNTS_ENDPOINT`` and return the decoded records.

    The response body is decoded with ``ndjson.Decoder``.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer within 30 seconds.
    """
    token = authenticate_against_p19k()
    response = requests.get(
        ACCOUNTS_ENDPOINT,
        headers={"x-cwi-binding-token": token['accessToken']},
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    response.raise_for_status()
    # TODO: Load into dataframe
    return response.json(cls=ndjson.Decoder)

def get_transactions() -> pd.DataFrame:
    """Fetch the transactions from ``TRANSACTIONS_ENDPOINT`` as a DataFrame.

    Each decoded record is passed through ``Transaction`` and its
    ``posting_date`` string is parsed with the ``'%Y-%m-%d'`` format. The
    resulting frame has one column per ``Transaction`` field, also when no
    records are returned.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer within 30 seconds.
        KeyError: if a record lacks one of the expected keys.
        ValueError: if a ``posting_date`` does not match ``'%Y-%m-%d'``.
    """
    token = authenticate_against_p19k()
    response = requests.get(
        TRANSACTIONS_ENDPOINT,
        headers={"x-cwi-binding-token": token['accessToken']},
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    response.raise_for_status()
    raw_records = response.json(cls=ndjson.Decoder)
    # The first and last decoded records are discarded.
    # NOTE(review): presumably they are not transaction lines -- confirm
    # against the endpoint's response format.
    raw_records = raw_records[1:-1]
    rows = [
        Transaction(
            entry_id=raw['entry_id'],
            line_number=raw['line_number'],
            account_id=raw['account_id'],
            posting_date=datetime.strptime(raw['posting_date'], '%Y-%m-%d'),
            amount=raw['amount'],
            entry_number=raw['entry_number'],
        ).to_dict()
        for raw in raw_records
    ]
    # Naming the columns explicitly keeps them present for an empty result,
    # so later column selection does not fail with KeyError.
    columns = [field.name for field in dataclasses.fields(Transaction)]
    return pd.DataFrame(rows, columns=columns)

# Runs whenever this cell executes: fetches the transactions and prints the DataFrame.
# NOTE(review): looks like a debugging aid; it issues HTTP requests on every
# run of the cell -- confirm it is still wanted.
print(get_transactions())

In [None]:
from datetime import date, datetime
from cwi_notebook import data_loader
# tags: evaluate
# Finds the most popular courses; returns a DataFrame of the top entries sorted by popularity (descending).
def evaluate(top, date_after, data_source) -> pd.DataFrame:
    """Return the ``top`` rows with the largest ``amount`` on or after a date.

    Args:
        top: Maximum number of rows to return.
        date_after: Inclusive lower bound for ``posting_date``. ``None``
            (e.g. an empty date picker) disables the date filter instead of
            raising inside ``datetime.combine``.
        data_source: ``'linkedin'`` loads the rows via ``get_transactions``;
            any other value loads them via ``data_loader.load_data(inputs)``.

    Returns:
        A DataFrame with the columns ``entry_id``, ``line_number``,
        ``account_id``, ``amount`` and ``posting_date``, sorted by ``amount``
        in descending order and limited to ``top`` rows.
    """
    if data_source == 'linkedin':
        data = get_transactions()
    else:
        data = data_loader.load_data(inputs)
    df = data[['entry_id', 'line_number', 'account_id', 'amount', 'posting_date']]
    if date_after is not None:
        # Midnight at the start of `date_after`, so that day itself is included.
        cutoff = datetime.combine(date_after, datetime.min.time())
        df = df[df['posting_date'] >= cutoff]
    # De-duplication happens before sorting, so the first row encountered per
    # entry_id is the one kept (not necessarily the one with the largest amount).
    df = df.drop_duplicates('entry_id')
    return df.sort_values(['amount'], ascending=False).head(top)

In [None]:
# tags: visualization
import ipywidgets as widgets
import matplotlib.pyplot as plt
# Choices for how many top rows to display; the first entry is the default.
options = [10, 20]
top_selector = widgets.Dropdown(
    options=options,
    description='View top',
    value=options[0],
    layout=widgets.Layout(width='10%',height='40px')
)
# Date picker initialised to today's date.
date_selector = widgets.DatePicker(
    value=date.today()
)
# Data source choices; the first entry ('sample') is the default.
data_options = ['sample', 'linkedin']
data_selector = widgets.Dropdown(
    options=data_options,
    description='Data source',
    value=data_options[0],
    layout=widgets.Layout(width='20%',height='40px')
)
def execute_score(top_items, date_filter, data_source):
    """Draw a bar chart of the rows returned by ``evaluate``.

    Args:
        top_items: Number of rows requested; also used to size the figure.
        date_filter: Date passed through to ``evaluate`` and shown in the title.
        data_source: Data source name passed through to ``evaluate``.
    """
    top_rows = evaluate(top_items, date_filter, data_source)

    label_size = 14
    # The figure scales with the number of bars: width = top_items, height = half of it.
    fig_width = top_items
    fig_height = 5 * top_items / 10
    plt.figure(figsize=(fig_width, fig_height))

    # One bar per row: account_id on the x axis, amount as the bar height.
    plt.bar(top_rows['account_id'], top_rows['amount'])
    plt.xlabel('Courses', fontsize=label_size)
    plt.ylabel('Views', fontsize=label_size)
    chart_title = f'Top {top_items} LinkedIn Courses since {date_filter} incl.'
    plt.title(chart_title, fontsize=label_size)
    plt.show()

# Bind the three selector widgets to execute_score's parameters via widgets.interact.
widgets.interact(execute_score, top_items=top_selector, date_filter=date_selector, data_source=data_selector)
