- purchases_onboarded_per_month
- Student and purchase type
- Student and course title
- Student engagement rate and countries

In [18]:
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
course_info_df = pd.read_csv('data/raw/365_course_info.csv')
course_ratings_df = pd.read_csv('data/raw/365_course_ratings.csv')
exam_info_df = pd.read_csv('data/raw/365_exam_info.csv')
quiz_info_df = pd.read_csv('data/raw/365_quiz_info.csv')
student_engagement_df = pd.read_csv('data/raw/365_student_engagement.csv')
student_exams_df = pd.read_csv('data/raw/365_student_exams.csv')
student_hub_questions_df = pd.read_csv('data/raw/365_student_hub_questions.csv')
student_info_df = pd.read_csv('data/raw/365_student_info.csv')
student_learning_df = pd.read_csv('data/raw/365_student_learning.csv')
student_purchases_df = pd.read_csv('data/raw/365_student_purchases.csv')
student_quizzes_df = pd.read_csv('data/raw/365_student_quizzes.csv')

In [3]:
course_ratings_df.date_rated = pd.to_datetime(course_ratings_df.date_rated)
student_engagement_df.date_engaged = pd.to_datetime(student_engagement_df.date_engaged)
student_exams_df.date_exam_completed = pd.to_datetime(student_exams_df.date_exam_completed)
student_hub_questions_df.date_question_asked = pd.to_datetime(student_hub_questions_df.date_question_asked)
student_info_df.date_registered = pd.to_datetime(student_info_df.date_registered)
student_learning_df.date_watched = pd.to_datetime(student_learning_df.date_watched)
student_purchases_df.date_purchased = pd.to_datetime(student_purchases_df.date_purchased)

#### purchases_onboarded_per_month

In [4]:
onboarded_per_month = student_engagement_df.copy()
purchases_per_month = student_purchases_df.copy()

# Onboarded Per Month
onboarded_per_month['Month'] = onboarded_per_month.date_engaged.apply(lambda date : date.month)
onboarded_per_month = onboarded_per_month[['Month']].groupby('Month').size().reset_index()
onboarded_per_month = onboarded_per_month.rename(columns={0: 'Onboarded'})

# Purchases Per Month
purchases_per_month['Month'] = purchases_per_month.date_purchased.apply(lambda date : date.month)
purchases_per_month = purchases_per_month[['Month']].groupby('Month').size().reset_index()
purchases_per_month = purchases_per_month.rename(columns={0: 'Purchases'})

purchases_onboarded_per_month = pd.merge(purchases_per_month, onboarded_per_month, on='Month')
purchases_onboarded_per_month['Percentage'] = (purchases_onboarded_per_month.Purchases / purchases_onboarded_per_month.Onboarded).round(4) * 100

purchases_onboarded_per_month

Unnamed: 0,Month,Purchases,Onboarded,Percentage
0,1,277,3588,7.72
1,2,208,4632,4.49
2,3,331,6118,5.41
3,4,269,5881,4.57
4,5,369,5774,6.39
5,6,298,7007,4.25
6,7,405,7045,5.75
7,8,299,10347,2.89
8,9,391,8028,4.87
9,10,194,6951,2.79


In [22]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=purchases_onboarded_per_month.Month, y=purchases_onboarded_per_month.Purchases, name='Purchases'), secondary_y=False)
fig.add_trace(go.Scatter(x=purchases_onboarded_per_month.Month, y=purchases_onboarded_per_month.Onboarded, name='Onboarded'), secondary_y=True)

fig.update_xaxes(title_text='Month')

fig.update_yaxes(title_text='Amount of Purchases', secondary_y=False)
fig.update_yaxes(title_text='Onboarded', secondary_y=True)

In [5]:
student_purchases_df.purchase_type.value_counts()

purchase_type
Annual       1547
Monthly      1444
Quarterly      50
Name: count, dtype: int64

In [8]:
px.histogram(data_frame=student_purchases_df, x='purchase_type', color='purchase_type')

In [24]:
course_ratings_df

Unnamed: 0,course_id,student_id,course_rating,date_rated
0,14,258956,5,2022-07-06
1,7,259019,5,2022-08-30
2,23,259019,4,2022-08-30
3,14,259283,5,2022-07-08
4,30,259283,5,2022-07-02
...,...,...,...,...
2495,28,295337,5,2022-10-19
2496,7,295363,5,2022-10-19
2497,11,295363,5,2022-10-20
2498,42,295427,5,2022-10-20


In [45]:
df = pd.merge(student_info_df[['student_id', 'student_country']], student_engagement_df[['student_id']], on='student_id', how='left')
df = df.groupby('student_country').size().reset_index().sort_values(by=0, ascending=False).head()
df = df.rename(columns={0: 'engaged'})

df

Unnamed: 0,student_country,engaged
167,US,16379
75,IN,11595
49,EG,4666
56,GB,4377
119,NG,4081


In [48]:
px.bar(data_frame=df, x='student_country', y='engaged', color='student_country')