In [None]:
import requests
import json
import pandas as pd


In [1]:
from datetime import datetime

# Record when the notebook was last executed.
# `%b` is the documented, portable directive for the abbreviated month name;
# the previous `%h` is a platform-specific alias that Python does not document
# and that is not guaranteed to exist on every platform's strftime.
print(f"Last time runs on {datetime.now():%d %b, %Y}.")

Last time runs on 08 Mar, 2023.


# Request course data with URL.

In [None]:
# Select course from dict
# Load the mapping whose keys are course names and whose values are looked up
# as course ids when building the request URL.
# The context manager closes the file handle deterministically (the bare
# `open(...)` call left that to the garbage collector), and the explicit
# encoding keeps the result independent of the OS locale.
with open('data/courses/all_courses_dict.json', encoding='utf-8') as courses_file:
    courses_dict: dict = json.load(courses_file)

# Display the selectable course names.
courses_dict.keys()

In [None]:
def get_url(course_name: str) -> str:
    """Return the api.pwskills.com course URL for ``course_name``.

    Args:
        course_name: A key of the module-level ``courses_dict``; the value
            stored under it is inserted into the URL path as the course id.

    Returns:
        The course endpoint URL including the ``withAllCourseMetas=true`` and
        ``ignoreInActive=true`` query parameters.

    Raises:
        KeyError: If ``course_name`` is not a key of ``courses_dict``.
    """
    return f'https://api.pwskills.com/v1/course/{courses_dict[course_name]}?withAllCourseMetas=true&ignoreInActive=true'

In [None]:
# Enter course name from courses_dict keys
# url = get_url('Full Stack web development')
url = get_url('Data Science masters')
# url = get_url('Java with DSA and system design')

# Without a timeout `requests.get` can block indefinitely on an unresponsive
# server; bound the connect phase and the read phase (both in seconds).
r = requests.get(url, timeout=(5, 30))
r

## Save/Export course data in json format.

In [None]:
# Abort unless the request came back with status 200; the response body is
# parsed right after this check.
if r.status_code != 200:
    raise requests.HTTPError('Response status code is not 200.')

# Optional export of the raw payload (left disabled):
# json.dump(r.json(), open('../data/courses/_course_data.json', 'w'), indent=2)

In [None]:
# Get data as python dictionary: parse the JSON response body and keep only
# the entry stored under its top-level 'data' key.
data: dict = r.json()['data']
# Display the field names available in the payload.
data.keys()

# Course Overview

In [None]:
align = 22  # width of the left-justified label column


def _show(label: str, value) -> None:
    """Print ``label`` padded to ``align`` characters, followed by ``value``."""
    print(label.ljust(align), value)


# Title of the Course
_show('Name of Course:', data['title'])

# Price of the Course: the 'IN' price reduced by the 'discount' percentage
pricing = data['pricing']
course_price = round(pricing['IN'] - (pricing['IN'] * pricing['discount']/100))
_show('Price of Course:', f'₹{course_price}')

# Instructors Name
inst_names = [instructor['name'] for instructor in data['instructorsDetails']]
_show('Name of instructors:', inst_names)

# The remaining fields are all read from the first 'courseMetas' entry.
first_meta = data['courseMetas'][0]

# Course Certificate Benchmark
cert_bench = first_meta['certificateBenchmark']
_show('Certificate Benchmark:', f'{cert_bench}%')

# Language of the Course
lang = first_meta['overview']['language']
_show('Language of Course:', lang)

# Course duration
duration = first_meta['duration']
_show('Course duration:', duration)

In [None]:
# Get course meta data: the first entry of the 'courseMetas' list.
meta: dict = data['courseMetas'][0]
# Display the field names available in that meta entry.
meta.keys()

## What you can learn from this course?

In [None]:
# Topics stored under overview -> learn in the course meta entry.
learn = meta['overview']['learn']

# Alternative one-line summary (disabled):
# print(f"You can learn {len(learn)}+ different types of topics in this course \
# such as {', '.join(learn[:-1]).title()}, etc.")

print(f'You can learn {len(learn)}+ different types of topics in this course.')

# One bullet per topic, in the order the API returned them.
for topic in learn:
    print(f'  - {topic}')

## Projects in this course?

In [None]:
# Project entries of this course, read from the course meta entry.
projects = meta['projects']

# Deliberately stop execution when the value is empty (or otherwise falsy):
# the error message directs the reader to the curriculum section instead.
# NOTE(review): presumably the project cells below cannot work without
# entries -- confirm before removing this guard.
if not projects:
    raise ValueError('No Projects are available in this course. Go to curriculum section.')

If no projects are available in this course, the cell above raises an error; skip the project cells and continue with the curriculum section.

### Go to [Curriculum Section](#curriculum-section)

In [None]:
# Column renames that give both frames a shared 'parentId' key.
parent_columns = {'_id': 'parentId', 'title': 'parentTitle'}
child_columns = {'_id': 'childId', 'parent': 'parentId', 'title': 'childTitle'}

# Entries with exactly two fields are treated as parents, all others as children.
parent_entries = [entry for entry in projects if len(entry) == 2]
child_entries = [entry for entry in projects if len(entry) != 2]

paren_proj = pd.DataFrame(parent_entries).rename(columns=parent_columns)
child_proj = pd.DataFrame(child_entries).rename(columns=child_columns)


In [None]:
# Pair each child project with its parent on 'parentId'; the inner join keeps
# only rows whose 'parentId' appears in both frames.
project_df = paren_proj.merge(child_proj, how='inner', on='parentId')
project_df.shape

### Create a date column.

In [None]:
# Pull the "<day> <month>'23" text out of each child title; titles without
# such a date yield NaN.
child_dates = project_df['childTitle'].str.extract(r"(\d{1,2} \w{3,4}'23)", expand=False)
project_df['date'] = child_dates

In [None]:
# Fill the null dates values
# Rows whose child title carried no date fall back to the date text found in
# the parent title.
null_date = project_df[project_df['date'].isna()]
parent_dates = null_date['parentTitle'].str.extract(r"(\d{1,2} \w{3,4}'23)", expand=False)

project_df.loc[null_date.index, 'date'] = parent_dates

In [None]:
# Convert date column data type
# A unit-less 'datetime64' target is rejected by pandas 2.x with a TypeError,
# so the nanosecond unit is spelled out; older pandas accepts this form too.
project_df['date'] = project_df['date'].astype('datetime64[ns]')

### Filter titles in the dataset.

In [None]:
# Filter parentTitle
# Substrings removed from every parent title, applied in this order; the flag
# states whether the pattern is a regular expression or a literal.
title_noise = (
    (r"(\d{1,2} \w{3,4}'23)", True),
    ('6 - ', False),
    (r'Python Project :\s?-  ', True),
)

parent_titles = project_df['parentTitle']
for noise, is_regex in title_noise:
    parent_titles = parent_titles.str.replace(noise, '', regex=is_regex)

# Trim the whitespace left behind by the removals.
project_df['parentTitle'] = parent_titles.str.strip()

In [None]:
# Filter childTitle
# Remove the embedded "<day> <month>'23" text, then trim leftover whitespace.
child_titles = project_df['childTitle'].str.replace(r"(\d{1,2} \w{3,4}'23)", '', regex=True)
project_df['childTitle'] = child_titles.str.strip()

### Project details.

In [None]:
# Headline with the number of distinct parent ids.
print(f"This course has {project_df['parentId'].nunique()} different types of (parent) topics for project which are:")

# One bullet per distinct parent title, in order of first appearance.
distinct_parent_titles = project_df['parentTitle'].unique()
for parent_title in distinct_parent_titles:
    print(f'  - {parent_title}')

In [None]:
print(f"Also, there are {project_df['childId'].nunique()}+ different (child) topics for project which are:")

# Print each parent title followed by the child titles that belong to it.
for parent_title in project_df['parentTitle'].unique():
    print(f'  + {parent_title}')
    # Select this parent's rows with a boolean mask; row order is preserved.
    belongs_to_parent = project_df['parentTitle'] == parent_title
    for child_title in project_df.loc[belongs_to_parent, 'childTitle']:
        print(f"    - {child_title}")
    print()

# Curriculum Section

In [None]:
# Curriculum entries of the first 'courseMetas' entry.
curr_dict: list[dict] = data['courseMetas'][0]['curriculum']
curriculum_raw = pd.DataFrame(curr_dict)

# Self-join: match every entry's '_id' against the 'parent' field of the other
# entries, so each resulting row pairs a parent entry with one of its children.
child_lookup = curriculum_raw[['parent', 'title']]
df = curriculum_raw.merge(
    child_lookup,
    how='inner',
    left_on='_id',
    right_on='parent',
    suffixes=('_parent', '_child'),
)


In [None]:
# Drop columns
df.drop(columns=['_id', 'preview', 'parent_parent', 'parent_child'],
        inplace=True)

# Rename columns
df.rename(columns={
    'title_parent': 'parentTitle',
    'title_child': 'childTitle'
}, inplace=True)


In [None]:
# Create date column
# Old code (kept for reference only; superseded by the "Updated code" cell below)
# df['date'] = (df['parentTitle']
#               .str.extract(r"(\d{1,2} \w{3,5}'23)")
#               .astype('datetime64'))


In [None]:
# Updated code
# Build the 'date' column from the date text embedded in each parent title.
# The string steps run in order and normalise the text before it is parsed.
df['date'] = pd.to_datetime((df['parentTitle']
                             # Keep the text before the last '23' and re-append '23',
                             # which discards whatever followed that final '23'.
                             .str.rsplit('23', n=1).str.get(0).add('23')
                             # Rewrite the apostrophe year forms to ' 2023'.
                             .str.replace(r"' 23|'23| ' 23", ' 2023', regex=True)
                             # Blank out values that consist of nothing but '23'.
                             .str.replace(r'^(23)$', '', regex=True)
                             # Remove a leading "<1-2 digits> - " prefix.
                             .str.replace(r'^\d{1,2} - ', '', regex=True)
                             # Text that cannot be parsed becomes NaT; ffill then copies
                             # the most recent valid date down into those rows.
                             ), errors='coerce').ffill()


In [None]:
# Remove date sub-string from parentTitle
# Keep only the text captured after the '23' marker; rows where the pattern
# does not match take over the title of the closest matching row above.
title_text = df['parentTitle'].str.extract(r".23 ? (\w.*)", expand=False)
df['parentTitle'] = title_text.ffill()

In [None]:
def print_curr_details(curr_df: pd.DataFrame, pat: str):
    """Print every parent topic matching ``pat`` together with its child topics.

    For each distinct ``parentTitle`` in ``curr_df`` that contains ``pat``
    (case-insensitive substring match) one block is printed: a header line
    with the title and the mean of that topic's ``date`` values, one line per
    ``childTitle``, and a blank line.

    Args:
        curr_df: Frame with the columns ``parentTitle``, ``childTitle`` and
            ``date`` (datetime-like).
        pat: Text to search for in the parent titles; an empty string matches
            every title.

    Returns:
        None. The result is written to standard output.
    """
    needle = pat.lower()

    # Missing titles have no ``.lower()``; skip them instead of crashing.
    for title in curr_df['parentTitle'].dropna().unique():
        if needle not in title.lower():
            continue

        # Use the frame that was passed in (the previous version read the
        # notebook-global ``df`` here and ignored its own argument).
        topic_rows = curr_df[curr_df['parentTitle'] == title]
        date = topic_rows['date'].mean()

        # A topic without any valid date yields NaT, which cannot be formatted.
        date_text = 'date unknown' if pd.isna(date) else f'{date:%d %B, %Y}'
        print(f'+ {title} - {date_text}')

        # The filtered rows already hold exactly this topic's children.
        for child in topic_rows['childTitle']:
            print(f'  - {child}')
        print()

## Find/Get curriculum details by topics.

In [None]:
# Print some title for help
# `sample(10)` raises ValueError when the frame has fewer than 10 rows, so the
# sample size is capped at the number of available rows.
print(df['parentTitle'].sample(min(10, len(df))).unique())


# Ask for a (partial) parent topic; matching is a case-insensitive substring test.
curr_inp = input('\nEnter the (parent) topic to see (child) topics: ')

print(f'\n>>> You entered: {curr_inp}\n')
print_curr_details(df, curr_inp)

In [None]:
print(df.shape)
# Show up to 28 random rows; `sample(28)` itself raises ValueError when the
# frame holds fewer rows than that, so the size is capped.
df.sample(min(28, len(df)))