### 1. Scraper Test

In [None]:
import json
from pathlib import Path

from compare_2_courses.constants import (
    AWS_DEA_01_2024_HANDS_ON
    , ULTIMATE_AWS_SAA_C03
    , ULTIMATE_AWS_SAP_C02_2024
    , AWS_2024_DOP_C02
    , ULTIMATE_AWS_NEW_2024_SCS_C02
    )
from compare_2_courses.scrape.scraper_config import ScraperConfig
from compare_2_courses.scrape.udemy_scraper import UdemyScraper
from compare_2_courses.scrape.utils import get_default_chrome_driver

In [None]:
# Build the driver once; the same instance is handed to every UdemyScraper
# created in the scraping loop.
driver = get_default_chrome_driver()

In [None]:
# Make sure the output directory exists before the scraping starts, so a
# finished scrape is not lost to a FileNotFoundError on the first write.
Path("./data").mkdir(parents=True, exist_ok=True)

# Scrape each configured course and dump the result as JSON under ./data.
for course in [
    AWS_DEA_01_2024_HANDS_ON
    , ULTIMATE_AWS_SAA_C03
    , ULTIMATE_AWS_SAP_C02_2024
    , AWS_2024_DOP_C02
    , ULTIMATE_AWS_NEW_2024_SCS_C02
]:
    print(course.title)
    scrape_config = ScraperConfig(course_landing_page=course.landing_page)
    scraper = UdemyScraper(scraper_config=scrape_config, driver=driver)
    # Keep the scrape result under its own name instead of rebinding the
    # loop variable, so `course` always refers to the configured constant.
    scraped_course = scraper.scrape()
    # The file is named after the title carried by the scraped result.
    # Plain write mode is enough here; nothing is read back from the handle.
    with open(f"./data/{scraped_course.title}.json", "w", encoding="utf-8") as f:
        json.dump(scraped_course.to_json_dict(), f)

### 2. Load from scraped data

In [1]:
from compare_2_courses.schemas.course import Course
from compare_2_courses.analytics.analyzer import Analyzer, CompareReport
from compare_2_courses.schemas.course_content import CourseVideo
from compare_2_courses.constants import (
    AWS_DEA_01_2024_HANDS_ON
    , ULTIMATE_AWS_SAA_C03
    , ULTIMATE_AWS_SAP_C02_2024
    , AWS_2024_DOP_C02
    , ULTIMATE_AWS_NEW_2024_SCS_C02
    , UDEMY_LEARNING_PLATFORM
    )
import json

In [2]:
# Holds the Course objects parsed from the JSON files under ./data.
all_courses = []

In [3]:
# Start from an empty list inside this cell so that re-running it rebuilds
# `all_courses` instead of appending every course a second time (which would
# silently change what positional indices into the list refer to).
all_courses = []

# Files are read in the order of the constants below, so the position of each
# course in `all_courses` matches its position in this list.
for scraped_data_path in [
    f"./data/{course.title}.json"
    for course in [
        AWS_DEA_01_2024_HANDS_ON
        , ULTIMATE_AWS_SAA_C03
        , ULTIMATE_AWS_SAP_C02_2024
        , AWS_2024_DOP_C02
        , ULTIMATE_AWS_NEW_2024_SCS_C02
    ]
]:
    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(scraped_data_path, "r", encoding="utf-8") as f:
        all_courses.append(
            Course.read_json_dict(
                json.load(f)
            )
        )

### 3. Analyze

In [4]:
# List the title of every loaded course, in list order, one per line.
for c in all_courses:
    print(c.title)

AWS Certified Data Engineer Associate 2024 - Hands On!
Ultimate AWS Certified Solutions Architect Associate SAA-C03
Ultimate AWS Certified Solutions Architect Professional 2024
AWS Certified DevOps Engineer Professional 2024 - DOP-C02
Ultimate AWS Certified Security Specialty [NEW 2024] SCS-C02


In [5]:
# Indices follow the order in which the course files were loaded:
# index 2 is ULTIMATE_AWS_SAP_C02_2024 and index 1 is ULTIMATE_AWS_SAA_C03.
analyzer = Analyzer(
    left_course=all_courses[2],
    right_course=all_courses[1]
)

In [6]:
# Run the comparison and keep the result for display with show_results().
report =  analyzer.analyze()

In [7]:
def percent(u, d) -> int:
    """Return ``u`` as a whole-number percentage of ``d``.

    Args:
        u: The part (numerator).
        d: The whole (denominator).

    Returns:
        ``(u / d) * 100`` rounded with the built-in ``round`` to an ``int``.
        When ``d`` is zero there is nothing to take a share of, so ``0`` is
        returned instead of raising ``ZeroDivisionError``.
    """
    if d == 0:
        return 0
    # round() with a single argument yields an int, hence the return type.
    return round((u / d) * 100)

def show_results(report: CompareReport, analyzer: Analyzer):
    """Print a plain-text summary of a two-course comparison.

    The summary lists, for the left and right course of ``analyzer``, how many
    materials in ``report`` are unique to that course, followed by the number
    of materials the report marks as the same and the share of each course's
    materials that number represents (computed with ``percent``).

    Args:
        report: Comparison result providing ``same_materials``,
            ``course_left_unique_materials`` and
            ``course_right_unique_materials``.
        analyzer: The analyzer providing ``left_course`` and ``right_course``.
    """
    shared_total = len(report.same_materials)

    print("Compare 2 Udemy courses' materials (videos, exams, labs, articles): ")

    # Per-course count of materials that appear only in that course.
    left = analyzer.left_course
    left_unique_total = len(report.course_left_unique_materials)
    print(f"+ {left.title} --- {left_unique_total} unique materials")

    right = analyzer.right_course
    right_unique_total = len(report.course_right_unique_materials)
    print(f"+ {right.title} --- {right_unique_total} unique materials")

    # Shared materials, expressed as a share of each course's full material list.
    print(f"\nBoth courses have {shared_total} same materials, which takes up to:")
    left_share = percent(shared_total, len(left.materials))
    print(f"+ {left_share}% of {left.title}")
    right_share = percent(shared_total, len(right.materials))
    print(f"+ {right_share}% of {right.title}")

In [8]:
# Print the comparison summary for the report and analyzer built earlier.
show_results(report=report, analyzer=analyzer)

Compare 2 Udemy courses' materials (videos, exams, labs, articles): 
+ Ultimate AWS Certified Solutions Architect Professional 2024 --- 185 unique materials
+ Ultimate AWS Certified Solutions Architect Associate SAA-C03 --- 390 unique materials

Both courses have 33 same materials, which takes up to:
+ 15% of Ultimate AWS Certified Solutions Architect Professional 2024
+ 8% of Ultimate AWS Certified Solutions Architect Associate SAA-C03


In [None]:
from difflib import SequenceMatcher
def similar(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    Args:
        a: First sequence to compare (e.g. a string).
        b: Second sequence to compare.

    Returns:
        A float in ``[0, 1]``; ``1.0`` means the sequences are identical.
    """
    # isjunk=None: no element is treated as junk.
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

In [None]:
# Similarity ratio between the titles of the two courses compared above.
similar(
    "Ultimate AWS Certified Solutions Architect Professional 2024",
    "Ultimate AWS Certified Solutions Architect Associate SAA-C03"
)