# Web Scraping

In [19]:
import requests
from bs4 import BeautifulSoup

In [20]:
URL = "https://www.udemy.com/course/python-django-the-practical-guide/"

In [21]:
webpage = requests.get(URL)
bs = BeautifulSoup(webpage.content, "html.parser")

In [None]:
print(bs.prettify())

In [27]:
slug = URL.split("/")[-2]
slug

'python-django-the-practical-guide'

## find
find(tag, attributes, recursive, text, keywords)

In [22]:
title = bs.find("h1", attrs={'class':'clp-lead__title'}).text
title

'Python Django - The Practical Guide'

In [22]:
headline = bs.find("div", attrs={'class':'clp-lead__headline'}).text
headline

'Learn how to build web applications and websites with Python and the Django framework'

In [23]:
instructors = []
creators = bs.find_all("a", attrs={'class':'ud-instructor-links'})
for creator in creators:
    instructors.append(creator.text)
instructors

['Academind by Maximilian Schwarzmüller', 'Maximilian Schwarzmüller']

In [60]:
students_num = bs.find("div", attrs={'class':'enrollment'}).text.split()[0]

In [25]:
rating = bs.find("span", attrs={'data-purpose':'rating-number'}).text
rating

'4.6'

In [15]:
topics = []
topic_tags = bs.find_all("a", attrs={'class':'ud-heading-sm'})
for item in topic_tags:
    topics.append(item.text)
topics

['Development', 'Web Development', 'Django']

In [79]:
class UdemyCourseScraper:

    import requests
    from bs4 import BeautifulSoup

    def __init__(self, url: str, _parsed_page: str = None) -> None:
        self.url = url
        self._parsed_page = _parsed_page

    def get_slug(self) -> str:
        return self.url.split("/")[-2]

    def parse_webpage(self) -> str:
        webpage = requests.get(self.url)
        self._parsed_page = BeautifulSoup(webpage.content, "html.parser")
        return self._parsed_page

    def scrape_title(self) -> str:
        return self._parsed_page.find("h1", attrs={'class':'clp-lead__title'}).text

    def scrape_headline(self) -> str:
        return self._parsed_page.find("div", attrs={'class':'clp-lead__headline'}).text

    def scrape_students_num(self) -> str:
        return self._parsed_page.find("div", attrs={'class':'enrollment'}).text.split()[0].replace(",","")

    def scrape_rating(self) -> str:
        return self._parsed_page.find("span", attrs={'data-purpose':'rating-number'}).text

    def scrape_instructors(self) -> list[str]:
        instructors = []
        elements = self._parsed_page.find_all("a", attrs={'class':'ud-instructor-links'})
        for element in elements:
            instructors.append(element.text)
        return instructors

    def scrape_topics(self) -> list[str]:
        topics = []
        elements = self._parsed_page.find_all("a", attrs={'class':'ud-heading-sm'})
        for element in elements:
            topics.append(element.text)
        return topics


In [82]:
if __name__ == "__main__":
    url = "https://www.udemy.com/course/python-django-the-practical-guide/"
    django = UdemyCourseScraper(url)
    django.parse_webpage()
    parsed_data = {
    "url": url,
    "slug": django.get_slug(),
    "title": django.scrape_title(),
    "headline": django.scrape_headline(),
    "instructors": django.scrape_instructors(),
    "students_num": django.scrape_students_num(),
    "rating": django.scrape_rating(),
    "topics": django.scrape_topics(),
    }
    print(parsed_data)

{'url': 'https://www.udemy.com/course/python-django-the-practical-guide/', 'slug': 'python-django-the-practical-guide', 'title': 'Python Django - The Practical Guide', 'headline': 'Learn how to build web applications and websites with Python and the Django framework', 'instructors': ['Academind by Maximilian Schwarzmüller', 'Maximilian Schwarzmüller'], 'students_num': '38293', 'rating': '4.6', 'topics': ['Development', 'Web Development', 'Django']}


In [1]:
class IngestPostgres:

    import configparser
    import psycopg

    #CONFIG_FILE = "./dev.conf"
    #CONNECTION_CONFIG = "rds-pg13"

    def __init__(self, config_file, connection_profile):
        self.config_file = config_file
        self.connection_profile = connection_profile

    def insert_course_data(self) -> None:

        pg_configuration = configparser.ConfigParser()
        pg_configuration.read(self.config_file)

        with psycopg.connect(
            host=pg_configuration.get(self.connection_profile, "host"),
            port=pg_configuration.get(self.connection_profile, "port"),
            dbname=pg_configuration.get(self.connection_profile, "database"),
            user=pg_configuration.get(self.connection_profile, "username"),
            password=pg_configuration.get(self.connection_profile, "password")
            ) as connection:

            with connection.cursor() as db_cursor:

                db_cursor.execute(
                    "INSERT INTO test (num, data) VALUES (%s, %s)",
                    (100, "abc'def")
                )

                connection.commit()

ModuleNotFoundError: No module named 'psycopg'