In [1]:
from io import StringIO

from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser

import requests

In [2]:
url = "https://info.udemy.com/rs/udemy/images/UdemyforBusinessCourseList.pdf"
outfile = "./UdemyForBusinessCourseList.pdf"

In [3]:
r = requests.get(url)


In [4]:
with open(outfile, 'wb') as of:
    of.write(r.content)

In [5]:
output_string = StringIO()

with open(outfile, 'rb') as in_file:
    parser = PDFParser(in_file)
    doc = PDFDocument(parser)
    rsrcmgr = PDFResourceManager()
    device = TextConverter(rsrcmgr, output_string, laparams=LAParams())
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    for page in PDFPage.create_pages(doc):
        interpreter.process_page(page)

text = output_string.getvalue()

In [6]:
course_list = []
for line in text.split('\n'):
    if line.startswith("•"):
        sanitized_line = line.replace("•  ", "")
        course_list.append(sanitized_line)
print(f"Found {len(course_list)} courses")

Found 5735 courses


In [7]:
def search_list_for_topic(search_term):
    matches = []
    for course in course_list:
        if search_term.lower() in course.lower():
            matches.append(course)
    print("************************************")
    print(f'Found {len(matches)} courses matching "{search_term}"')
    print("************************************\n")
    for course in sorted(matches):
        print(f"• {course}")

In [8]:
search_list_for_topic("spark")

************************************
Found 28 courses matching "spark"
************************************

• A Big Data Hadoop and Spark project for absolute beginners
• Apache Spark 2.0 with Java -Learn Spark from a Big Data Guru
• Apache Spark 3 - Real-time Stream Processing using Python
• Apache Spark 3 - Real-time Stream Processing using Scala
• Apache Spark 3 - Spark Programming in Python for Beginners
• Apache Spark 3 - Spark Programming in Scala for Beginners
• Apache Spark for Java Developers
• Apache Spark with Scala - Hands On with Big Data!
• Big Data with Apache Spark and AWS
• CCA 131 - Cloudera Certified Hadoop and Spark Administrator
• CCA 175 - Spark and Hadoop Developer - Python (pyspark)
• CCA 175 - Spark and Hadoop Developer Certification - Scala
• CCA 175 Spark and Hadoop Developer - Practice Tests
• Databricks Essentials for Spark Developers (Azure and AWS)
• From 0 to 1 : Spark for Data Science with Python
• HDPCD:Spark using Scala
• Master Apache Spark - Hands 

In [9]:
search_list_for_topic("git ")

************************************
Found 21 courses matching "git "
************************************

• Azure DevOps Repos with GIT and Visual Studio for Developers
• Command Line Essentials: Git Bash for Windows
• Complete Git Guide: Understand and master Git and GitHub
• Get Git Smart Course: Learn Git in Unity, SourceTree, GitHub
• Git & GitHub Complete Masterclass : Beginner to Git Expert
• Git & GitHub Crash Course: Create a Repository From Scratch!
• Git & GitHub Masterclass
• Git & GitHub with Eclipse, Android studio & IntelliJ
• Git Complete: The definitive, step-by-step guide to Git
• Git Essentials: Learn Git with Bitbucket and Sourcetree
• Git Going Fast: One Hour Git Crash Course
• Git Going with Comparing, Branching and Merging
• Git a Web Developer Job: Mastering the Modern Workflow
• Git and GitHub for Writers
• Git by Example
• Git for Geeks: Quick Git Training for Developers
• Git for Windows: Step-By-Step Mastery using Commands and GUI
• GitHub Ultimate: Master 

In [10]:
search_list_for_topic("kafka")

************************************
Found 14 courses matching "kafka"
************************************

• Apache Kafka - Real-time Stream Processing (Master Class)
• Apache Kafka Series - Confluent Schema Registry & REST Proxy
• Apache Kafka Series - KSQL on ksqlDB for Stream Processing !
• Apache Kafka Series - Kafka Cluster Setup & Administration
• Apache Kafka Series - Kafka Connect Hands-on Learning
• Apache Kafka Series - Kafka Monitoring & Operations
• Apache Kafka Series - Kafka Security | SSL SASL Kerberos ACL
• Apache Kafka Series - Kafka Streams for Data Processing
• Apache Kafka Series - Learn Apache Kafka for Beginners v2
• Apache Kafka for Developers using Spring Boot[LatestEdition]
• Apache Kafka for absolute beginners
• CCDAK Confluent Certified Developer for Apache Kafka TESTS
• CCOAK Confluent Certified Operator for Apache Kafka PRACTICE
• Kafka & Kafka Stream With Java Spring Boot - Hands-on Coding
