In [1]:
import os
import requests
import json

# Download every page of the Udemy discovery listing for subcategory_id=12,
# keep a trimmed record per course, and write all records to one JSON file.
# Whatever has been collected is still saved if the crawl stops early.

# Create a directory to store the data
# NOTE(review): the directory says "data_science" while the file name and the
# final message say "python" — confirm which subject subcategory 12 really is.
directory = "data_science_course_data"
os.makedirs(directory, exist_ok=True)

base_url = "https://www.udemy.com/api-2.0/discovery-units/all_courses/?page_size=16&subcategory=&instructional_level=&lang=&price=&duration=&closed_captions=&subs_filter_type=&subcategory_id=12&source_page=subcategory_page&locale=en_US&currency=inr&navigation_locale=en_US&skip_price=true&sos=ps&fl=scat"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection would hang the cell forever
page = 1
all_courses_data = []  # List to store data from all pages

# A single session reuses the connection across page requests
# (the recorded run went through 625 pages before the API answered 400).
with requests.Session() as session:
    while True:
        url = f"{base_url}&p={page}"

        # Send an HTTP GET request to the URL; on a network error stop crawling
        # but fall through to the save step so collected pages are not lost.
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to fetch data for page {page}. Error:", exc)
            break

        # Any non-200 status ends the crawl (this is how the recorded run terminated).
        if response.status_code != 200:
            print(f"Failed to fetch data for page {page}. Status code:", response.status_code)
            break

        # Parse the JSON content from the response
        try:
            course_data = response.json()
        except ValueError as exc:
            print(f"Failed to parse JSON for page {page}. Error:", exc)
            break

        # `or` fallbacks also cover keys that are present but null in the JSON,
        # which `.get(key, default)` alone does not.
        unit = course_data.get('unit') or {}
        course_items = unit.get('items') or []

        # A successful but empty page means there is nothing more to read;
        # without this check such a response would make the loop run forever.
        if not course_items:
            print(f"No courses returned for page {page}. Stopping.")
            break

        for course in course_items:
            course_url = course.get('url') or ''
            if course_url.startswith('/'):
                course_url = "https://www.udemy.com" + course_url  # Adding full Udemy URL to relative course URLs

            # Badge texts and the title of the 'locale' section, tolerant of null values
            badges = [badge.get('badge_text', '') for badge in course.get('badges') or []]
            course_language = (course.get('locale') or {}).get('title', '')

            # Constructing a dictionary for each course
            course_info = {
                "Course Title": course.get('title', ''),
                "Course URL": course_url,
                "Is Paid": course.get('is_paid', False),
                "Course Description": course.get('headline', ''),
                "Subscribers": course.get('num_subscribers', 0),
                "Average Rating": course.get('avg_rating', 0),
                "Number of Reviews": course.get('num_reviews', 0),
                "Number of Lectures": course.get('num_published_lectures', 0),
                "Content Length": course.get('content_info', ''),
                "Last Update": course.get('last_update_date', ''),
                "Badges": badges,
                "Course Language": course_language,
                "Instructional Level": course.get('instructional_level', '')
            }

            all_courses_data.append(course_info)  # Append course details to the list

        # Increment the page number for the next iteration
        page += 1

# Save all the relevant data to a single JSON file
output_file = os.path.join(directory, "python_courses_data.json")
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(all_courses_data, json_file, ensure_ascii=False, indent=4)

print("Python Course data saved to:", output_file)


Failed to fetch data for page 626. Status code: 400
Python Course data saved to: data_science_course_data\python_courses_data.json


In [2]:
import os
import requests
import json

# Download every page of the Udemy discovery listing for subcategory_id=302,
# keep a trimmed record per course (including instructor names), and write
# all records to one JSON file. Whatever has been collected is still saved
# if the crawl stops early.

# Create a directory to store the data
directory = "music_course_data"
os.makedirs(directory, exist_ok=True)

base_url = "https://www.udemy.com/api-2.0/discovery-units/all_courses/?page_size=16&subcategory=&instructional_level=&lang=&price=&duration=&closed_captions=&subs_filter_type=&subcategory_id=302&source_page=subcategory_page&locale=en_US&currency=inr&navigation_locale=en_US&skip_price=true&sos=ps&fl=scat"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection would hang the cell forever
page = 1
all_courses_data = []  # List to store data from all pages

# A single session reuses the connection across page requests
# (the recorded run went through 625 pages before the API answered 400).
with requests.Session() as session:
    while True:
        url = f"{base_url}&p={page}"

        # Send an HTTP GET request to the URL; on a network error stop crawling
        # but fall through to the save step so collected pages are not lost.
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to fetch data for page {page}. Error:", exc)
            break

        # Any non-200 status ends the crawl (this is how the recorded run terminated).
        if response.status_code != 200:
            print(f"Failed to fetch data for page {page}. Status code:", response.status_code)
            break

        # Parse the JSON content from the response
        try:
            course_data = response.json()
        except ValueError as exc:
            print(f"Failed to parse JSON for page {page}. Error:", exc)
            break

        # `or` fallbacks also cover keys that are present but null in the JSON,
        # which `.get(key, default)` alone does not.
        unit = course_data.get('unit') or {}
        course_items = unit.get('items') or []

        # A successful but empty page means there is nothing more to read;
        # without this check such a response would make the loop run forever.
        if not course_items:
            print(f"No courses returned for page {page}. Stopping.")
            break

        for course in course_items:
            course_url = course.get('url') or ''
            if course_url.startswith('/'):
                course_url = "https://www.udemy.com" + course_url  # Adding full Udemy URL to relative course URLs

            # Badge texts and the title of the 'locale' section, tolerant of null values
            badges = [badge.get('badge_text', '') for badge in course.get('badges') or []]
            course_language = (course.get('locale') or {}).get('title', '')

            # Extracting author information
            authors = course.get('visible_instructors') or []
            author_names = [author.get('display_name', '') for author in authors]

            # Constructing a dictionary for each course
            course_info = {
                "Course Title": course.get('title', ''),
                "Course URL": course_url,
                "Is Paid": course.get('is_paid', False),
                "Course Description": course.get('headline', ''),
                "Subscribers": course.get('num_subscribers', 0),
                "Average Rating": course.get('avg_rating', 0),
                "Number of Reviews": course.get('num_reviews', 0),
                "Number of Lectures": course.get('num_published_lectures', 0),
                "Content Length": course.get('content_info', ''),
                "Last Update": course.get('last_update_date', ''),
                "Badges": badges,
                "Course Language": course_language,
                "Instructional Level": course.get('instructional_level', ''),
                "Authors": author_names  # Add author information to the dictionary
            }

            all_courses_data.append(course_info)  # Append course details to the list

        # Increment the page number for the next iteration
        page += 1

# Save all the relevant data to a single JSON file
output_file = os.path.join(directory, "music_courses_data.json")
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(all_courses_data, json_file, ensure_ascii=False, indent=4)

print("Music Course data saved to:", output_file)


Failed to fetch data for page 626. Status code: 400
Music Course data saved to: music_course_data\music_courses_data.json
