In [5]:
import requests
import pandas as pd
from tqdm.notebook import tqdm

# Request headers sent with every catalogue call. The values describe a
# Chrome 119 browser on Windows asking for JSON with the English ('EN') locale.
# Built from an ordered list of (name, value) pairs so the order stays explicit.
_header_pairs = [
    ('Accept', 'application/json, text/plain, */*'),
    ('Accept-Encoding', 'gzip, deflate, br'),
    ('Accept-Language', 'en-US,en;q=0.9,az;q=0.8'),
    ('Authorization', 'undefined undefined'),
    ('Cache-Control', 'no-cache, no-store, must-revalidate, private'),
    ('Client_type', 'web'),
    ('Content-Type', 'application/json'),
    ('Dnt', '1'),
    ('Manifest', '23.46_B253_86778e16'),
    ('Namens_studentnummer', ''),
    ('Pragma', 'no-cache'),
    ('Referer', 'https://uvt.osiris-student.nl/onderwijscatalogus/extern/cursussen'),
    ('Release_version', '86778e16b9c920e6044e0458fb619ca45ff3300e'),
    ('Sec-Ch-Ua', '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"'),
    ('Sec-Ch-Ua-Mobile', '?0'),
    ('Sec-Ch-Ua-Platform', '"Windows"'),
    ('Sec-Fetch-Dest', 'empty'),
    ('Sec-Fetch-Mode', 'cors'),
    ('Sec-Fetch-Site', 'same-origin'),
    ('Taal', 'EN'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'),
]
headers = dict(_header_pairs)

# Read course IDs from the file, one ID per line.
# Blank lines are dropped: an empty ID would otherwise produce a request for
# the bare ".../cursussen/" URL instead of a single course.
with open('course_IDs.txt', 'r') as file:
    stripped_lines = (line.strip() for line in file)
    course_ids = [course_id for course_id in stripped_lines if course_id]

# Initialize df with the columns of the output table
df = pd.DataFrame(columns=['url', 'year', 'course_title', 'language', 'college', 'lecturers_urls', 'lecturers', 'programmes', 'text'])

# Define the number of chunks
num_chunks = 1
# Ceiling division so that at most `num_chunks` chunks are produced (floor
# division adds an extra chunk for the remainder). The result is clamped to 1
# because a chunk size of 0 (empty ID list, or more chunks than IDs) would
# make range(..., step=0) raise ValueError.
chunk_size = max(1, -(-len(course_ids) // num_chunks))

# Column order shared by the in-memory table and the CSV snapshots.
COLUMNS = ['url', 'year', 'course_title', 'language', 'college',
           'lecturers_urls', 'lecturers', 'programmes', 'text']
REQUEST_TIMEOUT = 30  # seconds; keeps one hung connection from stalling the whole run


def extract_course_fields(payload):
    """Pull the catalogue fields of one course out of its decoded JSON payload.

    Args:
        payload: Decoded JSON response for one course. It must contain an
            'items' list of sections, each tagged with a 'rubriek' key.

    Returns:
        dict with the keys 'year', 'course_title', 'language', 'college',
        'lecturers_urls', 'lecturers', 'programmes' and 'text'. A key stays
        None when its section is absent from the payload. 'lecturers_urls'
        and 'programmes' are never filled in by this function.

    Raises:
        KeyError, IndexError, TypeError: if a section does not have the
            layout that is indexed into below.
    """
    fields = {
        'year': None,
        'course_title': None,
        'language': None,
        'college': None,
        'lecturers_urls': None,
        'lecturers': None,
        'programmes': None,
        'text': None,
    }

    for section in payload['items']:
        rubriek = section['rubriek']

        if rubriek == 'rubriek-kop':
            # Positional lookup: entry 1 holds the year, entry 3 the title.
            fields['year'] = section['velden'][1]['waarde']
            fields['course_title'] = section['velden'][3]['waarde']

        elif rubriek == 'rubriek-praktische-info':
            fields['language'] = section['velden'][0]['waarde']['velden'][0]['waarde']

        elif rubriek == 'rubriek-zoek':
            fields['college'] = section['velden'][0]['waarde']

        elif rubriek == 'rubriek-docenten':
            names = []
            for group in section['velden'][0]['waarde'] or []:
                try:
                    if group['omschrijving'] == 'Lecturer':
                        for member in group['velden']:
                            names.append(member['docent'])
                except (KeyError, TypeError):
                    # Best effort: a malformed staff group is skipped, but
                    # names collected so far are kept.
                    continue
            fields['lecturers'] = names

        elif rubriek == 'rubriek-inhoud':
            parts = []
            for field in section['velden']:
                # A missing (None) body is written as an empty string.
                parts.append(field['titel'] + "\n" + (field['waarde'] or "") + "\n")
            fields['text'] = "".join(parts)

    return fields


records = []      # one dict per course, accumulated across all chunks
failed_urls = []  # (url, error) pairs for courses that could not be fetched or parsed

# Guard against a zero step, which would make range() raise ValueError.
step = max(1, chunk_size)

# Loop through course IDs with tqdm for progress bar.
# A single Session reuses the HTTP connection across all requests.
with requests.Session() as session:
    for chunk_start in tqdm(range(0, len(course_ids), step), desc="Processing Courses in Chunks"):
        chunk_ids = course_ids[chunk_start:chunk_start + step]
        chunk_number = chunk_start // step

        for course_id in tqdm(chunk_ids, desc="Processing Chunk"):
            url = f"https://uvt.osiris-student.nl/student/osiris/owc/cursussen/{course_id}"

            # Start from an all-None row so a failed course still appears in the output.
            data_dict = dict.fromkeys(COLUMNS)
            data_dict['url'] = url

            try:
                # Make request with headers
                response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
                data = response.json()
                data_dict.update(extract_course_fields(data))
            except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
                # Record the failure and keep going, so one bad course does
                # not discard the rows already collected.
                failed_urls.append((url, repr(error)))

            records.append(data_dict)

        # Build the frame once per chunk instead of appending row by row.
        # dtype=object keeps None and integer years as they are, so the CSV
        # does not show years as floats.
        df = pd.DataFrame(records, columns=COLUMNS, dtype=object)

        # Save after each chunk. Rows accumulate across chunks, so every file
        # contains all courses processed so far.
        df.to_csv(f'course_details_{chunk_number}.csv', index=False)

if failed_urls:
    print(f"{len(failed_urls)} course(s) could not be fetched or parsed:")
    for failed_url, reason in failed_urls:
        print(f"  {failed_url}: {reason}")

df


Processing Courses in Chunks:   0%|          | 0/1 [00:00<?, ?it/s]

Processing Chunk:   0%|          | 0/2104 [00:00<?, ?it/s]

Unnamed: 0,url,year,course_title,language,college,lecturers_urls,lecturers,programmes,text
0,https://uvt.osiris-student.nl/student/osiris/o...,2023,(Dis)Trust in Connected Societies,English,Tilburg University,,[dr. F.J.Y. Tomas],,"Aims\n<html lang=""en"" xml:lang=""en"">\n<head>\n..."
1,https://uvt.osiris-student.nl/student/osiris/o...,2023,(Semi)parametric Econometrics (CentER),English,Tilburg School of Economics and Management,,"[D. Kojevnikov, dr. P. Cizek]",,Aims\n<html>\n<head>\n\t<title></title>\n\t<st...
2,https://uvt.osiris-student.nl/student/osiris/o...,2023,21st Century Perspectives on Education,English,Tilburg School of Humanities and Digital Sciences,,"[dr. A.M.L. Aarts, dr. J.H.M. van Rijt, dr. J....",,Aims\n<html>\n<head>\n\t<title></title>\n\t<st...
3,https://uvt.osiris-student.nl/student/osiris/o...,2023,A Primer on International Business Law,English,Tilburg Law School,,[prof. dr. C.F. van der Elst],,Aims\n<html>\n<head>\n\t<title></title>\n\t<me...
4,https://uvt.osiris-student.nl/student/osiris/o...,2023,AI for Business,English,Tilburg School of Economics and Management,,"[dr. H. Weigand, dr. S. Shekhar]",,"Aims\n<html lang=""en"" xml:lang=""en"">\n<head>\n..."
...,...,...,...,...,...,...,...,...,...
2099,https://uvt.osiris-student.nl/student/osiris/o...,2023,Writing and Information Skills,English,Tilburg School of Economics and Management,,"[dr. P.H.E. van der Zande, dr. S.E. Peraino, d...",,Aims\n<html>\n<head>\n\t<title></title>\n\t<me...
2100,https://uvt.osiris-student.nl/student/osiris/o...,2023,Youth Culture and Spirituality,Dutch,Tilburg School of Catholic Theology,,"[prof. dr. M.C.H. van Dijk-Groeneboer, prof.dr...",,Aims\n<html>\n<head>\n\t<title></title>\n\t<st...
2101,https://uvt.osiris-student.nl/student/osiris/o...,2023,Zingeving bij trauma en geweld,Dutch,Tilburg School of Catholic Theology,,"[Dr J.N. den Toom, prof. dr. E. Sengers]",,Aims\n<html>\n<head>\n\t<title></title>\n\t<st...
2102,https://uvt.osiris-student.nl/student/osiris/o...,2023,ong name,Dutch,Tilburg University,,[K.J.M. van Oosterbosch],,Aims\n\nContent\n\n


In [26]:
# Debug inspection: display whatever is currently bound to `data` in the kernel.
# NOTE(review): the output saved with this cell shows an Erasmus University
# (Rotterdam School of Management) course from 2018, which does not match the
# uvt.osiris-student.nl URLs requested by the scraping cell. This looks like
# stale kernel state from another run; confirm, or remove this cell.
data

{'items': [{'rubriek': 'rubriek-links',
   'velden': [{'veld': 'item-links',
     'waarde': [{'velden': [{'titel': 'Timetable',
         'url': 'https://timetables.eur.nl/link?timetables.0.key=BAP-MG&timetables.0.type=module&timetables.0.source=2018&timetables.1.key=BAP-MG_2018&timetables.1.type=timeedit_course&timetables.1.source=TimeEdit&ignoreEmpty=true&ignoreNotFound=true&view=list'}]}]}]},
  {'rubriek': 'rubriek-zoek',
   'titel': '',
   'weergave': 'UIT',
   'velden': [{'veld': 'item-faculteit-zoek',
     'titel': 'Faculteit',
     'waarde': 'Rotterdam School of Management, Erasmus University'},
    {'veld': 'item-punten-zoek', 'titel': 'Punten', 'waarde': '0 ECTS'}]},
  {'rubriek': 'rubriek-kop',
   'titel': '',
   'weergave': 'UIT',
   'velden': [{'veld': 'cursus', 'waarde': 'BAP-MG'},
    {'veld': 'collegejaar', 'waarde': 2018},
    {'veld': 'cursus_korte_naam',
     'waarde': '1st Year Student Mentor Programme'},
    {'veld': 'cursus_lange_naam',
     'waarde': '1st Year Stud