# **Data Scraping**

## **Import Libraries**

In [1]:
import requests
import pandas as pd

## **Data Fetching**

In [7]:
# API endpoints. The "{}" placeholder is filled with an opportunity id
# (url_detail) or a mitra id (url_mitra).
url_opportunities = "https://api.kampusmerdeka.kemdikbud.go.id/magang/browse/opportunities"
url_detail = "https://api.kampusmerdeka.kemdikbud.go.id/magang/browse/opportunities/{}"
url_mitra = "https://api.kampusmerdeka.kemdikbud.go.id/mitra/public/id/{}"
limit = 100  # page size requested from the listing endpoint
offset = 0  # position of the first record of the next page to request
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever


def fetch_json(session, url, params=None):
    """Send a GET request and return the decoded JSON body.

    Parameters:
        session: the ``requests.Session`` used to send the request.
        url: the address to request.
        params: optional query-string parameters.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            that an error payload is never mistaken for a data record.
        requests.Timeout: if no answer arrives within ``REQUEST_TIMEOUT``.
    """
    response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def strip_column_fragments(frame, fragments):
    """Remove each of ``fragments`` from every column name of ``frame`` in place.

    ``str.replace`` is used, so a fragment is removed wherever it occurs in a
    column name, not only when it is a leading prefix.
    """
    renamed = []
    for column in frame.columns:
        for fragment in fragments:
            column = column.replace(fragment, "")
        renamed.append(column)
    frame.columns = renamed


detail_frames = []  # one normalized frame per opportunity, concatenated once at the end
mitra_names = {}  # mitra_id -> mitra name, so each mitra is requested only once
collected = 0  # running number of rows gathered so far

# A single session reuses the underlying connection across all requests.
with requests.Session() as session:
    while True:
        params = {
            "offset": offset,
            "limit": limit,
            "location_key": "",
            "mitra_key": "",
            "keyword": "",
            "sector_id": "",
            "sort_by": "",
            "order": "desc"
        }

        data = fetch_json(session, url_opportunities, params)["data"]
        offset += limit

        # An empty (or null) page means every record has been listed.
        if not data:
            break

        # Collect the detail data for every entry on this page.
        for entry in data:
            mitra_id = entry["mitra_id"]
            if mitra_id not in mitra_names:
                mitra_data = pd.json_normalize(fetch_json(session, url_mitra.format(mitra_id)))
                # Strip the "data." fragment so the mitra name is exposed as "name".
                strip_column_fragments(mitra_data, ["data."])
                mitra_names[mitra_id] = mitra_data["name"].iloc[0]

            detail_data = pd.json_normalize(fetch_json(session, url_detail.format(entry["id"])))
            # Strip the "data." and "benefits." fragments from the column names.
            strip_column_fragments(detail_data, ["data.", "benefits."])
            detail_data["mitra_name"] = mitra_names[mitra_id]

            detail_frames.append(detail_data)
            collected += len(detail_data)

        print(f"Data collected: {collected}")

# Concatenate once: appending to a DataFrame inside the loop copies every
# previously collected row on each iteration.
all_data = pd.concat(detail_frames, ignore_index=True) if detail_frames else pd.DataFrame()

print(f"Total {len(all_data)} data collected")

Data collected: 100
Data collected: 200
Data collected: 300
Data collected: 400
Data collected: 500
Data collected: 600
Data collected: 682
Total 682 data collected


In [8]:
# Display the collected records; pandas truncates the rendering of a large
# frame to its first and last rows and columns.
all_data

Unnamed: 0,meta,id,name,description,mitra_id,start_period,months_duration,activity_type,location,fields_of_study,...,show_salary,mobilization,accommodation,is_applied,wishlist,is_external,external_platform_name,external_platform_logo_url,mitra_name,salary
0,,b794e1a1-ecd6-449d-9ed9-0118a2e02626,Website Developer,<p><strong>Kualifikasi :</strong>&nbsp;</p><ul...,c7746ff1-87de-47bb-a43f-6d63f521e07c,2024-03-01T00:00:00+07:00,3,WFH,Kota Surabaya,[],...,False,False,False,False,False,False,,,CV. APPAREL BERKAH SELALU,
1,,7c8623ef-5f9b-470d-bc7a-73b1dd706c8b,Software Architect Intern,<p>To help architect team on demonstration of ...,af00bd20-3ac2-44a4-877f-af4307f9b599,2024-03-01T00:00:00+07:00,6,WFO,Kota Jakarta Selatan,[Teknik Informatika dan Ilmu Komputer],...,False,False,False,False,False,False,,,PT. Idemia Technologies Indonesia,
2,,bd95c730-b157-4fc5-8791-9dc9ea5e0e32,Product Marketing Intern,<p>- Design and develop sharepoint page to sup...,af00bd20-3ac2-44a4-877f-af4307f9b599,2024-03-01T00:00:00+07:00,6,WFO,Kota Jakarta Selatan,[Teknik Informatika dan Ilmu Komputer],...,False,False,False,False,False,False,,,PT. Idemia Technologies Indonesia,
3,,fc01895c-0d74-4874-a8f2-d1d46956d697,Asisten Finance,<ol><li>Pemahaman Sistem Keuangan Sekolah: And...,0bf5a4d1-9980-48ae-ad65-48e3b239f15c,2024-03-01T00:00:00+07:00,4,WFO,Kota Banda Aceh,[],...,False,False,False,False,False,False,,,SMAN 14 Banda Aceh,
4,,b6062e51-4764-474c-a2b7-617affd56959,Marketing Asuransi Mini Mikro,<p>WFH dari Seluruh Indonesia.</p><p><br></p><...,6bbc729e-c3ee-46a1-93be-2b9f3b5f478d,2024-03-01T00:00:00+07:00,4,WFH,Kota Banda Aceh,[],...,False,False,False,False,False,False,,,Natural Aceh,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,,7a2f6ef1-844d-4c70-99bc-03be7d5aac88,Marketing Creative Intern,Responsibilities \n\nSupport Marketing and Bra...,ee2084cf-f2b4-436a-b6a1-9e23d2e9827a,2024-02-06T07:28:52.149199+07:00,0,WFO,Tanah Abang,,...,True,False,False,False,False,True,KitaLulus,https://i.imgur.com/1WEx0zY.png,The Jakarta Post,0.0
678,,404ccd37-0c1e-43ab-bdfd-7b9a6fee551c,Graphic Design (Intern),Kami Idea merupakan sebuah usaha atau bisnis y...,0e19e313-3c8c-46ad-bb18-07e5a4fc2ef4,2024-02-06T07:28:52.149199+07:00,0,WFO,Mampang Prapatan,,...,True,False,False,False,False,True,KitaLulus,https://i.imgur.com/1WEx0zY.png,Kami Idea,0.0
679,,5a704786-6865-40c9-9086-a2bd85dc5098,Influencer Management Intern,- Last semester student from reputable univers...,32ac867c-45a6-4752-b7a9-23cbad490916,2024-02-06T07:28:52.149199+07:00,0,WFO,Tanjung Priok,,...,True,False,False,False,False,True,KitaLulus,https://i.imgur.com/1WEx0zY.png,Deca Group,0.0
680,,780902fd-e51a-463c-a012-e7a0941da47e,New Business Development Internship (Jakarta U...,- Final year student or fresh graduate \n- Pas...,e11ceed4-4513-4073-8674-eb7950a05c2b,2024-02-06T07:28:52.149199+07:00,0,WFO,Kelapa Gading,,...,True,False,False,False,False,True,KitaLulus,https://i.imgur.com/1WEx0zY.png,Sociolla,0.0


## **Save Data**

In [9]:
# Write the collected records to a CSV file, leaving out the row index.
output_path = 'magang_opportunities.csv'
all_data.to_csv(output_path, index=False)