In [1]:
import requests
import json
from urllib.parse import unquote

# Define the base URL for the API
base_url = "https://data.abudhabi/opendata"

# Define a list of API endpoint paths
endpoint_paths = [
    "/api/1/metastore/schemas/dataset/items/56c88964-cded-4559-be79-366bb661fa41",
    "/api/1/metastore/schemas/dataset/items/9c850a4b-d850-4d7f-8aa1-c29c05fa3637",
    "/api/1/metastore/schemas/dataset/items/8135e082-5ab1-472d-b54f-5d8509ad8c8c",
    "/api/1/metastore/schemas/dataset/items/377da685-9f8a-4170-9a40-00668d8a124b",
    "/api/1/metastore/schemas/dataset/items/b4c370b5-a251-4e03-9217-0ff3efd0be46"

    # Add more endpoint paths here as needed
]

# Create a list to store dataset information
dataset_info_list = []

# Loop through the endpoint paths
for endpoint_path in endpoint_paths:
    # Combine the base URL and endpoint path to form the full API URL
    api_url = f"{base_url}{endpoint_path}"

    try:
        # Send an HTTP GET request to the API endpoint
        response = requests.get(api_url)

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Parse the JSON response containing data
            data = response.json()

            # Access the "distribution" field from the data
            distributions = data.get('distribution')
            if distributions:
                # Iterate through the distribution items
                for distribution in distributions:
                    # Access and print the download URL
                    download_url = distribution.get('downloadURL')
                    if download_url:
                        # Extract dataset name from the URL and decode it
                        parts = download_url.split('/')
                        dataset_name = unquote(parts[-1].split('.')[0])

                        # Create a dictionary with the extracted information
                        dataset_info = {
                            "dataset_name": dataset_name,
                            "download_url": download_url
                        }

                        # Append the dataset information to the list
                        dataset_info_list.append(dataset_info)

        else:
            print(f"Failed to retrieve data for endpoint {endpoint_path}. Status code: {response.status_code}")

    except requests.exceptions.RequestException as e:
        print(f"Request error for endpoint {endpoint_path}: {e}")

# Convert the list of dataset information to a JSON object
dataset_info_json = json.dumps(dataset_info_list, ensure_ascii=False, indent=4)

# Print the JSON object
print(dataset_info_json)


[
    {
        "dataset_name": "Get?id=3a8143db-a37a-4413-8618-d14911af4dbd",
        "download_url": "https://admin.bayanat.ae/File/Get?id=3a8143db-a37a-4413-8618-d14911af4dbd"
    },
    {
        "dataset_name": "Get?id=e48a180f-9a17-411c-8acb-f5fbca6497ca",
        "download_url": "https://admin.bayanat.ae/File/Get?id=e48a180f-9a17-411c-8acb-f5fbca6497ca"
    },
    {
        "dataset_name": "Get?id=6ccff7b5-7add-49b5-8e5d-fcdfcd1b5203",
        "download_url": "https://admin.bayanat.ae/File/Get?id=6ccff7b5-7add-49b5-8e5d-fcdfcd1b5203"
    },
    {
        "dataset_name": "Get?id=cb1f7f54-0506-45e6-afe9-6aa9104836d6",
        "download_url": "https://admin.bayanat.ae/File/Get?id=cb1f7f54-0506-45e6-afe9-6aa9104836d6"
    },
    {
        "dataset_name": "Get?id=c84aebb9-1d0a-44b5-84fc-1fab108bd465",
        "download_url": "https://admin.bayanat.ae/File/Get?id=c84aebb9-1d0a-44b5-84fc-1fab108bd465"
    }
]


In [2]:
import os

os.makedirs("datasets", exist_ok=True)

parsed_data = json.loads(dataset_info_json)

for item in parsed_data:
    dataset_name = item["dataset_name"]
    download_url = item["download_url"]

    # Use wget to download each dataset with a specified name
    !wget {download_url} -O {dataset_name}.excel

--2023-09-19 00:01:23--  https://admin.bayanat.ae/File/Get?id=3a8143db-a37a-4413-8618-d14911af4dbd
Resolving admin.bayanat.ae (admin.bayanat.ae)... 185.54.16.7
Connecting to admin.bayanat.ae (admin.bayanat.ae)|185.54.16.7|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10093 (9.9K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘Get?id=3a8143db-a37a-4413-8618-d14911af4dbd.excel’


2023-09-19 00:01:24 (146 MB/s) - ‘Get?id=3a8143db-a37a-4413-8618-d14911af4dbd.excel’ saved [10093/10093]

--2023-09-19 00:01:24--  https://admin.bayanat.ae/File/Get?id=e48a180f-9a17-411c-8acb-f5fbca6497ca
Resolving admin.bayanat.ae (admin.bayanat.ae)... 185.54.16.7
Connecting to admin.bayanat.ae (admin.bayanat.ae)|185.54.16.7|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13600 (13K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘Get?id=e48a180f-9a17-411c-8acb-f5fbca6497ca.excel’


2023-09-19 

In [3]:
import pandas as pd
import numpy as np

In [4]:
data1 = pd.read_excel("/content/datasets/Get?id=3a8143db-a37a-4413-8618-d14911af4dbd.excel")

In [9]:
data1.head()

Unnamed: 0,year,title_ar,title_en,value
0,2014,المعارف العامة,General Information,55572
1,2014,الفلسفة وعلم النفس,Philosophy and psychology,31093
2,2014,الديانات,Religions,127603
3,2014,العلوم الاجتماعية,Social Sciences,127795
4,2014,العلوم الاجتماعية والأعمال التجارية والقانون,"Social Sciences, Business and Law",146


In [5]:
data2 = pd.read_excel('/content/datasets/Get?id=6ccff7b5-7add-49b5-8e5d-fcdfcd1b5203.excel')

In [10]:
data2.head()

Unnamed: 0,year,Title_en,Title_ar,Gender_en,Gender_ar,value
0,2014 / 2013,Schools,المدارس,Male,ذكور,259
1,2014 / 2013,"Teaching, staff",الهيئات التعليمية,Male,ذكور,6955
2,2014 / 2013,Students,الطلاب,Male,ذكور,129382
3,2014 / 2013,Schools,المدارس,Female,إناث,236
4,2014 / 2013,"Teaching, staff",الهيئات التعليمية,Female,إناث,17960


In [6]:
data3 = pd.read_excel("/content/datasets/Get?id=c84aebb9-1d0a-44b5-84fc-1fab108bd465.excel")

In [11]:
data3.head()

Unnamed: 0,year,Degree_en,Degree_ar,Studenton_en,Studenton_ar,Gender_en,Gender_ar,value
0,2015/2014,Doctoral,دكتوراة,Scholarships,المبتعثين,Male,ذكر,41
1,2015/2014,Doctoral,دكتوراة,Scholarships,المبتعثين,Female,أنثى,47
2,2015/2014,Doctoral,دكتوراة,Graduates,الخريجين,Male,ذكر,6
3,2015/2014,Doctoral,دكتوراة,Graduates,الخريجين,Female,أنثى,6
4,2015/2014,Master's,ماجستير,Scholarships,المبتعثين,Male,ذكر,66


In [7]:
data4 = pd.read_excel("/content/datasets/Get?id=cb1f7f54-0506-45e6-afe9-6aa9104836d6.excel")

In [12]:
data4.head()

Unnamed: 0,year,Title_en,Title_ar,typeofeducation_en,typeofeducation_ar,value
0,2014 / 2013,Schools,المدارس,G.E,حكومي,673
1,2014 / 2013,Classrooms,الفصول,G.E,حكومي,12364
2,2014 / 2013,"Teaching, Staff",الهيئات التعليمية,G.E,حكومي,24915
3,2014 / 2013,Students,الطلاب,G.E,حكومي,272504
4,2014 / 2013,Schools,المدارس,P.E,خاص,509


In [8]:
data5 = pd.read_excel("/content/datasets/Get?id=e48a180f-9a17-411c-8acb-f5fbca6497ca.excel")

In [13]:
data5.head()

Unnamed: 0,Year,Professional_Status_Ar,Professional_Status_En,Nationality_Ar,Nationality_En,Gender_Ar,Gender_En,Value
0,2016/2015,أستاذ,Professor,مواطنين,National,ذكور,Male,3
1,2016/2015,أستاذ مشارك,Associate Professor,مواطنين,National,ذكور,Male,11
2,2016/2015,أستاذ مساعد,Assistant Professor,مواطنين,National,ذكور,Male,42
3,2016/2015,محاضر,Lecturer,مواطنين,National,ذكور,Male,5
4,2015/2014,أستاذ,Professor,مواطنين,National,ذكور,Male,2
