In [3]:
import os
from datetime import date
from os import listdir

import pandas as pd
import requests
from tqdm import tqdm

In [4]:
class MET():
    '''A class to extract data from the Metropolitan Museum of Art API.

    See more information in:
    https://metmuseum.github.io

    Attributes
    ----------

    object_type : str
        Name of object type that will be searched (sent as the ``medium``
        filter of the search endpoint)

    output : str
        Output path (a directory) to the extracted data

    failed_ids : list
        Object IDs whose detail request failed during the last call to
        ``extract``; those objects are left out of the CSV
    '''

    # Root of the public collection API; both endpoints used here hang off it
    BASE_URL = 'https://collectionapi.metmuseum.org/public/collection/v1'

    # Seconds to wait on a single request before giving up, so that one
    # stalled connection cannot hang a run made of thousands of requests
    TIMEOUT = 30

    def __init__(self, object_type = None, output = None):
        self.object_type = object_type
        self.output = output
        self.failed_ids = []


    def extract(self):
        '''
        Execute the requests to the API to extract the data into a CSV

        The file is written to ``{output}/MET_{object_type}_{today}.csv``.
        The output directory is created when it does not exist yet. Objects
        whose detail request fails are skipped and listed in ``failed_ids``.

        Raises
        ------
        ValueError
            If ``object_type`` or ``output`` was left as None
        requests.HTTPError
            If the search request comes back with an error status
        '''
        if self.object_type is None or self.output is None:
            raise ValueError('Both object_type and output must be set before calling extract()')

        # Creating the output directory up front, so that a bad path fails
        # before the (long) download instead of after it
        os.makedirs(self.output, exist_ok=True)

        # One session for the whole run: connections are reused instead of
        # being reopened for every single object
        with requests.Session() as session:
            object_ids = self._search_object_ids(session)
            results = self._fetch_objects(session, object_ids)

        # Transforming list of JSON into a dataframe
        df = pd.json_normalize(results)

        # Exporting dataframe in the selected path
        df.to_csv(f'{self.output}/MET_{self.object_type}_{date.today()}.csv',index=False)


    def _search_object_ids(self, session):
        '''Return the list of object IDs whose medium matches ``object_type``.'''
        # Passing the filter through ``params`` lets requests URL-encode it,
        # so object types with spaces or symbols do not corrupt the query
        resp = session.get(
            f'{self.BASE_URL}/search',
            params={'medium': self.object_type, 'q': '*'},
            timeout=self.TIMEOUT,
        )
        resp.raise_for_status()

        # 'objectIDs' can come back as null when nothing matches; an empty
        # list keeps the rest of the pipeline working
        return resp.json().get('objectIDs') or []


    def _fetch_objects(self, session, object_ids):
        '''Download the full record of every ID, skipping the ones that fail.'''
        self.failed_ids = []
        results = []

        # Iterating item list id to get all the data
        for item in tqdm(object_ids):
            try:
                resp = session.get(f'{self.BASE_URL}/objects/{item}', timeout=self.TIMEOUT)

                # An error status carries an error payload, not an object
                # record, so it must not end up as a row of the CSV
                if not resp.ok:
                    self.failed_ids.append(item)
                    continue

                results.append(resp.json())
            except (requests.RequestException, ValueError):
                # Network failure or a body that is not valid JSON: losing
                # one object is better than losing the whole download
                self.failed_ids.append(item)

        return results

In [5]:
# Extractor for every object whose medium is 'Paintings'; the CSV goes to ./Output
met = MET(object_type='Paintings', output='./Output')

# Long-running cell: one request per object (the recorded run below took
# about 1h22 for 8102 objects)
met.extract()

100%|██████████| 8102/8102 [1:21:54<00:00,  1.65it/s]
