In [1]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install azure-storage-blob

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import json
import requests
import pandas as pd
from azure.storage.blob import BlobServiceClient

def fetch_data(url, offset=0, limit=2000, timeout=30):
    """Fetch one page of JSON data from a Web API using offset/limit pagination.

    Args:
        url: Endpoint to query.
        offset: Value sent as the ``$offset`` query parameter.
        limit: Value sent as the ``$limit`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        The decoded JSON body of the response, or ``None`` if the request
        failed, returned an HTTP error status, or the body was not valid JSON.
        Callers can therefore distinguish an error (``None``) from an empty
        page (e.g. ``[]``).
    """
    params = {'$offset': offset, '$limit': limit}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures that are not raised as a
    # RequestException subclass (e.g. by older versions of requests).
    except (requests.RequestException, ValueError) as e:
        print(f"An error occurred: {e}")
        return None

def save_data_to_blob(data, filename, connection_string, container_name):
    """Upload a list of dictionaries to Azure Blob Storage as a CSV file.

    The records are loaded into a Pandas DataFrame, serialized to CSV without
    the index column, and written as a block blob. An existing blob with the
    same name is overwritten.

    Args:
        data: Records to upload (anything ``pd.DataFrame`` accepts).
        filename: Name of the blob to create inside the container.
        connection_string: Azure Storage connection string.
        container_name: Name of the target container.

    Returns:
        bool: ``True`` if the upload completed, ``False`` if any step failed.
        Failures are reported on stdout rather than raised, so callers that
        need to react must check the return value.
    """
    try:
        csv_data = pd.DataFrame(data).to_csv(index=False)
        # Use the service client as a context manager so its underlying
        # connection is closed once the upload finishes or fails.
        with BlobServiceClient.from_connection_string(connection_string) as blob_service_client:
            blob_client = blob_service_client.get_blob_client(container=container_name, blob=filename)
            blob_client.upload_blob(csv_data, blob_type="BlockBlob", overwrite=True)
    except Exception as e:
        print(f"Failed to upload data to blob: {e}")
        return False

    print(f"Data uploaded to blob storage as {filename}")
    return True

def main():
    """Download every page from the configured API and upload the result as one CSV blob.

    Configuration is read from ``config.json`` in the current working
    directory, using these keys:

    * ``api.url`` and ``api.pagination_limit``
    * ``azure_storage.connection_string``, ``azure_storage.container_name``
      and ``azure_storage.blob_name``

    Pages are requested with an increasing offset until the API returns an
    empty page. Nothing is uploaded if no rows were fetched.

    Raises:
        RuntimeError: If a page could not be fetched. The upload is skipped in
            that case so an existing blob is not overwritten with a truncated
            dataset.
    """
    # Load configuration from JSON file
    with open('config.json', 'r', encoding='utf-8') as file:
        config = json.load(file)

    api_url = config['api']['url']
    limit = config['api']['pagination_limit']
    connection_string = config['azure_storage']['connection_string']
    container_name = config['azure_storage']['container_name']
    blob_name = config['azure_storage']['blob_name']

    total_data = []
    offset = 0
    while True:
        data = fetch_data(api_url, offset, limit)
        if data is None:
            # fetch_data signals a failed request with None. Treating that as
            # "no more pages" would silently upload an incomplete dataset.
            raise RuntimeError(
                f"Fetching data failed at offset {offset}; "
                f"aborting without uploading the {len(total_data)} rows fetched so far."
            )
        if not data:
            # An empty page means every record has been retrieved.
            break
        total_data.extend(data)
        offset += limit

    if total_data:
        # Save all data to Azure Blob Storage
        save_data_to_blob(total_data, blob_name, connection_string, container_name)

# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Data uploaded to blob storage as nypd_shooting.csv
