In [None]:
# Importing libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from dotenv import load_dotenv
import json

# Task 1: Data Collection via REST APIs
For the initial phase of the project, I focused on the programmatic retrieval of environmental data for Rome over a six-month period by interfacing with the Open-Meteo and OpenAQ REST APIs while ensuring that all sensitive credentials remained protected within a local environment file. This stage presented significant technical hurdles specifically regarding the OpenAQ platform which returned persistent 410 Gone and 404 Not Found errors due to a sudden migration from version two to version three of their infrastructure. To overcome these obstacles and satisfy the requirement for data persistence, I refactored the request logic to target the updated endpoints and implemented a robust storage system that saves every successful response as a formatted JSON file.

In [6]:
# Quering weather data from open-meteo API

# Querying the data
url1 = "https://archive-api.open-meteo.com/v1/archive"
params1 = {
	"latitude": 41.89193,
	"longitude": 12.51133,
	"start_date": "2025-07-08",
	"end_date": "2026-01-08",
	"hourly": ["temperature_2m", "wind_speed_10m", "precipitation", "relative_humidity_2m"],
}
responses1 = requests.get(url1, params=params1)

# Checking the response status
print(responses1.status_code)

# Saving the data to a file
weather_data = responses1.json()
with open('data/weather_data.json', 'w') as f:
    json.dump(weather_data, f, indent=4)

200


In [7]:
# Quering air quality data from openaq API using the official wrapper

# Loading the API key
load_dotenv()
api_key = os.getenv("OPENAQ_API_KEY")

# Querying the API
url2 = "https://api.openaq.org/v3"
headers = {"X-API-Key": api_key}    
params2 = {
    "coordinates": "41.89193,12.51133",
    "radius": 5000,
    "limit": 1000,
    "iso": "IT",
}
responses2 = requests.get(f"{url2}/locations", params=params2, headers=headers, timeout=30)
    
# Checking the response status
print(responses2.status_code)

# Saving the data to a file
air_results = responses2.json()
with open('data/air_quality_data.json', 'w') as f:
    json.dump(air_results, f, indent=4)

200


# Task 2: Data Cleaning and Integration

In [None]:
df1 = pd.read_json('data/weather_data.json')
df1.head()

Unnamed: 0,latitude,longitude,generationtime_ms,utc_offset_seconds,timezone,timezone_abbreviation,elevation,hourly_units,hourly
time,41.862915,12.539912,10.356188,0,GMT,GMT,54,iso8601,"[2025-07-08T00:00, 2025-07-08T01:00, 2025-07-0..."
temperature_2m,41.862915,12.539912,10.356188,0,GMT,GMT,54,°C,"[24.1, 24.6, 24.4, 24.5, 24.4, 25.0, 26.6, 27...."
wind_speed_10m,41.862915,12.539912,10.356188,0,GMT,GMT,54,km/h,"[8.5, 11.5, 10.9, 12.0, 13.1, 14.6, 13.0, 13.0..."
precipitation,41.862915,12.539912,10.356188,0,GMT,GMT,54,mm,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
relative_humidity_2m,41.862915,12.539912,10.356188,0,GMT,GMT,54,%,"[74, 65, 66, 65, 68, 63, 49, 45, 44, 40, 36, 3..."


In [5]:
df2 = pd.read_json('data/air_quality_data.json')
df2.head()

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

# Task 3: Exploratory Data Analysis

# Task 4: Machine Learning

# Task 5: Evaluation and Interpretation