In [None]:
# Importing libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from dotenv import load_dotenv
import json

# Task 1: Data Collection via REST APIs
For the initial phase of the project, I focused on programmatically retrieving environmental data for Rome over a six-month period by interfacing with the Open-Meteo and OpenAQ REST APIs, while ensuring that all sensitive credentials remained protected within a local environment file. This stage presented significant technical hurdles, specifically with the OpenAQ platform, which returned persistent 410 Gone and 404 Not Found errors due to a sudden migration from version 2 to version 3 of its infrastructure. To overcome these obstacles and satisfy the requirement for data persistence, I refactored the request logic to target the updated endpoints and implemented a robust storage system that saves every successful response as a formatted JSON file.

In [51]:
# Querying weather data from the Open-Meteo archive API

# Querying the data: hourly temperature, wind speed, precipitation and humidity
# for the fixed coordinates and date window below.
# NOTE(review): no `timezone` parameter is sent, so timestamps presumably use the
# API's default time zone - confirm before joining with the UTC air-quality data.
weather_url = "https://archive-api.open-meteo.com/v1/archive"
weather_params = {
	"latitude": 41.89193,
	"longitude": 12.51133,
	"start_date": "2025-07-08",
	"end_date": "2026-01-08",
	"hourly": ["temperature_2m", "wind_speed_10m", "precipitation", "relative_humidity_2m"],
}
weather_response = requests.get(
	weather_url,
	params=weather_params,
	timeout=30,  # fail fast instead of hanging the kernel if the API is unreachable
)

# Checking the response status
print(weather_response.status_code)
# Stop on any 4xx/5xx so an error payload is never saved as if it were data
weather_response.raise_for_status()

# Saving the data to a file
weather_data = weather_response.json()
os.makedirs('data', exist_ok=True)  # a fresh checkout may not have the folder yet
with open('data/weather_data.json', 'w') as f:
    json.dump(weather_data, f, indent=4)

200


In [52]:
# Querying air quality data from the OpenAQ v3 REST API (plain `requests` calls)

# Loading the API key from the local .env file so it never appears in the notebook
load_dotenv()
api_key = os.getenv("OPENAQ_API_KEY")
if not api_key:
    # Without this check the request is sent unauthenticated and fails far less clearly
    raise RuntimeError("OPENAQ_API_KEY is not set; add it to the local .env file")

# Querying the API
aq_url = "https://api.openaq.org/v3/sensors/7527/measurements/hourly"
aq_params={
    "datetime_from": "2025-07-08",
    "datetime_to": "2026-01-08",
    "limit": 1000
}
headers = {"X-API-Key": api_key}

# Six months of hourly values is roughly 4,400 rows, which does not fit in a single
# 1000-row page, so request consecutive pages until a short (or empty) page arrives.
max_pages = 50  # safety cap so an unexpected API response cannot loop forever
aq_data = None
for page in range(1, max_pages + 1):
    aq_response = requests.get(
        aq_url,
        params={**aq_params, "page": page},
        headers=headers,
        timeout=30,  # fail fast instead of hanging the kernel
    )
    # Stop on any 4xx/5xx so an error payload is never saved as if it were data
    aq_response.raise_for_status()

    page_data = aq_response.json()
    page_results = page_data.get("results", [])
    if aq_data is None:
        # Keep the first page's envelope (including its "meta") and grow "results"
        aq_data = page_data
        aq_data["results"] = list(page_results)
    else:
        aq_data["results"].extend(page_results)

    if len(page_results) < aq_params["limit"]:
        break  # last page reached
else:
    raise RuntimeError(f"Stopped after {max_pages} pages without reaching the last page")

# Checking the response status (of the final page; earlier failures raise above)
print(aq_response.status_code)

# Saving the data to a file
os.makedirs('data', exist_ok=True)  # a fresh checkout may not have the folder yet
with open('data/air_quality_data.json', 'w') as f:
    json.dump(aq_data, f, indent=4)

200


# Task 2: Data Cleaning and Integration

In [53]:
# Reload the saved weather response from disk
with open('data/weather_data.json', 'r') as weather_file:
    w_json = json.loads(weather_file.read())

# Build one column per hourly variable, using the 'time' strings as the row index
hourly_series = w_json['hourly']
df_weather = pd.DataFrame(
    {name: values for name, values in hourly_series.items() if name != 'time'},
    index=hourly_series['time'],
)
df_weather.head()

Unnamed: 0,temperature_2m,wind_speed_10m,precipitation,relative_humidity_2m
2025-07-08T00:00,24.1,8.5,0.0,74
2025-07-08T01:00,24.6,11.5,0.0,65
2025-07-08T02:00,24.4,10.9,0.0,66
2025-07-08T03:00,24.5,12.0,0.0,65
2025-07-08T04:00,24.4,13.1,0.0,68


In [64]:
# Reload the saved air-quality response from disk
with open('data/air_quality_data.json', 'r') as air_file:
    raw_air_data = json.loads(air_file.read())

# Flatten each nested measurement record into dotted column names
air_records = raw_air_data["results"]
df_air = pd.json_normalize(air_records)

df_air.head()

Unnamed: 0,value,coordinates,flagInfo.hasFlags,parameter.id,parameter.name,parameter.units,parameter.displayName,period.label,period.interval,period.datetimeFrom.utc,...,coverage.expectedCount,coverage.expectedInterval,coverage.observedCount,coverage.observedInterval,coverage.percentComplete,coverage.percentCoverage,coverage.datetimeFrom.utc,coverage.datetimeFrom.local,coverage.datetimeTo.utc,coverage.datetimeTo.local
0,100.0,,False,4,co,µg/m³,,1 hour,01:00:00,2025-07-07T23:00:00Z,...,1,01:00:00,1,01:00:00,100.0,100.0,2025-07-07T23:00:00Z,2025-07-08T01:00:00+02:00,2025-07-08T00:00:00Z,2025-07-08T02:00:00+02:00
1,100.0,,False,4,co,µg/m³,,1 hour,01:00:00,2025-07-08T01:00:00Z,...,1,01:00:00,1,01:00:00,100.0,100.0,2025-07-08T01:00:00Z,2025-07-08T03:00:00+02:00,2025-07-08T02:00:00Z,2025-07-08T04:00:00+02:00
2,100.0,,False,4,co,µg/m³,,1 hour,01:00:00,2025-07-08T02:00:00Z,...,1,01:00:00,1,01:00:00,100.0,100.0,2025-07-08T02:00:00Z,2025-07-08T04:00:00+02:00,2025-07-08T03:00:00Z,2025-07-08T05:00:00+02:00
3,100.0,,False,4,co,µg/m³,,1 hour,01:00:00,2025-07-08T03:00:00Z,...,1,01:00:00,1,01:00:00,100.0,100.0,2025-07-08T03:00:00Z,2025-07-08T05:00:00+02:00,2025-07-08T04:00:00Z,2025-07-08T06:00:00+02:00
4,100.0,,False,4,co,µg/m³,,1 hour,01:00:00,2025-07-08T04:00:00Z,...,1,01:00:00,1,01:00:00,100.0,100.0,2025-07-08T04:00:00Z,2025-07-08T06:00:00+02:00,2025-07-08T05:00:00Z,2025-07-08T07:00:00+02:00


# Task 3: Exploratory Data Analysis

# Task 4: Machine Learning

# Task 5: Evaluation and Interpretation