In [1]:
import os
import json
import requests
import logging
import pandas as pd

from dotenv import load_dotenv
from pydantic import ValidationError
from utils.input_configuration import APIClientInputConfiguration

logger = logging.getLogger("ingestion_logger")
logger.setLevel(logging.INFO)

In [2]:
# Read the configuration file
logger.info("Loading configuration file...")

with open("../config/config_file.json", "r") as file:
    config = json.load(file)

api_configuration = config.get("api", {})
city_configuration = config.get("cities", [])

logger.info("Configuration file loaded successfully.")

# Load the environment variables from the env file
logger.info("Loading content from .env file")

if not load_dotenv("../.env"):
    raise FileNotFoundError("Could not find the .env file.")
else:
    load_dotenv("../.env")

    api_key = os.getenv("API_KEY")
    if not api_key:
        raise ValueError("API_KEY not found in the .env file.")

    raw_files_path = os.getenv("RAW_FILES_PATH")
    if not raw_files_path:
        raw_files_path = "../data/files"
        logger.warning(
            f"RAW_FILES_PATH not found in the .env file, using {raw_files_path}."
        )

logger.info(".env file loaded successfully.")

#CREATE THE RAW FILES DIRECTORY IF IT DOES NOT EXIST

In [3]:
class APIClient:
    def __init__(
        self,
        base_url: str,
        api_key: str,
        units: str = "metric",
        language: str = "en",
        logger: logging.Logger = None,
    ):
        self.logger = (
            logger
            if isinstance(logger, logging.Logger)
            else logging.getLogger(__name__)
        )
        self.logger.info("Validating input parameters")

        try:
            APIClientInputConfiguration(
                base_url=base_url,
                api_key=api_key,
                units=units,
                language=language,
            )
        except ValidationError as e:
            self.logger.error(f"Validation error: {e}")
            raise

        self.base_url = base_url
        self.api_key = api_key
        self.units = units
        self.language = language

        self.logger.info("Input parameters validated successfully.")

    def build_request_url(self, city: dict) -> str:
        if not isinstance(city, dict):
            raise TypeError(
                f"Expected 'city' to be a dictionary, but got: {type(city).__name__} instead"
            )

        city_name = city.get("name")

        if city_name:
            self.logger.info("City name provided, building URL with city name.")
            request_url = f"{self.base_url}?q={city_name}&appid={api_key}&units={self.units}&lang={self.language}"
        else:
            raise ValueError("No valid city information provided.")

        return request_url

    def fetch_data(self, city: dict) -> dict:
        self.logger.info("Fetching data from API")
        request_url = self.build_request_url(city)
        response = requests.get(request_url)

        if response.status_code != 200:
            raise Exception(
                f"Error fetching data: {response.status_code} - {response.text}"
            )

        self.logger.info("Data fetched successfully.")
        return response.json()

In [4]:
# Create the API Client
api_client = APIClient(
    base_url=api_configuration.get(
        "base_url", "https://api.openweathermap.org/data/2.5/weather"
    ),
    api_key=api_key,
    units=api_configuration.get("units", "metric"),
    language=api_configuration.get("language", "en"),
    logger=logger,
)

# Fetch the data
for city in city_configuration:
    city_data = api_client.fetch_data(city=city)

    city_name = city_data.get("name")
    measurement_timestamp_unix = city_data.get("dt", 0)
    measurement_timestamp_string = pd.to_datetime(
        measurement_timestamp_unix, unit="s"
    ).strftime("%Y%m%d_%H%M%S")

    # Check if the directory that will store the files exists
    path = f"{raw_files_path}/{city_name}"

    try:
        os.makedirs(path)
        logging.info(f"Creating directory {path}")
    except FileExistsError:
        logging.info(f"Directory {path} already exists.")

    file_path = f"{path}/{measurement_timestamp_string}_{city_name}.json"

    with open(file_path, "w") as file:
        json.dump(city_data, file, indent=4)

    logging.info(f"Data file {file_path} created successfully")

INFO:ingestion_logger:Fetching data from API
INFO:ingestion_logger:City name provided, building URL with city name.
INFO:ingestion_logger:Data fetched successfully.


In [7]:
file_path

'data/files/Braga/20250725_205151_Braga.json'