In [11]:
import logging
import time
from datetime import datetime as dt
import pandas as pd
import requests
import json
from sqlalchemy import create_engine, text, types
from sqlalchemy.exc import OperationalError, InterfaceError, DatabaseError
from dotenv import dotenv_values
# ---------------------------------------------------------------------
# Logging: timestamped, level-tagged lines for the whole script
# ---------------------------------------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# ---------------------------------------------------------------------
# Environment configuration
# Values come from the .env file; the key labels below must match the
# labels used in that file.
# ---------------------------------------------------------------------
config = dotenv_values()

pg_host = config['POSTGRES_HOST']
pg_port = config['POSTGRES_PORT']
pg_db = config['POSTGRES_DB']
pg_schema = config['POSTGRES_SCHEMA']
pg_user = config['POSTGRES_USER']
pg_password = config['POSTGRES_PASS']

base_api_url = config['WB_BASE_API_URL']

# ---------------------------------------------------------------------
# Run parameters
# ---------------------------------------------------------------------
# Inclusive year range placed in the `date=<start>:<end>` part of the URL
start_year = '1988'
end_year = '2012'

# Country codes to request, one API call per (country, indicator) pair
country_codes = ['DE', 'IN', 'US', 'AF']

# Indicator codes to request for every country above
indicator_codes = [
    'IT.NET.BBND.P2',
    'IT.NET.SECR.P6',
    'SL.UEM.TOTL.ZS',
    'SI.POV.NAHC',
    'SE.ADT.LITR.ZS',
    'IT.NET.USER.P2',
    'SE.TER.ENRR',
    'SE.SEC.ENRR',
    'IE.ICT.TOTL.GD.ZS',
    'IT.TEL.INVS.RV.ZS',
    'NY.GDP.PCAP.KD.ZG',
    'IT.NET.BNDW.PC',
    'IT.CEL.SETS.P2',
    'IT.MLT.MAIN.P2',
]

# Query-string values sent with every request
format_type = "json"
per_page = 100
page = 1

# ---------------------------------------------------------------------
# Staging table layout
# ---------------------------------------------------------------------
# SQL column types handed to DataFrame.to_sql
col_type_dict = {
    'extracted_at': types.DateTime(),
    'country_code': types.String(),
    'indicator_code': types.String(),
    'extracted_data': types.JSON(),
}

# Column-wise accumulator: one (initially empty) list per column above
col_data_dict = {column_name: [] for column_name in col_type_dict}

# ---------------------------------------------------------------------
# Database target
# ---------------------------------------------------------------------
# NOTE(review): user and password are interpolated verbatim. Values that
# contain URL-reserved characters (e.g. '@', ':', '/') would need to be
# percent-encoded -- confirm against the .env contents.
DATABASE_URL = "postgresql://{}:{}@{}:{}/{}".format(pg_user, pg_password, pg_host, pg_port, pg_db)

# Name of the table the collected DataFrame is written to
table_name = "api_data_digi_sa_work"

###Defining functions
class apiOperations:
    """Helpers for pulling indicator data over HTTP and staging it in PostgreSQL.

    The class holds no instance state. `prepare_columns_data` writes to the
    module-level `col_data_dict` unless another accumulator is passed in, and
    `perform_db_ops` reads the module-level `table_name` and `pg_schema`.
    """

    def generate_monthly_dates(self, start_date, end_date):
        """Return (first_day, last_day) pairs for every month start in a range.

        Args:
            start_date: Range start, anything `pandas.date_range` accepts.
            end_date: Range end, anything `pandas.date_range` accepts.

        Returns:
            A list of `(first_day, last_day)` tuples of 'YYYY-MM-DD' strings,
            one per month whose first day lies within the range.
        """
        logging.info(f"Generating monthly dates between Start Date: {start_date} and End Date: {end_date}")
        first_days = pd.date_range(start=start_date, end=end_date, freq='MS')
        return [
            (
                first_day.strftime('%Y-%m-%d'),
                # MonthEnd(0) rolls a date forward to the end of its own month
                (first_day + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d'),
            )
            for first_day in first_days
        ]

    def api_call(self, base_api_url, query_dict, query_params, timeout=30):
        """Request one country/indicator series and return the raw body.

        The URL has the form
        `<base_api_url>/country/<country>/indicator/<indicator>?date=<start>:<end>`
        and is built only from `query_dict`, never from module-level variables.

        Args:
            base_api_url: API root without a trailing slash.
            query_dict: Must contain 'country', 'indicator', 'start_date'
                and 'end_date'.
            query_params: Extra query-string parameters passed to `requests`.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response text when the status is 200 and the body is not
            blank; otherwise None. Request failures are logged, not raised.

        Raises:
            KeyError: If `query_dict` lacks one of the required keys.
        """
        logging.info(f"API call made to {base_api_url} with Query: {query_dict}")
        api_url = (
            f"{base_api_url}/country/{query_dict['country']}"
            f"/indicator/{query_dict['indicator']}"
            f"?date={query_dict['start_date']}:{query_dict['end_date']}"
        )
        try:
            start_time = time.time()
            # requests applies no timeout by default; without one a stalled
            # connection would block the whole run indefinitely.
            response_obj = requests.get(api_url, params=query_params, timeout=timeout)
            response_obj.raise_for_status()
            end_time = time.time()
            logging.info(f"API call duration: {end_time - start_time:.2f} seconds")
            if response_obj.status_code == 200 and response_obj.text.strip():
                logging.info(f"Response after API call {response_obj.text}")
                return response_obj.text
            logging.warning(f"Request failed with status code {response_obj.status_code} or empty response")
            return None
        except requests.exceptions.RequestException as e:
            logging.error(f"API call failed for {query_dict}: {e}")
            return None

    def prepare_columns_data(self, query_dict, response_str, col_data=None):
        """Append one API response as a new row of the column accumulator.

        Args:
            query_dict: Must contain 'country' and 'indicator'.
            response_str: JSON text returned by the API.
            col_data: Dict of column name -> list to append to. Defaults to
                the module-level `col_data_dict`.

        Raises:
            ValueError: If `response_str` is not valid JSON
                (`json.JSONDecodeError`). Nothing is appended in that case.
        """
        logging.info(f"Creating response for country {query_dict['country']}")
        target = col_data_dict if col_data is None else col_data
        # Parse before touching any column so a bad payload cannot leave the
        # column lists with different lengths.
        parsed_response = json.loads(response_str)
        target['extracted_at'].append(dt.now().strftime('%Y-%m-%d %H:%M:%S'))
        target['country_code'].append(query_dict['country'])
        target['indicator_code'].append(query_dict['indicator'])
        target['extracted_data'].append(parsed_response)

    def perform_db_ops(self, DATABASE_URL, col_type_dict, col_data_dict):
        """Write the collected columns to PostgreSQL and log the row count.

        The table named by the module-level `table_name`, in schema
        `pg_schema`, is replaced on every call. Errors are logged and
        swallowed.

        Args:
            DATABASE_URL: SQLAlchemy connection URL.
            col_type_dict: Column name -> SQLAlchemy type, passed as `dtype`.
            col_data_dict: Column name -> list of values, one entry per row.
        """
        logging.info(f"Writing data to DB")
        database_df = pd.DataFrame(col_data_dict)
        engine = None
        try:
            start_time = time.time()
            engine = create_engine(DATABASE_URL, connect_args={"connect_timeout": 5})
            database_df.to_sql(table_name, engine, schema=pg_schema, if_exists='replace', index=False, dtype=col_type_dict)
            end_time = time.time()
            logging.info(f"DB write duration: {end_time - start_time:.2f} seconds")
            logging.info("Data successfully imported to the PostgreSQL database.")
            with engine.connect() as connection:
                result = connection.execute(text(f"SELECT count(*) FROM {pg_schema}.{table_name};"))
                logging.info(f"Count of Rows in table {table_name}: {result.scalar()}")
        except (OperationalError, InterfaceError, DatabaseError) as db_err:
            logging.error(f"Database error occurred: {db_err}")
        except Exception as e:
            logging.error(f"An unexpected error occurred: {e}")
        finally:
            # Release pooled connections; the engine is not reused afterwards
            if engine is not None:
                engine.dispose()

# Instantiate the helper that wraps the API and database steps
api_ops = apiOperations()

# Main flow: one request per (country, indicator) pair, then a single DB write
total_start_time = time.time()
for country in country_codes:
    for indicator in indicator_codes:
        query_dict = dict(
            country=country,
            indicator=indicator,
            start_date=start_year,
            end_date=end_year,
        )
        query_params = dict(format=format_type, per_page=per_page, page=page)
        response_str = api_ops.api_call(base_api_url, query_dict, query_params)

        # api_call returns None on a failed or empty response: nothing to stage
        if not response_str:
            continue
        api_ops.prepare_columns_data(query_dict, response_str)

# Every response is collected first; the database is written exactly once
api_ops.perform_db_ops(DATABASE_URL, col_type_dict, col_data_dict)
total_end_time = time.time()
logging.info(f"Total execution time: {total_end_time - total_start_time:.2f} seconds")

2024-10-27 16:18:25,575 - INFO - API call made to https://api.worldbank.org/v2 with Query: {'country': 'DE', 'indicator': 'IT.NET.BBND.P2', 'start_date': '1988', 'end_date': '2012'}
2024-10-27 16:18:25,736 - INFO - API call duration: 0.16 seconds
2024-10-27 16:18:25,736 - INFO - Response after API call [{"page":1,"pages":1,"per_page":100,"total":25,"sourceid":"2","lastupdated":"2024-10-24"},[{"indicator":{"id":"IT.NET.BBND.P2","value":"Fixed broadband subscriptions (per 100 people)"},"country":{"id":"DE","value":"Germany"},"countryiso3code":"DEU","date":"2012","value":34.5501,"unit":"","obs_status":"","decimal":2},{"indicator":{"id":"IT.NET.BBND.P2","value":"Fixed broadband subscriptions (per 100 people)"},"country":{"id":"DE","value":"Germany"},"countryiso3code":"DEU","date":"2011","value":33.7099,"unit":"","obs_status":"","decimal":2},{"indicator":{"id":"IT.NET.BBND.P2","value":"Fixed broadband subscriptions (per 100 people)"},"country":{"id":"DE","value":"Germany"},"countryiso3code"