In [130]:
import requests
import pandas as pd 
from pandas import json_normalize
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta
from sqlalchemy import create_engine , inspect
import psycopg2
import re

In [8]:
load_dotenv()

api_key = os.getenv('weather_api_key')
base_url = 'http://api.weatherapi.com/v1'
history_url = base_url + "/history.json"

db_name = os.getenv('db_name')
user = os.getenv('user')
password = os.getenv('password')
host = os.getenv('host')
port = os.getenv('port')

engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}')

capitals = [
    "Johor Bahru", 
    "Alor Setar", 
    "Kota Bharu", 
    "Melaka", 
    "Seremban", 
    "Kuantan", 
    "George Town", 
    "Ipoh", 
    "Kangar", 
    "Kota Kinabalu", 
    "Kuching", 
    "Shah Alam", 
    "Kuala Terengganu",
    "Kuala Lumpur" 
]
dates_string = [(datetime.now() - timedelta(day)).strftime("%Y-%m-%d") for day in range(1,9)]
dates = [(datetime.now() - timedelta(day)) for day in range(1,9)]

In [135]:
def get_hourly_history():
    for capital in capitals:    
        for date in dates_string:
            params = {'key': api_key, 'q': capital , 'dt': date}
            r = requests.get(history_url , params=params).json()
            df = pd.json_normalize(r)
            hours_df = json_normalize(df['forecast.forecastday'][0][0]['hour'])

            hours_df['location'] = df['location.name']
            hours_df['region'] = df['location.region']
            hours_df['country'] = df['location.country']
            hours_df = hours_df.ffill(axis=0)
            capital = re.sub(r'\s+', '_', capital)
            hours_df.to_sql(f"{capital}_hourly" , if_exists='append' , index=False , con=engine)

get_hourly_history()

In [None]:
def get_hourly_history():
    history_url = base_url + "/history.json"
    dates = [(datetime.now() - timedelta(day)).strftime("%Y-%m-%d") for day in range(1,9)]
    hourly_dict = {}
    
    for date in dates:
        for capital in capitals:
            params = {"key": api_key, "q": capital, "dt": date}
            try:
                response = requests.get(history_url, params=params)
                if response.status_code == 200:
                    history_data = response.json()
                    df = pd.json_normalize(history_data['forecast']['forecastday'][0]['hour'])
                    # hourly = history_data['forecast']['forecastday'][0]['hour']
                    
                    # hourly_dict = {}
                    
                    # for d in hourly:
                    #     for key, value in d.items():
                    #         if key in hourly_dict:
                    #             hourly_dict[key].append(value)
                    #         else:
                    #             hourly_dict[key] = [value]
                    # df = pd.DataFrame(hourly_dict)
                    # print(df.head())
                    table_name = f"{capital}_{date}"
                    df.to_sql(table_name, engine, if_exists='replace', index=False)

                    # inspector = inspect(engine)
                    # if inspector.has_table(table_name):
                    #     print(f"{table_name} is already existed, skipping...")
                    #     continue
                    # else:
                    #     df.to_sql(table_name, engine, if_exists='replace', index=False)

                else:
                    print(f"Error: Received unexpected status code {response.status_code} on {capital}")

            except requests.exceptions.RequestException as e:
                print(f"An error occurred: {e}")

get_hourly_history()

In [78]:
def get_hourly_history():
    folder_path = 'hourly_data'
    os.makedirs(folder_path, exist_ok=True)
    history_url = base_url + "/history.json"

    dates = [(datetime.now() - timedelta(day)).strftime("%Y-%m-%d") for day in range(1,9)]
    hourly_dict = {}
    
    for date in dates[:1]:
        for capital in capitals[:1]:
            params = {"key": api_key, "q": capital, "dt": date}
            try:
                response = requests.get(history_url, params=params)
                if response.status_code == 200:
                    history_data = response.json()

                    hourly = history_data['forecast']['forecastday'][0]['hour']
                    
                    # hourly_dict = {}
                    
                    for d in hourly:
                        for key, value in d.items():
                            if key in hourly_dict:
                                hourly_dict[key].append(value)
                            else:
                                hourly_dict[key] = [value]
                    df = pd.DataFrame(hourly_dict)
                    df = pd.json_normalize(df)
                    print(df.head())

                    # file_name = f"{capital}_{date}"
                    # file_path = os.path.join(folder_path , file_name)

                    # if os.path.exists(file_path):
                    #     print(f"{file_path} is already existed, skipping...")
                    #     continue
                    # else:
                    #     df.to_csv(file_path , index=False, header=True, encoding=None)
                else:
                    print(f"Error: Received unexpected status code {response.status_code} on {capital}")

            except requests.exceptions.RequestException as e:
                print(f"An error occurred: {e}")

get_hourly_history()

Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]
