In [2]:
import requests
from dotenv import  load_dotenv
import os

load_dotenv()

def call_api_to_get_data(date, enddate, city = "Ho+Chi+Minh+City", timeout = 30):
    """Fetch past-weather data from World Weather Online and return the parsed JSON.

    Parameters
    ----------
    date : str
        Value substituted into the ``date`` query parameter (start of the range).
    enddate : str
        Value substituted into the ``enddate`` query parameter (end of the range).
    city : str, optional
        Value substituted verbatim into the ``q`` query parameter. It is NOT
        URL-encoded here, so it must already be query-safe (e.g. ``+`` for spaces).
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up. Without a
        timeout a stalled connection would hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    RuntimeError
        If the ``api_key`` environment variable is not set.
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or timeout.
    """
    api_key = os.getenv("api_key")
    if not api_key:
        # Fail early instead of sending "key=None" and getting an opaque API error back.
        raise RuntimeError("Environment variable 'api_key' is not set")

    link = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx?q={city}&date={date}&enddate={enddate}&key={api_key}&format=json&tp=1".format(
        date = date,
        enddate = enddate,
        city = city,
        api_key = api_key
    )
    response = requests.get(link, timeout = timeout)
    # Surface HTTP errors here rather than as a confusing KeyError downstream.
    response.raise_for_status()
    return response.json()

In [42]:
import pandas as pd

# Load the six per-measurement tables; files are read in the order listed.
(
    df_humidity,
    df_pressure,
    df_temperature,
    df_weather_desc,
    df_wind_dir,
    df_wind_speed,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "historical-hourly-weather-dataset/humidity.csv",
        "historical-hourly-weather-dataset/pressure.csv",
        "historical-hourly-weather-dataset/temperature.csv",
        "historical-hourly-weather-dataset/weather_description.csv",
        "historical-hourly-weather-dataset/wind_direction.csv",
        "historical-hourly-weather-dataset/wind_speed.csv",
    )
)

In [None]:
from datetime import datetime, timedelta
import calendar

# Build one (first-day, last-day) pair of "YYYY-MM-DD" strings per calendar
# month touched by the window [start_date, end_date].
start_date = datetime(2012, 10, 1)
end_date = datetime(2017, 11, 30)
date_format = "%Y-%m-%d"

dates_list = []
current_date = start_date

while not current_date > end_date:
    # monthrange returns (weekday of the 1st, number of days); only the length is needed.
    days_in_month = calendar.monthrange(current_date.year, current_date.month)[1]

    month_start = current_date.replace(day=1)
    month_end = month_start.replace(day=days_in_month)

    dates_list.append(
        tuple(boundary.strftime(date_format) for boundary in (month_start, month_end))
    )

    # Step to the first day of the following month.
    current_date = month_end + timedelta(days=1)

for month_span in dates_list:
    print(month_span)

In [None]:
import numpy as np 

def handle_append_hcm_data(json, humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr , temp_arr):
    """Append the hourly readings found in one API response to the running arrays.

    Parameters
    ----------
    json : dict
        Parsed API response. Reads ``json["data"]["weather"]``, a list of days,
        each with a ``"date"`` string and an ``"hourly"`` list whose entries carry
        ``time``, ``windspeedKmph``, ``winddirDegree``, ``humidity``, ``pressure``,
        ``tempC`` and ``weatherDesc[0]["value"]``.
    humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr, temp_arr : numpy.ndarray
        Values accumulated so far. They are not modified; new arrays are returned.

    Returns
    -------
    tuple of numpy.ndarray
        ``(humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr,
        weather_desc_arr, temp_arr)`` with the new readings appended, in that order.
        If the response contains no hourly entries the inputs are returned as-is.

    Notes
    -----
    * Timestamps are emitted as ``"YYYY-MM-DD HH:00:00"`` with a zero-padded hour.
      NOTE(review): this assumes the tables merged on ``datetime`` later use the
      same zero-padded format -- confirm against the CSV files.
    * Wind speed is converted from km/h to m/s (factor 10/36, rounded to 2 dp);
      temperature from Celsius to Kelvin (+273.15).
    """
    times, humidities, wind_dirs, wind_speeds = [], [], [], []
    pressures, weather_descs, temps = [], [], []

    for day_data in json["data"]["weather"]:
        date = day_data["date"]
        for hour_data in day_data["hourly"]:
            # The API encodes the hour as "0", "100", ..., "2300".
            hour = int(hour_data["time"]) // 100

            times.append("{date} {hour:02d}:00:00".format(date = date, hour = hour))
            wind_speeds.append(round(float(hour_data["windspeedKmph"]) * (10/36), 2))
            wind_dirs.append(int(hour_data["winddirDegree"]))
            humidities.append(int(hour_data["humidity"]))
            pressures.append(int(hour_data["pressure"]))
            temps.append(int(hour_data["tempC"]) + 273.15)
            weather_descs.append(hour_data["weatherDesc"][0]["value"])

    if not times:
        # Nothing to add; returning the inputs avoids a needless dtype change
        # from appending an empty (float) array.
        return humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr , temp_arr

    # One append per series instead of one per hour: np.append copies the whole
    # array each call, so per-element appends are quadratic in the series length.
    return (
        np.append(humidity_arr, humidities),
        np.append(time_arr, times),
        np.append(wind_dir_arr, wind_dirs),
        np.append(wind_speed_arr, wind_speeds),
        np.append(pressure_arr, pressures),
        np.append(weather_desc_arr, weather_descs),
        np.append(temp_arr, temps),
    )


In [70]:
from IPython.display import clear_output, display

# Start every numeric series as an empty array; descriptions are kept as objects.
humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, temp_arr = (
    np.array([]) for _ in range(6)
)
weather_desc_arr = np.array([], dtype = object)

# Request one month at a time and grow the series with each response.
count = 0
total_months = len(dates_list)
for count, (range_start, range_end) in enumerate(dates_list, start = 1):
    # Replace the previous progress figure instead of stacking lines of output.
    clear_output(wait = True)
    print((count - 1) / total_months * 100)

    monthly_payload = call_api_to_get_data(range_start, range_end)
    (
        humidity_arr,
        time_arr,
        wind_dir_arr,
        wind_speed_arr,
        pressure_arr,
        weather_desc_arr,
        temp_arr,
    ) = handle_append_hcm_data(
        monthly_payload,
        humidity_arr,
        time_arr,
        wind_dir_arr,
        wind_speed_arr,
        pressure_arr,
        weather_desc_arr,
        temp_arr,
    )



98.38709677419355


In [71]:
# One two-column frame (datetime, "Ho Chi Minh City") per measurement.
df_hcm_humidity = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : humidity_arr})
df_hcm_pressure = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : pressure_arr})
df_hcm_wind_dir = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : wind_dir_arr})
df_hcm_wind_speed = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : wind_speed_arr})
df_hcm_temp= pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : temp_arr})
df_hcm_weather_desc = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : weather_desc_arr})


def _merge_hcm_column(city_frame, hcm_frame):
    """Return ``city_frame`` with the "Ho Chi Minh City" column of ``hcm_frame`` attached.

    * Any "Ho Chi Minh City" column already present in ``city_frame`` is dropped
      first, so re-running this cell (or loading a CSV that was already
      augmented) does not produce "Ho Chi Minh City_x" / "_y" duplicates.
    * A left join keeps every existing row: timestamps with no match get NaN
      instead of being silently removed for all other cities.
    """
    base = city_frame.drop(columns = ["Ho Chi Minh City"], errors = "ignore")
    return pd.merge(base, hcm_frame, on = "datetime", how = "left")


df_humidity = _merge_hcm_column(df_humidity, df_hcm_humidity)
df_pressure = _merge_hcm_column(df_pressure, df_hcm_pressure)
df_wind_dir = _merge_hcm_column(df_wind_dir, df_hcm_wind_dir)
df_wind_speed = _merge_hcm_column(df_wind_speed, df_hcm_wind_speed)
df_temperature = _merge_hcm_column(df_temperature, df_hcm_temp)
df_weather_desc = _merge_hcm_column(df_weather_desc, df_hcm_weather_desc)

In [76]:
# Write each table out as CSV without the index column.
for frame, csv_path in (
    (df_humidity, "historical-hourly-weather-dataset/humidity.csv"),
    (df_pressure, "historical-hourly-weather-dataset/pressure.csv"),
    (df_wind_dir, "historical-hourly-weather-dataset/wind_direction.csv"),
    (df_wind_speed, "historical-hourly-weather-dataset/wind_speed.csv"),
    (df_temperature, "historical-hourly-weather-dataset/temperature.csv"),
    (df_weather_desc, "historical-hourly-weather-dataset/weather_description.csv"),
):
    frame.to_csv(csv_path, index = False)

In [15]:
import dill 

# Save the current notebook session to the file "nb.db" via dill.
# NOTE(review): presumably done so the API download need not be repeated -- confirm.
dill.dump_session("nb.db")

In [1]:
import dill

# Restore a session previously saved to "nb.db" via dill.
# NOTE(review): dill is pickle-based, so loading executes whatever the file
# encodes -- only load an "nb.db" you created yourself.
dill.load_session("nb.db")