This notebook retrieves historical weather data from the openweathermap.org website through its API.

At the end of the project, it will be merged into the main notebook.

In [None]:
# import the libraries needed to get historical weather data from openweathermap.org
import requests
import json
import pandas as pd
import time
import datetime

import warnings
warnings.filterwarnings('ignore')

In [None]:
# helper: read an optional one-hour precipitation volume from an hourly record
def _hourly_volume(hour_record, kind):
    '''
    Return hour_record[kind]['1h'], or 0 when that value is not available.

    kind is the precipitation key to look up ('rain' or 'snow'). The key is
    treated as optional: a missing key, a value that is not a dict, or a dict
    without '1h' all yield 0.
    '''
    volume = hour_record.get(kind)
    if isinstance(volume, dict):
        return volume.get('1h', 0)
    return 0


# write a function to transform the weather data into a dataframe
def transform_weather_data(weather_data):
    '''
    This function is to transform the weather data into a dataframe

    Parameters
    ----------
    weather_data : list of dict
        One entry per hour. Only entry['data'][0] is read from each entry; it
        must contain 'dt', 'temp', 'feels_like', 'humidity', 'visibility',
        'wind_speed' and 'weather'[0]['description']. 'rain' and 'snow' are
        optional and default to 0.

    Returns
    -------
    pandas.DataFrame
        One row per entry with the columns date (day of month), time (hour),
        temp, feels_like, humidity, visibility, wind_speed,
        weather_description, rain and snow.

    Raises
    ------
    KeyError
        If a required field is missing from an entry.
    '''
    column_names = ('date', 'time', 'temp', 'feels_like', 'humidity',
                    'visibility', 'wind_speed', 'weather_description',
                    'rain', 'snow')
    # one list per column, kept in the order the dataframe should have
    columns = {name: [] for name in column_names}

    for entry in weather_data:
        hour_record = entry['data'][0]

        # fromtimestamp() converts to the local timezone of this machine
        moment = datetime.datetime.fromtimestamp(hour_record['dt'])
        columns['date'].append(moment.day)
        columns['time'].append(moment.hour)

        # required measurements are copied through unchanged
        for name in ('temp', 'feels_like', 'humidity', 'visibility', 'wind_speed'):
            columns[name].append(hour_record[name])
        columns['weather_description'].append(hour_record['weather'][0]['description'])

        # precipitation is only looked up when present, otherwise recorded as 0
        columns['rain'].append(_hourly_volume(hour_record, 'rain'))
        columns['snow'].append(_hourly_volume(hour_record, 'snow'))

    # save the all the data into a dataframe
    weather_df = pd.DataFrame(columns)

    # return the dataframe
    return weather_df

In [None]:
# write a function to get hourly historical weather data from openweathermap.org
def get_weather_data(lat, lon, start_date, end_date, api_key):
    '''
    This function is to get hourly historical weather data from openweathermap.org

    One request is made per hour, starting at local midnight of start_date and
    stopping one hour before local midnight of end_date. The raw responses are
    written to a JSON file, transformed with transform_weather_data, and the
    resulting dataframe is written to a CSV file.

    Parameters
    ----------
    lat, lon : float
        Coordinates passed to the API.
    start_date, end_date : str
        Dates in "%Y-%m-%d" format; end_date itself is not fetched.
    api_key : str
        API key; surrounding whitespace (e.g. a trailing newline from a key
        file) is stripped before use.

    Returns
    -------
    str
        A message naming the CSV file that was written.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status, so that error payloads are
        never stored as if they were weather data.
    '''
    # imported locally so this cell does not depend on an extra import cell
    from pathlib import Path

    # a key read from a file usually ends with a newline, which would corrupt the request
    api_key = api_key.strip()

    # get the start and end time in unix time (interpreted in the local timezone)
    start_time = int(time.mktime(datetime.datetime.strptime(start_date, "%Y-%m-%d").timetuple()))
    end_time = int(time.mktime(datetime.datetime.strptime(end_date, "%Y-%m-%d").timetuple()))
    # get the number of hours between start and end time
    num_hours = int((end_time - start_time) / 3600)

    # https keeps the API key out of plain-text traffic
    url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine'

    # get the hourly historical weather data
    weather_data = []
    for i in range(num_hours):
        # get the time in unix time
        time_unix = start_time + i * 3600
        # let requests build and encode the query string
        params = {'lat': lat, 'lon': lon, 'dt': time_unix,
                  'appid': api_key, 'units': 'metric'}
        # the timeout prevents one stalled request from hanging the whole loop
        response = requests.get(url, params=params, timeout=30)
        # fail here with a clear HTTP error instead of a KeyError during the transform
        response.raise_for_status()
        weather_data.append(response.json())
        # wait for 1 second between requests
        time.sleep(1)

    # save the weather data to a file
    folder_path = '../data/weather_data/json/'
    file_name = f'weather_data_{start_date}_{end_date}.json'
    file_path = folder_path + file_name
    # create the folder if needed so the fetched data is not lost at write time
    Path(folder_path).mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w') as f:
        json.dump(weather_data, f)
    print(f'The weather data has been saved to {file_path}.')

    # transform the weather data into a dataframe
    weather_df = transform_weather_data(weather_data)

    # save the dataframe to a file
    folder_path = '../data/weather_data/csv/'
    file_name = f'weather_data_{start_date}_{end_date}.csv'
    file_path = folder_path + file_name
    Path(folder_path).mkdir(parents=True, exist_ok=True)
    weather_df.to_csv(file_path, index=False)
    print(f'The weather data has been saved to {file_path}.')

    # return a status message naming the CSV file
    return f'The weather data of {start_date} to {end_date} has been saved to {file_path}.'
   

In [None]:
# get the hourly historical weather data
lat, lon = 40.730610, -73.935242  # New York City
start_date = '2017-12-03'  # start date
end_date = '2017-12-05'  # end date (hours are fetched up to, not including, this date)
# get the API key from file; strip() drops the trailing newline so it is not
# sent to the API as part of the key
with open('../data/documentations/api.txt', 'r', encoding='utf-8') as f:
    api_key = f.read().strip()

# NOTE: get_weather_data returns a status message string, not the data itself
weather_data = get_weather_data(lat, lon, start_date, end_date, api_key)