# Extract Forecast Information from Weather Map

---


By Mauricio Hernandez

Goal(s):
- Collect and download forecast data for a set of locations using the OpenWeatherMap API.

---
See documentation at: https://openweathermap.org/api/one-call-3

In [None]:
#Import libraries
import requests
import pandas as pd
import os
import json
import glob
from pathlib import Path
#import datetime
import time
from datetime import timezone
from datetime import datetime
from google.colab import drive
from datetime import timedelta

In [None]:
# Mount Google Drive so the files stored there are reachable from this runtime
drive.mount('/content/gdrive')
# Folder inside the mounted drive where this notebook is stored
path = "/content/gdrive/MyDrive/Lumiere Program/Niharika Yalla/Code/"
# Make that folder the working directory: the relative paths used further
# down (e.g. '../Data/Inputs/...') are resolved against it
os.chdir(path)

Mounted at /content/gdrive


### Create dictionary with sites information

In [None]:
# Forecast locations keyed by site number.
# 'Latitude' / 'Longitude' are the coordinates sent to the API as `lat` / `lon`.
# 'Timezone' uses the IANA identifier (underscore, not space), which is the
# same spelling the API returns in its own 'timezone' field.
sites = {
    1: {'Name': 'San Francisco', 'Latitude': 37.7749, 'Longitude': -122.4194, 'Timezone': 'America/Los_Angeles', 'Elevation': 52.0},
    2: {'Name': 'San Diego', 'Latitude': 32.7157, 'Longitude': -117.1611, 'Timezone': 'America/Los_Angeles', 'Elevation': 19.0},
    3: {'Name': 'Los Angeles', 'Latitude': 34.0549, 'Longitude': -118.2426, 'Timezone': 'America/Los_Angeles', 'Elevation': 89.0},
    4: {'Name': 'Sacramento', 'Latitude': 38.5816, 'Longitude': -121.4944, 'Timezone': 'America/Los_Angeles', 'Elevation': 8.0},
}

In [None]:
# Read the OpenWeatherMap API key.
# The key is expected on the first line of a plain-text file.
with open('../Data/Inputs/WeatherMap_APIKey', encoding='utf-8') as f:
    line = f.readline()
# strip() drops the trailing newline and also a Windows '\r' or stray spaces,
# any of which would otherwise be sent to the API as part of the key
api_key = line.strip()

## Download Forecast Data

In [None]:
def convert_timestamp_to_text(datestamp, date_format):
    """Format a Unix timestamp as a UTC date/time string.

    Args:
        datestamp: Seconds since the Unix epoch (int or float).
        date_format: ``strftime`` format string, e.g. '%Y-%m-%d %H:%M:%S'.

    Returns:
        The timestamp rendered in UTC according to ``date_format``.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12, so the UTC
    # datetime is built explicitly. The result is timezone-aware, which means
    # a '%z' / '%Z' directive now reports UTC instead of an empty string.
    utc_moment = datetime.fromtimestamp(datestamp, tz=timezone.utc)
    return utc_moment.strftime(date_format)

### 3 Hour Resolution Forecast - FREE

In [None]:
#Example of API call
#api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={API key}

In [None]:
def req_forecast_3hr(api_key, lon, lat, timeout=30):
    """Request the 3-hour-resolution forecast for one location.

    Args:
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.
        lon: Longitude of the location, sent as ``lon``.
        lat: Latitude of the location, sent as ``lat``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response``. It is returned even when the server
        answered with an HTTP error status (the error is only printed), so
        the caller can still inspect the body.

    Raises:
        requests.RequestException: If no response was obtained at all
            (connection error, timeout, ...).
    """
    api_url = 'https://api.openweathermap.org/data/2.5/forecast?'
    params = {
        'appid': api_key,
        'lat': lat,
        'lon': lon,
    }

    try:
        r = requests.get(api_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        # There is no response object to hand back here. Re-raise the real
        # cause instead of failing later on an unbound variable.
        print(f'Data acquisition failed due to {e}')
        raise

    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        print(f'Data acquisition failed due to {e}')
    else:
        # Preview of the payload, for a quick visual check
        print(r.text[0:1000])
    return r

In [None]:
# Site to query and the text layout used for timestamps
site_number = 1
date_format = '%Y-%m-%d %H:%M:%S'

# Fetch the 3-hour forecast for the selected site and decode the JSON body
resp = req_forecast_3hr(
    api_key,
    lat=sites[site_number]['Latitude'],
    lon=sites[site_number]['Longitude'],
)
json_data = json.loads(resp.text)

# One row per forecast step; nested fields are flattened into dotted columns
df_forecast = pd.json_normalize(json_data['list'])

# Keep the raw epoch value in 'dt_machine' (second column) and turn 'dt'
# into a readable UTC string
df_forecast.insert(1, 'dt_machine', df_forecast['dt'])
df_forecast['dt'] = df_forecast['dt_machine'].map(
    lambda epoch: convert_timestamp_to_text(epoch, date_format)
)

# 'dt_txt' from the API is redundant now that 'dt' is text
df_forecast = df_forecast.drop(columns=['dt_txt'])
df_forecast.head()

{"cod":"200","message":0,"cnt":40,"list":[{"dt":1700168400,"main":{"temp":289.97,"feels_like":289.61,"temp_min":289.97,"temp_max":290.54,"pressure":1015,"sea_level":1015,"grnd_level":1012,"humidity":73,"temp_kf":-0.57},"weather":[{"id":803,"main":"Clouds","description":"broken clouds","icon":"04d"}],"clouds":{"all":80},"wind":{"speed":2.91,"deg":47,"gust":3.74},"visibility":10000,"pop":0,"sys":{"pod":"d"},"dt_txt":"2023-11-16 21:00:00"},{"dt":1700179200,"main":{"temp":289.95,"feels_like":289.62,"temp_min":289.95,"temp_max":290.09,"pressure":1014,"sea_level":1014,"grnd_level":1011,"humidity":74,"temp_kf":-0.14},"weather":[{"id":500,"main":"Rain","description":"light rain","icon":"10d"}],"clouds":{"all":88},"wind":{"speed":1.05,"deg":44,"gust":1.87},"visibility":10000,"pop":0.27,"rain":{"3h":1.01},"sys":{"pod":"d"},"dt_txt":"2023-11-17 00:00:00"},{"dt":1700190000,"main":{"temp":288.5,"feels_like":288.34,"temp_min":288.5,"temp_max":288.5,"pressure":1015,"sea_level":1015,"grnd_level":1013,

NameError: ignored

In [None]:
# Write the 3-hour forecast table to a CSV named after the site and the
# current local time
run_stamp = datetime.now().strftime("%m-%d-%Y_%H-%M")
results_filename = '../Data/Outputs/forecast_{}_{}_3hr.csv'.format(
    sites[site_number]['Name'],
    run_stamp,
)
print('Saving file: {} '.format(results_filename))
df_forecast.to_csv(results_filename, index=False)

Saving file: ../Data/Outputs/forecast_San Francisco_11-06-2023_02-57_3hr.csv 


## Hourly Forecast

> Getting this data is not free, but we have 1,000 calls per day for free.

In [None]:
#Example call:
# https://api.openweathermap.org/data/3.0/onecall?lat={lat}&lon={lon}&exclude={part}&appid={API key}
def req_forecast(api_key, lon, lat, timeout=30):
    """Request the One Call 3.0 forecast (all sections except alerts).

    Args:
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.
        lon: Longitude of the location, sent as ``lon``.
        lat: Latitude of the location, sent as ``lat``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response``. It is returned even when the server
        answered with an HTTP error status (the error is only printed), so
        the caller can still inspect the body.

    Raises:
        requests.RequestException: If no response was obtained at all
            (connection error, timeout, ...).
    """
    api_url = 'https://api.openweathermap.org/data/3.0/onecall?'
    params = {
        'appid': api_key,
        'exclude': 'alerts',
        'lat': lat,
        'lon': lon,
    }

    try:
        r = requests.get(api_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        # There is no response object to hand back here. Re-raise the real
        # cause instead of failing later on an unbound variable.
        print(f'Data acquisition failed due to {e}')
        raise

    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        print(f'Data acquisition failed due to {e}')
    else:
        # Preview of the payload, for a quick visual check
        print(r.text[0:1000])
    return r

In [None]:
# Select site
site_number = 4

# Fetch the One Call forecast for that site and decode the JSON body
resp = req_forecast(
    api_key,
    lat=sites[site_number]['Latitude'],
    lon=sites[site_number]['Longitude'],
)
json_data = json.loads(resp.text)

date_format = '%Y-%m-%d %H:%M:%S'

# One table per section of the response
df_forecast_current, df_forecast_hr, df_forecast_day = (
    pd.json_normalize(json_data[section])
    for section in ('current', 'hourly', 'daily')
)

# Hourly table layout: readable UTC time first, then the raw epoch value,
# then the timezone fields reported by the API
df_forecast_hr = df_forecast_hr.rename(columns={"dt": "dt_machine"})
df_forecast_hr.insert(
    0,
    'dt',
    df_forecast_hr['dt_machine'].map(
        lambda epoch: convert_timestamp_to_text(epoch, date_format)
    ),
)
df_forecast_hr.insert(2, 'timezone', json_data['timezone'])
df_forecast_hr.insert(3, 'timezone_offset', json_data['timezone_offset'])

# File name carries the site name and the current local time
run_stamp = datetime.now().strftime("%m-%d-%Y_%H-%M")
results_filename = '../Data/Inputs/current_forecast_{}_{}_hourly.csv'.format(
    sites[site_number]['Name'],
    run_stamp,
)

print('Saving file: {} '.format(results_filename))
df_forecast_hr.to_csv(results_filename, index=False)

{"lat":38.5816,"lon":-121.4944,"timezone":"America/Los_Angeles","timezone_offset":-28800,"current":{"dt":1700751928,"sunrise":1700751399,"sunset":1700786907,"temp":278.97,"feels_like":277.98,"pressure":1014,"humidity":96,"dew_point":278.38,"uvi":0,"clouds":20,"visibility":8047,"wind_speed":1.54,"wind_deg":300,"weather":[{"id":701,"main":"Mist","description":"mist","icon":"50d"}]},"minutely":[{"dt":1700751960,"precipitation":0},{"dt":1700752020,"precipitation":0},{"dt":1700752080,"precipitation":0},{"dt":1700752140,"precipitation":0},{"dt":1700752200,"precipitation":0},{"dt":1700752260,"precipitation":0},{"dt":1700752320,"precipitation":0},{"dt":1700752380,"precipitation":0},{"dt":1700752440,"precipitation":0},{"dt":1700752500,"precipitation":0},{"dt":1700752560,"precipitation":0},{"dt":1700752620,"precipitation":0},{"dt":1700752680,"precipitation":0},{"dt":1700752740,"precipitation":0},{"dt":1700752800,"precipitation":0},{"dt":1700752860,"precipitation":0},{"dt":1700752920,"precipitati

## Historical Forecast

**TODO: Rename this section — the `timemachine` endpoint used below returns historical weather data, not historical forecasts.**

In [None]:
#https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={time}&appid={API key}

def req_hist_forecast(api_key, lon, lat, datetime, timeout=30):
    """Request One Call 3.0 "timemachine" data for one location and instant.

    Args:
        api_key: OpenWeatherMap API key, sent as the ``appid`` query parameter.
        lon: Longitude of the location, sent as ``lon``.
        lat: Latitude of the location, sent as ``lat``.
        datetime: Unix timestamp of the requested instant, sent as ``dt``.
            The name shadows the ``datetime`` class inside this function; it
            is kept because callers pass it by keyword.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response``. It is returned even when the server
        answered with an HTTP error status (the error is only printed), so
        the caller can still inspect the body.

    Raises:
        requests.RequestException: If no response was obtained at all
            (connection error, timeout, ...).
    """
    # Single slash before 'timemachine', matching the documented endpoint
    api_url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine?'
    params = {
        'appid': api_key,
        # NOTE(review): 'exclude' is carried over from the forecast call;
        # confirm whether the timemachine endpoint uses it at all.
        'exclude': 'alerts',
        'lat': lat,
        'lon': lon,
        'dt': datetime,
    }

    try:
        r = requests.get(api_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        # There is no response object to hand back here. Re-raise the real
        # cause instead of failing later on an unbound variable.
        print(f'Data acquisition failed due to {e}')
        raise

    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        print(f'Data acquisition failed due to {e}')
    else:
        # Preview of the payload, for a quick visual check
        print(r.text[0:1000])
    return r


In [None]:
# Start and end of a historical window (naive datetimes; presumably meant as
# UTC, going by the variable names). The end must not precede the start.
start_utc = datetime.strptime('2023-10-15 00:00:00', '%Y-%m-%d %H:%M:%S')
end_utc = datetime.strptime('2023-10-26 23:00:00', '%Y-%m-%d %H:%M:%S')

# Example of a single request for a fixed timestamp:
#resp = req_hist_forecast(api_key, lat = sites[site_number]['Latitude'], lon = sites[site_number]['Longitude'], datetime = '1687102209')
#json_hist_data = json.loads(resp.text)

In [None]:
# Download hourly historical weather for the selected site, one API call per
# hour in the window, and save everything to a single CSV.
date_format = '%Y-%m-%d %H:%M:%S'

start_datetime = datetime.strptime('2023-10-15 00:00:00', date_format)
end_datetime = datetime.strptime('2023-10-31 23:00:00', date_format)

# Resolve the output file and create its folder before spending API calls,
# so a missing directory cannot discard the downloaded data at the very end
results_filename = './outputs/current_weather_forecasts/forecast_{}_{}_to{}_hr.csv'.format(
    sites[site_number]['Name'],
    start_datetime.strftime('%Y-%m-%d_%H-%M'),
    end_datetime.strftime('%Y-%m-%d_%H-%M'),
)
os.makedirs(os.path.dirname(results_filename), exist_ok=True)

# Number of whole hours in the window.
# NOTE(review): the loop stops one hour before end_datetime; confirm whether
# the final hour should be included.
n_hours = int((end_datetime - start_datetime).total_seconds() // 3600)

# Per-hour tables are collected and concatenated once after the loop
hourly_frames = []

for i in range(n_hours):
    hist_datetime = start_datetime + timedelta(hours=i)
    # The naive datetime is interpreted as UTC when converted to epoch seconds
    hist_epoch_timestamp = hist_datetime.replace(tzinfo=timezone.utc).timestamp()
    print(hist_datetime)

    resp = req_hist_forecast(
        api_key,
        lat=sites[site_number]['Latitude'],
        lon=sites[site_number]['Longitude'],
        datetime=int(hist_epoch_timestamp),
    )
    json_hist_data = json.loads(resp.text)

    df_hist_forecast_hr = pd.json_normalize(json_hist_data['data'])

    # Layout: readable UTC time, raw epoch value, then the timezone fields
    df_hist_forecast_hr.rename(columns={"dt": "dt_machine"}, inplace=True)
    df_hist_forecast_hr.insert(
        0,
        'dt',
        df_hist_forecast_hr['dt_machine'].map(
            lambda epoch: convert_timestamp_to_text(epoch, date_format)
        ),
    )
    df_hist_forecast_hr.insert(2, 'timezone', json_hist_data['timezone'])
    df_hist_forecast_hr.insert(3, 'timezone_offset', json_hist_data['timezone_offset'])

    hourly_frames.append(df_hist_forecast_hr)

# pd.concat raises on an empty list, so fall back to an empty table
if hourly_frames:
    df_forec_period = pd.concat(hourly_frames, ignore_index=True)
else:
    df_forec_period = pd.DataFrame()

print('Saving file: {} '.format(results_filename))
df_forec_period.to_csv(results_filename, index=False)

2023-06-15 00:00:00
{"lat":21.4469,"lon":-158.1887,"timezone":"Pacific/Honolulu","timezone_offset":-36000,"data":[{"dt":1686787200,"sunrise":1686757815,"sunset":1686806157,"temp":299.63,"feels_like":299.63,"pressure":1018,"humidity":56,"dew_point":290.15,"clouds":75,"visibility":10000,"wind_speed":4.63,"wind_deg":80,"wind_gust":8.23,"weather":[{"id":803,"main":"Clouds","description":"broken clouds","icon":"04d"}]}]}
2023-06-15 01:00:00
{"lat":21.4469,"lon":-158.1887,"timezone":"Pacific/Honolulu","timezone_offset":-36000,"data":[{"dt":1686790800,"sunrise":1686757815,"sunset":1686806157,"temp":300.31,"feels_like":301.13,"pressure":1017,"humidity":56,"dew_point":290.78,"clouds":75,"visibility":10000,"wind_speed":7.72,"wind_deg":90,"weather":[{"id":803,"main":"Clouds","description":"broken clouds","icon":"04d"}]}]}
2023-06-15 02:00:00
{"lat":21.4469,"lon":-158.1887,"timezone":"Pacific/Honolulu","timezone_offset":-36000,"data":[{"dt":1686794400,"sunrise":1686757815,"sunset":1686806157,"temp

In [None]:
# Save the most recently downloaded historical record (json_hist_data) on its own
date_format = '%Y-%m-%d %H:%M:%S'

df_hist_forecast_hr = pd.json_normalize(json_hist_data['data'])

# Layout: readable UTC time, raw epoch value, then the timezone fields.
# The timezone fields come from the historical response itself, not from the
# forecast response held in json_data.
df_hist_forecast_hr.rename(columns={"dt": "dt_machine"}, inplace=True)
df_hist_forecast_hr.insert(
    0,
    'dt',
    df_hist_forecast_hr['dt_machine'].map(
        lambda epoch: convert_timestamp_to_text(epoch, date_format)
    ),
)
df_hist_forecast_hr.insert(2, 'timezone', json_hist_data['timezone'])
df_hist_forecast_hr.insert(3, 'timezone_offset', json_hist_data['timezone_offset'])

# Label the file with the UTC date of the record instead of a fixed date
hist_date = convert_timestamp_to_text(json_hist_data['data'][0]['dt'], '%Y-%m-%d')
results_filename = './outputs/current_weather_forecasts/forecast_{}_{}_hourly.csv'.format(
    sites[site_number]['Name'],
    hist_date,
)
print('Saving file: {} '.format(results_filename))
df_hist_forecast_hr.to_csv(results_filename, index=False)

Saving file: ./outputs/current_weather_forecasts/forecast_Donalsonville, GA Substation_2018-06-18_hourly.csv 


In [None]:
# Save the daily section of the latest forecast response (json_data)
date_format = '%Y-%m-%d %H:%M:%S'

df_forecast_day = pd.json_normalize(json_data['daily'])

# Layout: readable UTC time, raw epoch value, then the timezone fields.
# 'dt' is filled with the converted timestamp rather than left at 0.
df_forecast_day.rename(columns={"dt": "dt_machine"}, inplace=True)
df_forecast_day.insert(
    0,
    'dt',
    df_forecast_day['dt_machine'].map(
        lambda epoch: convert_timestamp_to_text(epoch, date_format)
    ),
)
df_forecast_day.insert(2, 'timezone', json_data['timezone'])
df_forecast_day.insert(3, 'timezone_offset', json_data['timezone_offset'])

# File name carries the site name and the current local time
results_filename = './outputs/current_weather_forecasts/forecast_{}_{}_daily.csv'.format(
    sites[site_number]['Name'],
    datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
)
print('Saving file: {} '.format(results_filename))
df_forecast_day.to_csv(results_filename, index=False)

Saving file: ./outputs/current_weather_forecasts/forecast_Donalsonville, GA Substation_2023-06-26_15-22-08_daily.csv 


In [None]:
# Save the 'current' section of the latest forecast response (json_data)
date_format = '%Y-%m-%d %H:%M:%S'

# Rebuild the table from the response so the cell can be re-run: inserting
# 'dt' into a table that already has that column raises ValueError
df_forecast_current = pd.json_normalize(json_data['current'])

# Layout: readable UTC time, raw epoch value, then the timezone fields
df_forecast_current.rename(columns={"dt": "dt_machine"}, inplace=True)
df_forecast_current.insert(
    0,
    'dt',
    df_forecast_current['dt_machine'].map(
        lambda epoch: convert_timestamp_to_text(epoch, date_format)
    ),
)
df_forecast_current.insert(2, 'timezone', json_data['timezone'])
df_forecast_current.insert(3, 'timezone_offset', json_data['timezone_offset'])

# File name carries the site name and the current local time
results_filename = './outputs/current_weather_forecasts/forecast_{}_{}_current.csv'.format(
    sites[site_number]['Name'],
    datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
)
print('Saving file: {} '.format(results_filename))
df_forecast_current.to_csv(results_filename, index=False)

Saving file: ./outputs/forecast_Waianae, HI Substation_2023-06-18_11-29-39_current.csv 


In [None]:

# Save the 'minutely' section of the latest forecast response (json_data)
date_format = '%Y-%m-%d %H:%M:%S'

# The table has to be built here; no earlier cell defines it
df_forecast_minute = pd.json_normalize(json_data['minutely'])

# Layout: readable UTC time, raw epoch value, then the timezone fields
df_forecast_minute.rename(columns={"dt": "dt_machine"}, inplace=True)
df_forecast_minute.insert(
    0,
    'dt',
    df_forecast_minute['dt_machine'].map(
        lambda epoch: convert_timestamp_to_text(epoch, date_format)
    ),
)
df_forecast_minute.insert(2, 'timezone', json_data['timezone'])
df_forecast_minute.insert(3, 'timezone_offset', json_data['timezone_offset'])

# File name carries the site name and the current local time
results_filename = './outputs/current_weather_forecasts/forecast_{}_{}_minute.csv'.format(
    sites[site_number]['Name'],
    datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
)
print('Saving file: {} '.format(results_filename))
df_forecast_minute.to_csv(results_filename, index=False)

Saving file: ./outputs/forecast_Donalsonville, GA Substation_2023-06-06_14-40-29_minute.csv 
