# Extract Weather Information from Weather Map

---


By Mauricio Hernandez

Goal(s):
- Collect and download data from a set of locations using the OpenWeatherMap API.

---
See documentation at: https://openweathermap.org/api/one-call-3

In [None]:
#Import libraries
import requests
import pandas as pd
import os
import json
#import datetime
import time
from datetime import timezone
from datetime import datetime
from google.colab import drive
from datetime import timedelta

In [None]:
# Mount Google Drive inside the Colab runtime so the project folder is reachable.
drive.mount('/content/gdrive')
# Folder that holds this notebook; the relative '../Data/...' paths used by later
# cells are resolved against it.
# Previous location, kept for reference:
#path = "/content/gdrive/MyDrive/Women In Technology/Niharika Yalla/Code"
path = "/content/gdrive/MyDrive/Lumiere Program/Niharika Yalla/Code"
# Make that folder the working directory.
os.chdir(path)

Mounted at /content/gdrive


In [None]:
# List the project folder to confirm the Drive mount and working directory are correct.
os.listdir(path)

['Extract Weather Info WeatherMap.ipynb',
 'Hello.ipynb',
 'weather_info.csv',
 'Read_csv_Nikki.ipynb',
 'car_prices',
 'Predict_car_prices.ipynb',
 'Predict_car_prices_Nikki',
 'Read_CA_Weather_forecast.ipynb',
 'weather_San Francisco_11-04-2023_13-03.csv',
 'weather_San Diego_11-04-2023_13-04.csv',
 'CA_Forecast_nikki ',
 'Read_csv_Mau.ipynb',
 'weather_files_ combine_nikki.ipynb',
 'Combine_actuals_weather_files.ipynb',
 'Combine_forecast_weather.ipynb',
 'Combine weather forecasts and actuals.ipynb',
 'Current_Forecast_WeatherMap.ipynb']

In [None]:
# Read the OpenWeatherMap API key.
# The key is expected on the first line of a plain-text file kept next to the
# input data, so it never has to be typed into the notebook itself.
with open('../Data/Inputs/WeatherMap_APIKey') as f:
    line = f.readline()
# strip() removes the newline as well as stray spaces/tabs around the key, which
# would otherwise be sent to the API as part of `appid` and be rejected.
api_key = line.strip()
if not api_key:
    # Fail here with a clear message instead of with a 401 on the first request.
    raise ValueError("The API key file '../Data/Inputs/WeatherMap_APIKey' has an empty first line")

*Define longitude and latitude and other parameters*

## Download Current Weather Data (Free)

In [None]:
# Locations to query, keyed by an arbitrary site number.
# 'Timezone' holds an IANA zone identifier; these are written with underscores
# ('America/Los_Angeles'), which is also the spelling the API returns.
sites = { 1: {'Name': 'San Francisco', 'Latitude': 37.7749, 'Longitude': -122.4194, 'Timezone': 'America/Los_Angeles', 'Elevation': 52.0},
          2: {'Name': 'San Diego', 'Latitude': 32.7157, 'Longitude': -117.1611, 'Timezone': 'America/Los_Angeles', 'Elevation': 19.0},
          3: {'Name': 'Los Angeles', 'Latitude': 34.0549, 'Longitude': -118.2426, 'Timezone': 'America/Los_Angeles', 'Elevation': 89.0},
          4: {'Name': 'Sacramento', 'Latitude': 38.5816, 'Longitude': -121.4944, 'Timezone': 'America/Los_Angeles', 'Elevation': 8.0}
         }

In [None]:
#Example of API call
#https://history.openweathermap.org/data/2.5/history/city?lat=35.05&lon=-106.53&type=hour&start=1672444800&end=1672527600&appid=<YOUR API KEY>
#(the same call with a real API key was removed here: never store API keys in the notebook)
#https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=39.099724&lon=-94.578331&dt=1643803200&appid={API key}
#current weather
#https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={API key}


In [None]:
def req_current_data(api_key, lon, lat, timeout=30):
  """Request the current weather for one location from OpenWeatherMap.

  Sends a GET request to the `/data/2.5/weather` endpoint and, when the call
  succeeds, prints the first 1000 characters of the response body.

  Args:
    api_key: OpenWeatherMap API key, sent as the `appid` query parameter.
    lon: Longitude of the location.
    lat: Latitude of the location.
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    The `requests.Response`. It is also returned when the server answered
    with an HTTP error status, so the caller can still inspect it.
    `None` is returned when no response was received at all (connection
    failure, timeout, ...).
  """
  api_url = 'https://api.openweathermap.org/data/2.5/weather?'
  params = {
      'appid': api_key,
      'lat': lat,
      'lon': lon,
  }

  # Pre-bind the result: if requests.get() itself raises, there is no response
  # object, and returning an unbound name would fail with UnboundLocalError.
  r = None
  try:
    r = requests.get(api_url, params=params, timeout=timeout)
    r.raise_for_status()
    print(r.text[0:1000])
  except Exception as e:
    # NOTE: for HTTP errors the exception text contains the full request URL,
    # including the `appid` key; clear this output before sharing the notebook.
    print(f'Data acquisition failed due to {e}')
  return r

In [None]:
#define coordinates and timeframe (all values in UTC Zone)
site_number = 1
latitude = sites[site_number]['Latitude']
longitude = sites[site_number]['Longitude']
#start_timestamp = start_utc.replace(tzinfo=timezone.utc).timestamp()
#end_timestamp = end_utc.replace(tzinfo=timezone.utc).timestamp()
sites[site_number]

{'Name': 'San Francisco',
 'Latitude': 37.7749,
 'Longitude': -122.4194,
 'Timezone': 'America/Los Angeles',
 'Elevation': 52.0}

In [None]:
# Fetch the current conditions for the selected site and flatten the response
# into a one-row table.
resp = req_current_data(api_key= api_key, lon=longitude, lat=latitude)

json_data = json.loads(resp.text)

df_weather = pd.json_normalize(json_data['main'])
# Wind readings are parsed here but are not merged into df_weather.
df_wind = pd.json_normalize(json_data['wind'])

df_weather.insert(0, "date_machine", json_data['dt'])
# Epoch seconds -> UTC timestamp. datetime.fromtimestamp() would convert to the
# machine's local time zone, which only matches the `dt_utc` column name when
# the kernel happens to run in UTC.
df_weather.insert(0, "dt_utc", pd.to_datetime(df_weather["date_machine"], unit="s"))
# Cloud cover is the single value stored under 'all' in the 'clouds' object.
df_weather['clouds'] = json_data['clouds']['all']
df_weather['visibility'] = json_data['visibility']
df_weather

{"coord":{"lon":-117.1628,"lat":32.7174},"weather":[{"id":802,"main":"Clouds","description":"scattered clouds","icon":"03d"}],"base":"stations","main":{"temp":298.61,"feels_like":298.33,"temp_min":292.46,"temp_max":305.74,"pressure":1015,"humidity":43},"visibility":10000,"wind":{"speed":5.14,"deg":320},"clouds":{"all":40},"dt":1699137698,"sys":{"type":1,"id":5771,"country":"US","sunrise":1699106924,"sunset":1699145739},"timezone":-25200,"id":5391811,"name":"San Diego","cod":200}


Unnamed: 0,dt_utc,date_machine,temp,feels_like,temp_min,temp_max,pressure,humidity,clouds,visibility
0,2023-11-04 22:41:38,1699137698,298.61,298.33,292.46,305.74,1015,43,40,10000


In [None]:
#Save results
# Write the table to ../Data/Inputs, tagging the file name with the site name
# and the time at which the file is saved.
site_name = sites[site_number]['Name']
saved_at = datetime.now().strftime("%m-%d-%Y_%H-%M")
df_weather.to_csv('../Data/Inputs/weather_{}_{}.csv'.format(site_name, saved_at), index = False)

## Download Historical Data

In [None]:
#Example of API call
#https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=39.099724&lon=-94.578331&dt=1643803200&appid={API key}
#(API key removed: never store API keys in the notebook)

In [None]:
def convert_timestamp_to_text(datestamp, date_format):
  """Format a Unix timestamp as text, expressed in UTC.

  Args:
    datestamp: Seconds since the Unix epoch.
    date_format: `strftime` pattern used to render the result.

  Returns:
    The UTC date/time of `datestamp` rendered with `date_format`.
  """
  # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build the UTC
  # time from an aware datetime instead, then drop tzinfo so the rendered text
  # (including any %z/%Z directive) is exactly what the old call produced.
  utc_time = datetime.fromtimestamp(datestamp, tz=timezone.utc).replace(tzinfo=None)
  return utc_time.strftime(date_format)

In [None]:
def req_hist_data_v3(api_key, lon, lat, date, timeout=30):
  """Request historical weather for one location and one point in time.

  Sends a GET request to the One Call 3.0 `timemachine` endpoint and, when the
  call succeeds, prints the first 1000 characters of the response body.

  Args:
    api_key: OpenWeatherMap API key, sent as the `appid` query parameter.
    lon: Longitude of the location.
    lat: Latitude of the location.
    date: Requested moment, sent as the `dt` query parameter (the callers in
      this notebook pass integer Unix timestamps).
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    The `requests.Response`. It is also returned when the server answered
    with an HTTP error status, so the caller can still inspect it.
    `None` is returned when no response was received at all (connection
    failure, timeout, ...).
  """
  api_url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine?'
  params = {
      'appid': api_key,
      'lat': lat,
      'lon': lon,
      'dt': date,
  }

  # Pre-bind the result: if requests.get() itself raises, there is no response
  # object, and returning an unbound name would fail with UnboundLocalError.
  r = None
  try:
    r = requests.get(api_url, params=params, timeout=timeout)
    r.raise_for_status()
    print(r.text[0:1000])
  except Exception as e:
    # NOTE: for HTTP errors the exception text contains the full request URL,
    # including the `appid` key; clear this output before sharing the notebook.
    print(f'Data acquisition failed due to {e}')
  return r

In [None]:
def req_historical_day_weather(date_str, api_key: str, lon:float, lat:float, n_hours: int = 24):
  """Download consecutive hourly weather records starting at a given UTC time.

  One request is made per hour through `req_hist_data_v3`; the `data` records
  of every response are stacked into a single table.

  Args:
    date_str: First hour to request, formatted as '%Y-%m-%d %H:%M:%S' and
      interpreted as UTC.
    api_key: OpenWeatherMap API key.
    lon: Longitude of the location.
    lat: Latitude of the location.
    n_hours: Number of consecutive hours to request (default: one day).

  Returns:
    A DataFrame with the records of all requested hours. The `dt`, `sunrise`
    and `sunset` columns are rendered as UTC text, and the `timezone` and
    `timezone_offset` fields of the response are added as columns 1 and 2.
    An empty DataFrame is returned when `n_hours` is 0.
  """
  date_format = '%Y-%m-%d %H:%M:%S'
  start = datetime.strptime(date_str, date_format).replace(tzinfo=timezone.utc)

  # Collect the hourly frames and concatenate once at the end; growing a
  # DataFrame with concat inside the loop copies all earlier rows every time.
  hourly_frames = []
  for h in range(n_hours):
    hour_timestamp = int((start + timedelta(hours=h)).timestamp())
    # Query the coordinates passed to this function, not the notebook-level
    # `longitude`/`latitude` variables.
    resp = req_hist_data_v3(api_key=api_key, lon=lon, lat=lat, date=hour_timestamp)
    json_data = json.loads(resp.text)

    df_weather_hr = pd.json_normalize(json_data['data'])
    for column in ('dt', 'sunrise', 'sunset'):
      df_weather_hr[column] = df_weather_hr[column].apply(
          lambda stamp: convert_timestamp_to_text(stamp, date_format))
    df_weather_hr.insert(1, 'timezone', json_data['timezone'])
    df_weather_hr.insert(2, 'timezone_offset', json_data['timezone_offset'])
    hourly_frames.append(df_weather_hr)

  if not hourly_frames:
    return pd.DataFrame()
  return pd.concat(hourly_frames, ignore_index=True)

# Site to download; the selection does not change between days, so it is made
# once before the loop.
site_number = 4
latitude = sites[site_number]['Latitude']
longitude = sites[site_number]['Longitude']

# Request each day in turn and write one CSV per day.
for day in range(1, 21):
  date_str = '2023-11-{} 00:00:00'.format(day)

  print('Requesting data from site: {} for day {}'.format(sites[site_number]['Name'], date_str))
  df_day_weather = req_historical_day_weather(date_str, api_key= api_key, lon = longitude, lat = latitude)

  # The file goes to the Data/Inputs folder and is named after the site and the date.
  file_namepath = '../Data/Inputs/weather_{}_{}.csv'.format(sites[site_number]['Name'], date_str.split()[0])
  print("Saving weather info in file {}".format(file_namepath))
  df_day_weather.to_csv(file_namepath, index = False)

# Preview the last day that was downloaded.
df_day_weather.head()

Requesting data from site: Sacramento for day 2023-11-1 00:00:00
{"lat":38.5816,"lon":-121.4944,"timezone":"America/Los_Angeles","timezone_offset":-25200,"data":[{"dt":1698796800,"sunrise":1698762680,"sunset":1698800862,"temp":295.1,"feels_like":294.08,"pressure":1021,"humidity":28,"dew_point":275.78,"clouds":0,"visibility":10000,"wind_speed":2.57,"wind_deg":320,"weather":[{"id":800,"main":"Clear","description":"clear sky","icon":"01d"}]}]}
{"lat":38.5816,"lon":-121.4944,"timezone":"America/Los_Angeles","timezone_offset":-25200,"data":[{"dt":1698800400,"sunrise":1698762680,"sunset":1698800862,"temp":292.81,"feels_like":291.72,"pressure":1021,"humidity":34,"dew_point":276.53,"clouds":0,"visibility":10000,"wind_speed":2.06,"wind_deg":20,"weather":[{"id":800,"main":"Clear","description":"clear sky","icon":"01d"}]}]}
{"lat":38.5816,"lon":-121.4944,"timezone":"America/Los_Angeles","timezone_offset":-25200,"data":[{"dt":1698804000,"sunrise":1698762680,"sunset":1698800862,"temp":289.35,"feels

Unnamed: 0,dt,timezone,timezone_offset,sunrise,sunset,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,visibility,wind_speed,wind_deg,wind_gust,weather
0,2023-11-20 00:00:00,America/Los_Angeles,-28800,2023-11-19 14:52:19,2023-11-20 00:50:41,288.57,287.34,1028,45,276.71,0.25,0,10000,6.17,320,10.8,"[{'id': 800, 'main': 'Clear', 'description': '..."
1,2023-11-20 01:00:00,America/Los_Angeles,-28800,2023-11-19 14:52:19,2023-11-20 00:50:41,287.39,286.15,1027,49,276.84,0.0,0,10000,7.72,330,,"[{'id': 800, 'main': 'Clear', 'description': '..."
2,2023-11-20 02:00:00,America/Los_Angeles,-28800,2023-11-19 14:52:19,2023-11-20 00:50:41,286.37,285.03,1027,49,275.9,0.0,0,10000,6.69,340,,"[{'id': 800, 'main': 'Clear', 'description': '..."
3,2023-11-20 03:00:00,America/Los_Angeles,-28800,2023-11-19 14:52:19,2023-11-20 00:50:41,286.0,284.72,1027,53,276.67,0.0,0,10000,7.72,340,,"[{'id': 800, 'main': 'Clear', 'description': '..."
4,2023-11-20 04:00:00,America/Los_Angeles,-28800,2023-11-19 14:52:19,2023-11-20 00:50:41,285.69,284.44,1027,55,276.9,0.0,0,10000,7.2,320,10.8,"[{'id': 800, 'main': 'Clear', 'description': '..."


In [None]:
# Save the table currently held in df_day_weather, named after the current
# site and the date part of date_str.
day_label = date_str.split()[0]
file_namepath = '../Data/Inputs/weather_{}_{}.csv'.format(sites[site_number]['Name'], day_label)
print("Saving weather info in file {}".format(file_namepath))
df_day_weather.to_csv(file_namepath, index = False)

Saving weather info in file ../Data/Inputs/weather_San Francisco_2023-10-16.csv


## Irradiance (NOT FREE)

In [None]:
# Typical API call
#https://api.openweathermap.org/energy/1.0/solar/data?lat=60.45&lon=-38.67&date=2023-03-30&tz=+03:00&appid={API key}

In [None]:
def add_hour_to_date(date, n_hour):
  """Return `date` shifted by `n_hour` hours.

  `n_hour` is converted with `int()` before being used, so numeric strings
  are accepted and fractional values are truncated.
  """
  shift = timedelta(hours=int(n_hour))
  return date + shift

In [None]:
def req_irradiance(api_key, lon, lat, date, timeout=30):
  """Request solar irradiance data for one location and one date.

  Sends a GET request to the `/energy/1.0/solar/data` endpoint and, when the
  call succeeds, prints the first 1000 characters of the response body.

  Args:
    api_key: OpenWeatherMap API key, sent as the `appid` query parameter.
    lon: Longitude of the location.
    lat: Latitude of the location.
    date: Requested day, sent as the `date` query parameter (the example call
      in this notebook uses the form '2023-03-30').
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    The `requests.Response`. It is also returned when the server answered
    with an HTTP error status, so the caller can still inspect it.
    `None` is returned when no response was received at all (connection
    failure, timeout, ...).
  """
  api_url = 'https://api.openweathermap.org/energy/1.0/solar/data?'
  params = {
      'appid': api_key,
      'lat': lat,
      'lon': lon,
      'date': date
  }

  # Pre-bind the result: if requests.get() itself raises, there is no response
  # object, and returning an unbound name would fail with UnboundLocalError.
  r = None
  try:
    r = requests.get(api_url, params=params, timeout=timeout)
    r.raise_for_status()
    print(r.text[0:1000])
  except Exception as e:
    # NOTE: for HTTP errors the exception text contains the full request URL,
    # including the `appid` key; clear this output before sharing the notebook.
    print(f'Data acquisition failed due to {e}')
  return r

In [None]:
# Disabled example of a single-day irradiance request. The code is wrapped in a
# string literal, so none of it executes: no request is sent and df_irr_day is
# not created. Remove the surrounding quotes to run it.
'''date = datetime.strptime('2022-01-01', '%Y-%m-%d')
date_str = date.strftime('%Y-%m-%d')

resp = req_irradiance(api_key= api_key, lon=longitude, lat=latitude, date=date_str)
json_data = json.loads(resp.text)

df_irr_day = pd.json_normalize(json_data['irradiance']['hourly'])

df_irr_day.insert(0, 'dt_local', 0)
df_irr_day.insert(1, 'gmt_difference', json_data['tz'])
df_irr_day
'''

{"lat":21.446911,"lon":-158.188736,"date":"2022-01-01","tz":"-10:00","sunrise":"2022-01-01T07:10:30","sunset":"2022-01-01T18:02:34","irradiance":{"daily":[{"clear_sky":{"ghi":4682.08,"dni":7721.49,"dhi":767.91},"cloudy_sky":{"ghi":3455.0,"dni":4089.36,"dhi":1228.25}}],"hourly":[{"hour":0,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":1,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":2,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":3,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":4,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":5,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":6,"clear_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0},"cloudy_sky":{"ghi":0.0,"dni":0.0,"dhi":0.0}},{"hour":7,"clear_sky"

{'lat': 21.446911,
 'lon': -158.188736,
 'date': '2022-01-01',
 'tz': '-10:00',
 'sunrise': '2022-01-01T07:10:30',
 'sunset': '2022-01-01T18:02:34',
 'irradiance': {'daily': [{'clear_sky': {'ghi': 4682.08,
     'dni': 7721.49,
     'dhi': 767.91},
    'cloudy_sky': {'ghi': 3455.0, 'dni': 4089.36, 'dhi': 1228.25}}],
  'hourly': [{'hour': 0,
    'clear_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0},
    'cloudy_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0}},
   {'hour': 1,
    'clear_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0},
    'cloudy_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0}},
   {'hour': 2,
    'clear_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0},
    'cloudy_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0}},
   {'hour': 3,
    'clear_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0},
    'cloudy_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0}},
   {'hour': 4,
    'clear_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0},
    'cloudy_sky': {'ghi': 0.0, 'dni': 0.0, 'dhi': 0.0}},
   {'hour': 5,
    'clear_sky': {'ghi'

## Irradiance by dates (NOT SUBSCRIBED)

In [None]:
# Example:
#https://api.openweathermap.org/data/2.5/solar_radiation/history?lat={lat}&lon={lon}&start={start}&end={end}&appid={API key}

In [None]:
# Show the site table currently held in memory.
# NOTE(review): the saved output of this cell lists different sites than the
# `sites` dict defined earlier in this notebook, so it appears to come from an
# older kernel state; re-run the notebook from the top to refresh it.
sites

{1: {'Name': 'San Antonio',
  'Latitude': 35.05,
  'Longitude': -106.53,
  'Timezone': 'GMT+7',
  'Elevation': 1657.0},
 2: {'Name': 'Waianae',
  'Latitude': 21.446911,
  'Longitude': -158.188736,
  'Timezone': 'Pacific/Honolulu',
  'Elevation': 4.0},
 3: {'Name': 'Amity',
  'Latitude': 45.114559,
  'Longitude': -123.204903,
  'Timezone': 'America/Los Angeles',
  'Elevation': 50.0},
 4: {'Name': 'Donalsonville',
  'Latitude': 31.044241,
  'Longitude': -84.879128,
  'Timezone': 'America/New York',
  'Elevation': 0.0}}

In [None]:
def req_irradiance_hist(api_key, lon, lat, start_date, end_date, timeout=30):
  """Request historical solar radiation for one location and a time range.

  Sends a GET request to the `/data/2.5/solar_radiation/history` endpoint and,
  when the call succeeds, prints the first 1000 characters of the response body.

  Args:
    api_key: OpenWeatherMap API key, sent as the `appid` query parameter.
    lon: Longitude of the location.
    lat: Latitude of the location.
    start_date: Start of the range, sent as the `start` query parameter (the
      callers in this notebook pass integer Unix timestamps).
    end_date: End of the range, sent as the `end` query parameter (same form
      as `start_date`).
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    The `requests.Response`. It is also returned when the server answered
    with an HTTP error status, so the caller can still inspect it.
    `None` is returned when no response was received at all (connection
    failure, timeout, ...).
  """
  api_url = 'https://api.openweathermap.org/data/2.5/solar_radiation/history?'
  params = {
      'appid': api_key,
      'lat': lat,
      'lon': lon,
      'start': start_date,
      'end':end_date
  }

  # Pre-bind the result: if requests.get() itself raises, there is no response
  # object, and returning an unbound name would fail with UnboundLocalError.
  r = None
  try:
    r = requests.get(api_url, params=params, timeout=timeout)
    r.raise_for_status()
    print(r.text[0:1000])
  except Exception as e:
    # NOTE: for HTTP errors the exception text contains the full request URL,
    # including the `appid` key; clear this output before sharing the notebook.
    print(f'Data acquisition failed due to {e}')
  return r

In [None]:
# Site and time window used for the irradiance-history request.
site_number = 2
latitude, longitude = (sites[site_number][field] for field in ('Latitude', 'Longitude'))

# Window boundaries, parsed as naive datetimes.
start_utc, end_utc = (
    datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    for stamp in ('2022-01-01 00:00:00', '2022-01-06 23:00:00')
)

# Epoch seconds: the naive datetimes are tagged as UTC before converting.
start_timestamp, end_timestamp = (
    moment.replace(tzinfo=timezone.utc).timestamp() for moment in (start_utc, end_utc)
)

end_timestamp

1641510000.0

In [None]:
# Request the irradiance history for the selected site; the window boundaries
# are sent as whole epoch seconds.
resp = req_irradiance_hist(
    api_key=api_key,
    lon=longitude,
    lat=latitude,
    start_date=int(start_timestamp),
    end_date=int(end_timestamp),
)

Data acquisition failed due to 401 Client Error: Unauthorized for url: https://api.openweathermap.org/data/2.5/solar_radiation/history?appid=<REDACTED>&lat=21.446911&lon=-158.188736&start=1640995200&end=1641510000


In [None]:
# Download the history in consecutive time windows and stack the results.
# Only one window is requested here; a commented-out variant of this loop in the
# original cell iterated over range(1, 36).
N_WINDOWS = 1

start = start_utc
end = end_utc

# Collect the per-window frames and concatenate once after the loop.
window_frames = []
for _ in range(N_WINDOWS):
  print(start, end)
  start_timestamp = start.replace(tzinfo=timezone.utc).timestamp()
  end_timestamp = end.replace(tzinfo=timezone.utc).timestamp()

  # NOTE(review): the original cell called `req_hist_data`, which is not defined
  # anywhere in this notebook. `req_irradiance_hist` takes the same arguments and
  # is the request function of this section; confirm it is the intended endpoint.
  resp = req_irradiance_hist(api_key= api_key, lon=longitude, lat=latitude,
                             start_date=int(start_timestamp), end_date=int(end_timestamp))

  json_data = json.loads(resp.text)

  df_week = pd.json_normalize(json_data['list'])
  # Epoch seconds -> UTC timestamps. datetime.fromtimestamp() would convert to
  # the machine's local time zone instead.
  df_week['dt'] = pd.to_datetime(df_week['dt'], unit='s')
  window_frames.append(df_week)

  # Next window: starts one hour after the current end and ends six days after it.
  start = end  + timedelta(hours = 1)
  end = end + timedelta(days = 6)

df_weather = pd.concat(window_frames, ignore_index=True) if window_frames else pd.DataFrame()

df_weather