Using SQL to update weather data

In [1]:
# sqlite3 library to run operations on the database
import sqlite3

gen_path = '/home/imants/gits/COVID-19/'

db_path = gen_path + 'sql/weather.db'

# connecting to the weather database
con = sqlite3.connect(db_path)

# establishing database cursor
cursorObj = con.cursor()


In [2]:
# Getting table names

table_names = cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table';")

table_names = table_names.fetchall()

# Taking the table names that were fetched, and making a usable lists out of the values.

US_names = []
global_names = []

for x in range(len(table_names)):
    if (table_names[x][0][-2:] == 'US'):
        US_names.append(table_names[x][0])
        
    else:
        global_names.append(table_names[x][0])
        
 
print('US names', US_names)

print('Global names', global_names)
    


US names ['cloud_US', 'dew_US', 'humidity_US', 'ozone_US', 'precip_US', 'pressure_US', 'sunrise_US', 'sunset_US', 'tMax_US', 'tMin_US', 'uv_US', 'wind_US']
Global names ['cloud_Global', 'dew_Global', 'humidity_Global', 'ozone_Global', 'precip_Global', 'pressure_Global', 'sunrise_Global', 'sunset_Global', 'tMax_Global', 'tMin_Global', 'uv_Global', 'wind_Global']


In [3]:
# Reading API key

key_path = '/home/imants/gits/COVID-19/key.txt'

api_file = open(key_path, 'r')
API_KEY = api_file.read()
api_file.close()
API_KEY = API_KEY.strip()

API_KEY

'a345c6f27694f89e4ea471f0f116adba'

In [4]:
# Creating API call function

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime as dt

def call_api (lat, long, time):
    
    # Building the URL for the API get
    url = 'https://api.darksky.net/forecast/' + API_KEY + '/' + lat + "," + long + ',' + time
    url = url + '?exclude=currently,flags&units=si'
    print(url)
    
    # Getting the API call
    # using the retry error handling established above
    response = http.get(url)
    
    # Putting the API response into the JSON thing
    info = json.loads(response.content)
    #info = url
    
    return info

In [5]:
# Sorting out JSON response from API call


def sortOut (info):

    # Dummy value
    dummy = '-1000'

    # dictionary of what will be pulled from JSON
    return_values = {'tMax': '',
                    'tMin': '',
                    'hum': '',
                    'uvee': '',
                    'clouds': '',
                    'precip': '',
                    'dew': '',
                    'pressure': '',
                    'wind': '',
                    'ozone': '',
                    'sunrise': '',
                    'sunset': ''}

    print(info['daily']['data'][0])
    try:
    
        # Making a variable to more easily acccess JSON response data
        easy_info = info['daily']['data'][0]

        # Reading the JSON data
        return_values['tMax'] = str(easy_info['temperatureHigh'])
        return_values['tMin'] = str(easy_info['temperatureLow'])
        return_values['hum'] = str(easy_info['humidity'] * 100)
        return_values['uvee'] = str(easy_info['uvIndex'])
        return_values['clouds'] = str(easy_info['cloudCover'] * 100)
        return_values['precip'] = str(easy_info['precipProbability'] * 100)
        return_values['dew'] = str(easy_info['dewPoint'])
        return_values['pressure'] = str(easy_info['pressure'])
        return_values['wind'] = str(easy_info['windSpeed'])
        return_values['ozone'] = str(easy_info['ozone'])
        return_values['sunrise'] = str(easy_info['sunriseTime'])
        return_values['sunset'] = str(easy_info['sunsetTime'])

    except:

        # Creating dummy values in case of error
        print('Error encountered')
        
        for x in return_values:
            return_values[x] = dummy
    
    return return_values

In [11]:
# Going through tables and adding data to new dates

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime as dt

# Trying to capture sqlite error messages
# found https://stackoverflow.com/questions/25371636/how-to-get-sqlite-result-error-codes-in-python
import traceback
import sys

# Code to increase number of retries on connection errors,
# and also to give it some time.
# Found on https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
# And https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

s = requests.Session()

retries = Retry(total=30,
                backoff_factor=0.1,
                status_forcelist=[ 429, 500, 502, 503, 504 ],
                method_whitelist=["HEAD", "GET", "OPTIONS"])

adapter = HTTPAdapter(max_retries=retries)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)


# Making list of columns that need to be updated from prior update, above
columns_to_update = []

for x in range(13, 32):
    
    day_str = str(x)
    date_str = '"8/' + day_str + '/20"'
    columns_to_update.append(date_str)

# columns_to_update

for day in columns_to_update:
    
    print(day)
    
    # Querying all the latitudes and longitudes for the given day
    query_str = 'SELECT Lat, Long_, {} FROM {}'.format(day, US_names[0])
    print(query_str)
    cursorObj.execute(query_str)
    
    # putting query results into a variable 
    #rows = cursorObj.fetchall()
    
    # Create Unix time stamp out of the date column
    # stripping quotation marks from column name
    i = len(day)
    col_date = day[1:(i-1)]
    
    print(col_date)
    
    t = pd.to_datetime(col_date)
    t = int(t.value / 10**9)
    t = str(t)
    
    print(t)
    
    for row in cursorObj:
        print(row[0])
        print(row[1])
        print(row[2])
        
        latitude = str(row[0])
        longitude = str(row[1])
        
        print('Latitude: ', latitude, 'Longitude: ', longitude)
        
        # Calling API based on the row's latitude, longitude, and the day in question
        info = call_api(latitude, longitude, t)
        
        # print(info)
        
        # putting the JSON response into a dictionary
        weatherData = sortOut(info)
        
        try:
                   
            updateCursor = con.cursor()

            where_str = 'Lat = {} AND Long_ = {}'.format(latitude, longitude)

            update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('cloud_US', day, weatherData['clouds'], where_str)
            # update_str = "'''"+update_str+"'''"
            updateCursor.execute(update_str)
            con.commit()

            print(update_str)
        
        # displaying error
        except sqlite3.Error as er:
            print('SQLite error: %s' % (' '.join(er.args)))
            print("Exception class is: ", er.__class__)
            print('SQLite traceback: ')
            exc_type, exc_value, exc_tb = sys.exc_info()
            print(traceback.format_exception(exc_type, exc_value, exc_tb))
        
        
        
        break
    
    break

"8/13/20"
SELECT Lat, Long_, "8/13/20" FROM cloud_US
8/13/20
1597276800
32.53952745
-86.64408227
None
Latitude:  32.53952745 Longitude:  -86.64408227
https://api.darksky.net/forecast/a345c6f27694f89e4ea471f0f116adba/32.53952745,-86.64408227,1597276800?exclude=currently,flags&units=si
{'time': 1597208400, 'summary': 'Humid throughout the day.', 'icon': 'rain', 'sunriseTime': 1597230600, 'sunsetTime': 1597278960, 'moonPhase': 0.79, 'precipIntensity': 0.0427, 'precipIntensityMax': 0.2521, 'precipIntensityMaxTime': 1597280520, 'precipProbability': 0.26, 'precipType': 'rain', 'temperatureHigh': 33.53, 'temperatureHighTime': 1597260660, 'temperatureLow': 22.36, 'temperatureLowTime': 1597313280, 'apparentTemperatureHigh': 38.49, 'apparentTemperatureHighTime': 1597259220, 'apparentTemperatureLow': 23.39, 'apparentTemperatureLowTime': 1597313400, 'dewPoint': 22.5, 'humidity': 0.77, 'pressure': 1015.9, 'windSpeed': 1.07, 'windGust': 2.98, 'windGustTime': 1597208400, 'windBearing': 265, 'cloudCov

In [None]:
# update US locations

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime as dt

# Code to increase number of retries on connection errors,
# and also to give it some time.
# Found on https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
# And https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

s = requests.Session()

retries = Retry(total=30,
                backoff_factor=0.1,
                status_forcelist=[ 429, 500, 502, 503, 504 ],
                method_whitelist=["HEAD", "GET", "OPTIONS"])

adapter = HTTPAdapter(max_retries=retries)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)

# Dummy value in case of errors
dummy = '-1000'

# Making list of columns that need to be updated from prior update, above
columns_to_update = []

# Counting API calls
counter = 0

for x in range(13, 32):
    
    day_str = str(x)
    date_str = '"8/' + day_str + '/20"'
    columns_to_update.append(date_str)

for col_name in columns_to_update:
    
    tables = []
        
    for tab_name in US_names:
        
        query_string = 'SELECT Lat, "Long_", {} FROM {}'.format(col_name, tab_name)
        # print(query_string)
        cursorObj.execute(query_string)
        tables.append(cursorObj.fetchall())
        
        # names_test = cursorObj.execute(query_string).description
        # print(names_test[2][0], col_name)
        
    # print(len(tables[0]))
   
    # Create Unix time stamp out of the date column
    # stripping quotation marks from column name
    i = len(col_name)
    col_date = col_name[1:(i-1)]
    
    t = pd.to_datetime(col_date)
    t = int(t.value / 10**9)
    t = str(t)
    
    
    for y in range(len(tables[0])):
        
        latitude = str(tables[0][y][0])
        longitude = str(tables[0][y][1])
        
        # Calling API with latitude, longitude, and unix time
        info = call_api(latitude, longitude, t)
        
        print(counter)
        counter = counter + 1
        
        # print(info)
        print('Latitude: ', latitude, ', Longitude: ', longitude, ', Date: ', col_name)
        print('Unix time: ', t)
        
        
        # US names: [0: 'cloud_US', 1: 'dew_US', 2: 'humidity_US', 3: 'ozone_US', 4: 'precip_US', 5: 'pressure_US', 
        #           6: 'sunrise_US', 7: 'sunset_US', 8: 'tMax_US', 9: 'tMin_US', 10: uv_US', 11: 'wind_US']
       
        
        # getting values from the JSON response
        # adding error handling in case something is wrong with the JSON response
        try:

            # Making a variable to more easily acccess JSON response data
            easy_info = info['daily']['data'][0]

            # Reading the JSON data
            tMax = str(easy_info['temperatureHigh'])
            tMin = str(easy_info['temperatureLow'])
            hum = str(easy_info['humidity'] * 100)
            uvee = str(easy_info['uvIndex'])
            clouds = str(easy_info['cloudCover'] * 100)
            precip = str(easy_info['precipProbability'] * 100)
            dew = str(easy_info['dewPoint'])
            pressure = str(easy_info['pressure'])
            wind = str(easy_info['windSpeed'])
            ozone = str(easy_info['ozone'])
            sunrise = str(easy_info['sunriseTime'])
            sunset = str(easy_info['sunsetTime'])

        except:

            # Creating dummy values in case of error
            print('Error encountered')
            tMax = dummy
            tMin = dummy
            hum = dummy
            uvee = dummy
            clouds = dummy
            precip = dummy
            dew = dummy
            pressure = dummy
            wind = dummy
            ozone = dummy
            sunrise = dummy
            sunset = dummy

        
        where_str = 'Lat = {} AND Long_ = {}'.format(latitude, longitude)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('cloud_US', col_name, clouds, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('dew_US', col_name, dew, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('humidity_US', col_name, hum, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('ozone_US', col_name, ozone, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('precip_US', col_name, precip, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('pressure_US', col_name, pressure, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('sunrise_US', col_name, sunrise, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('sunset_US', col_name, sunset, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('tMax_US', col_name, tMax, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('tMin_US', col_name, tMin, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('uv_US', col_name, uvee, where_str)
        # cursorObj.execute(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('wind_US', col_name, wind, where_str)
        # cursorObj.execute(update_str)
        
        
        
        
        # print(iteration[y][2])
# test_string = 'SELECT Lat, "Long_", "8/13/20" FROM cloud_US'

# cursorObj.execute(test_string)

# the_table = cursorObj.fetchall()

#the_table[0]
#print(tables[0][0])
#names_test = tables[0].description

#print(names_test)

In [None]:
# test cell

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime as dt

# Code to increase number of retries on connection errors,
# and also to give it some time.
# Found on https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
# And https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/


# establishing writing cursor
cursorWrite = con.cursor()

# test API call
def call_api (lat, long, time):
    
    # Building the URL for the API get
    url = 'https://api.darksky.net/forecast/' + API_KEY + '/' + lat + "," + long + ',' + time
    url = url + '?exclude=currently,flags&units=si'

    # Getting the API call
    # using the retry error handling established above
    # response = http.get(url)
    
    # Putting the API response into the JSON thing
    #info = json.loads(response.content)
    info = url
    
    return info

# Dummy value in case of errors
dummy = '-1000'

# Making list of columns that need to be updated from prior update, above
columns_to_update = []

# Counting API calls
counter = 0

for x in range(13, 32):
    
    day_str = str(x)
    date_str = '"8/' + day_str + '/20"'
    columns_to_update.append(date_str)

for col_name in columns_to_update:
    
    tables = []
        
    for tab_name in US_names:
        
        query_string = 'SELECT Lat, "Long_", {} FROM {}'.format(col_name, tab_name)
        # print(query_string)
        cursorObj.execute(query_string)
        tables.append(cursorObj.fetchall())
        
        # names_test = cursorObj.execute(query_string).description
        # print(names_test[2][0], col_name)
        
    # print(len(tables[0]))
   
    # Create Unix time stamp out of the date column
    # stripping quotation marks from column name
    i = len(col_name)
    col_date = col_name[1:(i-1)]
    
    t = pd.to_datetime(col_date)
    t = int(t.value / 10**9)
    t = str(t)
    
    
    for y in range(len(tables[0])):
        
        latitude = str(tables[0][y][0])
        longitude = str(tables[0][y][1])
        
        # Calling API with latitude, longitude, and unix time
        info = call_api(latitude, longitude, t)
        
        print(counter)
        counter = counter + 1
        
        # print(info)
        print('Latitude: ', latitude, ', Longitude: ', longitude, ', Date: ', col_name)
        print('Unix time: ', t)
        
        
        # US names: [0: 'cloud_US', 1: 'dew_US', 2: 'humidity_US', 3: 'ozone_US', 4: 'precip_US', 5: 'pressure_US', 
        #           6: 'sunrise_US', 7: 'sunset_US', 8: 'tMax_US', 9: 'tMin_US', 10: uv_US', 11: 'wind_US']
       
        # dummy values
        tMax = str(counter * 10)
        tMin = str(counter * 11)
        hum = str(counter * 12)
        uvee = str(counter * 13)
        clouds = str(counter * 14)
        precip = str(counter * 15)
        dew = str(counter * 16)
        pressure = str(counter * 17)
        wind = str(counter * 18)
        ozone = str(counter * 19)
        sunrise = str(counter * 20)
        sunset = str(counter * 21)

        
        
        where_str = 'Lat = {} AND Long_ = {}'.format(latitude, longitude)
        
        update_str = '''UPDATE {} SET {} = {} WHERE {}'''.format('cloud_US', col_name, clouds, where_str)
        cursorWrite.execute(update_str)
        print(update_str)
        con.commit()
        
        
        # print(iteration[y][2])
# test_string = 'SELECT Lat, "Long_", "8/13/20" FROM cloud_US'

# cursorObj.execute(test_string)

# the_table = cursorObj.fetchall()

#the_table[0]
#print(tables[0][0])
#names_test = tables[0].description

#print(names_test)

In [None]:
# temporary home for this code



"""
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('dew_US', col_name, dew, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('humidity_US', col_name, hum, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('ozone_US', col_name, ozone, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('precip_US', col_name, precip, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('pressure_US', col_name, pressure, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('sunrise_US', col_name, sunrise, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('sunset_US', col_name, sunset, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('tMax_US', col_name, tMax, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('tMin_US', col_name, tMin, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('uv_US', col_name, uvee, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        
        update_str = 'UPDATE {} SET {} = {} WHERE {}'.format('wind_US', col_name, wind, where_str)
        cursorObj.execute(update_str)
        print(update_str)
        """



# adding error handling in case something is wrong with the JSON response
        try:

            # Making a variable to more easily acccess JSON response data
            easy_info = info['daily']['data'][0]

            # Reading the JSON data
            tMax = easy_info['temperatureHigh']
            tMin = easy_info['temperatureLow']
            hum = easy_info['humidity'] * 100
            uvee = easy_info['uvIndex']
            clouds = easy_info['cloudCover'] * 100
            precip = easy_info['precipProbability'] * 100
            dew = easy_info['dewPoint']
            pressure = easy_info['pressure']
            wind = easy_info['windSpeed']
            ozone = easy_info['ozone']
            sunrise = easy_info['sunriseTime']
            sunset = easy_info['sunsetTime']

        except:

            # Creating dummy values in case of error
            print('Error encountered')
            tMax = dummy
            tMin = dummy
            hum = dummy
            uvee = dummy
            clouds = dummy
            precip = dummy
            dew = dummy
            pressure = dummy
            wind = dummy
            ozone = dummy
            sunrise = dummy
            sunset = dummy


In [None]:
for x in range(len(the_table)):
    print(x)

In [None]:
# close database connection

con.close()

In [None]:
# getting table info

# query = 'SELECT * FROM {}'.format(US_names[0])
# cursorObj.execute(query)

# the_table = cursorObj.fetchall()

# the_table

In [None]:
# adding dates to tables
# current last date is 8/12/20

for x in range(14, 32):
    
    day_str = str(x)
    date_str = '"8/' + day_str + '/20"'
    
    for y in US_names:
        
        add_str = 'ALTER TABLE {} ADD COLUMN {} text'.format(y, date_str)
        cursorObj.execute(add_str)
        print(add_str)
    
    for y in global_names:
        
        add_str = 'ALTER TABLE {} ADD COLUMN {} text'.format(y, date_str)
        cursorObj.execute(add_str)
        print(add_str)