Initial API test

In [34]:
import requests
import sqlite3
import time


lat = '51.5072'
lon = '0.1276'
now = '1708622726'#int(time.time())
from key import key

link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={now}&appid={key}&units=metric'
conn = sqlite3.connect('weather.db')
response = requests.get(link)
response.text

'{"lat":51.5072,"lon":0.1276,"timezone":"Europe/London","timezone_offset":0,"data":[{"dt":1708622726,"sunrise":1708585263,"sunset":1708622726,"temp":4.83,"feels_like":-0.57,"pressure":981,"humidity":90,"dew_point":3.33,"clouds":100,"visibility":10000,"wind_speed":9.77,"wind_deg":280,"weather":[{"id":501,"main":"Rain","description":"moderate rain","icon":"10n"}],"rain":{"1h":1.78}}]}'

In [44]:
data = response.json()
data

{'lat': 51.5072,
 'lon': 0.1276,
 'timezone': 'Europe/London',
 'timezone_offset': 0,
 'data': [{'dt': 1708622726,
   'sunrise': 1708585263,
   'sunset': 1708622726,
   'temp': 4.83,
   'feels_like': -0.57,
   'pressure': 981,
   'humidity': 90,
   'dew_point': 3.33,
   'clouds': 100,
   'visibility': 10000,
   'wind_speed': 9.77,
   'wind_deg': 280,
   'weather': [{'id': 501,
     'main': 'Rain',
     'description': 'moderate rain',
     'icon': '10n'}],
   'rain': {'1h': 1.78}}]}

Cleaning the response data

Initially I didn't realise that the 'rain' and 'snow' data were given as dict because they are not always included.

This caused an SQLite dict error later, so I had to add in a cleaning step for this.

In [157]:
d1 = {'lat': data['lat'], 'lon': data['lon'], 'timezone': data['timezone'], 'timezone_offset': data['timezone_offset']}

for i in data['data']:
    d1.update(i)

for i in d1['weather']:
    d1.update(i)

d1.pop('weather')

data = d1

#if 'rain' in data.keys():
    #data['rain'] = list(data['rain'].values())[0]

if 'rain' in data.keys():
    data['rain'] = [*data['rain'].values()][0]

if 'snow' in data.keys():
    data['snow'] = [*data['snow'].values()][0]

data

{'lat': 51.5072,
 'lon': 0.1276,
 'timezone': 'Europe/London',
 'timezone_offset': 0,
 'dt': 1708622726,
 'sunrise': 1708585263,
 'sunset': 1708622726,
 'temp': 4.83,
 'feels_like': -0.57,
 'pressure': 981,
 'humidity': 90,
 'dew_point': 3.33,
 'uvi': 0,
 'clouds': 100,
 'visibility': 10000,
 'wind_speed': 9.77,
 'wind_deg': 280,
 'rain': 1,
 'id': 500,
 'main': 'Rain',
 'description': 'light rain',
 'icon': '10n'}

To avoid SQL insertion errors due to lack of data values, I added a step to include placeholders.

This is necessary because the API does not send keys with null values.

In [179]:
all_api_keys = ['lat', 'lon', 'timezone', 'timezone_offset', 'dt', 
                'sunrise', 'sunset', 'temp', 'feels_like', 'pressure', 
                'humidity', 'dew_point', 'uvi', 'clouds', 'visibility', 
                'wind_speed', 'wind_deg', 'wind_gust', 'id', 'main', 
                'description', 'icon', 'rain', 'snow']

for i in all_api_keys:
    if i not in data.keys():
        i = {i: 'N/A'}
        data.update(i)

data

{'lat': 51.5072,
 'lon': 0.1276,
 'timezone': 'Europe/London',
 'timezone_offset': 0,
 'dt': 1708622726,
 'sunrise': 1708585263,
 'sunset': 1708622726,
 'temp': 4.83,
 'feels_like': -0.57,
 'pressure': 981,
 'humidity': 90,
 'dew_point': 3.33,
 'uvi': 0,
 'clouds': 100,
 'visibility': 10000,
 'wind_speed': 9.77,
 'wind_deg': 280,
 'rain': 1,
 'id': 500,
 'main': 'Rain',
 'description': 'light rain',
 'icon': '10n',
 'wind_gust': 'N/A',
 'snow': 'N/A'}

Placing all cleaning code into a function

In [23]:
def clean_data(data):

    d1 = {'lat': data['lat'], 'lon': data['lon'], 'timezone': data['timezone'], 'timezone_offset': data['timezone_offset']}

    for i in data['data']:
        d1.update(i)

    for i in d1['weather']:
        d1.update(i)

    d1.pop('weather')

    data = d1

    if 'rain' in data.keys():
        data['rain'] = [*data['rain'].values()][0]

    if 'snow' in data.keys():
        data['snow'] = [*data['snow'].values()][0]

    all_api_keys = ['lat', 'lon', 'timezone', 'timezone_offset', 'dt', 
                'sunrise', 'sunset', 'temp', 'feels_like', 'pressure', 
                'humidity', 'dew_point', 'uvi', 'clouds', 'visibility', 
                'wind_speed', 'wind_deg', 'wind_gust', 'id', 'main', 
                'description', 'icon', 'rain', 'snow']
    
    for i in all_api_keys:
        if i not in data.keys():
            i = {i: 'N/A'}
            data.update(i)

    return data

data = clean_data(data)

data

Creating SQL table and loading values

In [9]:
conn = sqlite3.connect('weather.db')
cursor = conn.cursor()

cursor.executescript('''

        CREATE TABLE IF NOT EXISTS weather (
                    lat,
                    lon,
                    timezone,
                    timezone_offset,
                    date,
                    sunrise,
                    sunset,
                    temp,
                    feels_like,
                    pressure,
                    humidity,
                    dew_point,
                    uvi,
                    clouds,
                    visibility,
                    wind_speed,
                    wind_gust,
                    wind_deg,
                    weather_id,
                    main,
                    description,
                    icon,
                    rain,
                    snow     
        )
    ''')

cursor.execute('''INSERT INTO weather VALUES (:lat, :lon, :timezone, :timezone_offset,
               :dt, :sunrise, :sunset, :temp, :feels_like, :pressure, :humidity,
               :dew_point, :uvi, :clouds, :visibility, :wind_speed, :wind_gust, :wind_deg,
               :id, :main, :description, :icon, :rain, :snow)''', data
               )

conn.commit()

NameError: name 'sqlite3' is not defined

Quick test to see that everything is working so far

In [170]:
import pandas as pd

df = pd.read_sql_query('''SELECT * FROM weather''', conn)
df

Unnamed: 0,lat,lon,timezone,timezone_offset,date,sunrise,sunset,temp,feels_like,pressure,...,visibility,wind_speed,wind_gust,wind_deg,weather_id,main,description,icon,rain,snow
0,51.5072,0.1276,Europe/London,0,1708622726,1708585263,1708622726,4.83,-0.57,981,...,10000,9.77,,280,500,Rain,light rain,10n,1,


Creating drop table function

In [11]:
def drop_table():

    cursor.executescript('''

        DROP TABLE IF EXISTS weather

    ''')

    conn.commit()

drop_table()

Adding a last executed file so the database can be updated efficiently

In [3]:
def last_executed():

        try:
                with open('last_executed.txt') as f:
                        last_execution_date = int(f.read())
        except FileNotFoundError:
                last_execution_date = int(time.time())

        return last_execution_date

last_execution_date = last_executed()

In [21]:
def update_executed():
    with open('last_executed.txt', 'w') as f:
        f.write(str(int(time.time())))

Creating function to build a list of links for every hour since the code was last executed

In [16]:
def get_links():
    
    if last_execution_date // 86400 ==  time.time() // 86400:
        return []

    links = []

    def build_link(date):
        lat = '51.5072'
        lon = '0.1276'
        from python.key import key
        link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'
        links.append(link)

    nearest_hour = int(time.time() - time.time() % 3600)
    nearest_hour_old = last_execution_date - last_execution_date % 3600

    for x in range(nearest_hour_old+3600, nearest_hour, 3600):
        build_link(x)
    
    return links

get_links()

['https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705345200&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705348800&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705352400&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705356000&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705359600&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.1276&dt=1705363200&appid=e276140c3e33a20f93a7c86e198a285e&units=metric',
 'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=51.5072&lon=0.12

Putting all the relevant code into a function for extracting and loading the data

In [19]:
conn = sqlite3.connect('weather.db')
cursor = conn.cursor()

def extract_load(links):

    cursor.executescript('''

            CREATE TABLE IF NOT EXISTS weather (
                     lat,
                     lon,
                     timezone,
                     timezone_offset,
                     date_time,
                     sunrise,
                     sunset,
                     temp,
                     feels_like,
                     pressure,
                     humidity,
                     dew_point,
                     uvi,
                     clouds,
                     visibility,
                     wind_speed,
                     wind_gust,
                     wind_deg,
                     weather_id,
                     main,
                     description,
                     icon,
                     rain,
                     snow     
            )
        ''')

    for link in links:

        response = requests.get(link)

        data = clean_data(response.json())

        cursor.execute('''INSERT INTO weather VALUES (:lat, :lon, :timezone, :timezone_offset,
                       :dt, :sunrise, :sunset, :temp, :feels_like, :pressure, :humidity, 
                       :dew_point, :uvi, :clouds, :visibility, :wind_speed, :wind_gust, :wind_deg, 
                       :id, :main, :description, :icon, :rain, :snow)''', data
                       )
        
        conn.commit()

Testing

In [24]:
#drop_table()

extract_load(get_links())

In [25]:
df = pd.read_sql_query('''SELECT * FROM weather''', conn)
df

Unnamed: 0,lat,lon,timezone,timezone_offset,date_time,sunrise,sunset,temp,feels_like,pressure,...,visibility,wind_speed,wind_gust,wind_deg,weather_id,main,description,icon,rain,snow
0,51.5072,0.1276,Europe/London,0,1705345200,1705305532,1705335481,0.18,-3.36,1012,...,10000,3.09,,290,801,Clouds,few clouds,02n,,
1,51.5072,0.1276,Europe/London,0,1705348800,1705305532,1705335481,0.11,-1.69,1012,...,10000,1.54,,290,801,Clouds,few clouds,02n,,
2,51.5072,0.1276,Europe/London,0,1705352400,1705305532,1705335481,-0.20,-3.33,1012,...,10000,2.57,,290,803,Clouds,broken clouds,04n,,
3,51.5072,0.1276,Europe/London,0,1705356000,1705305532,1705335481,-0.61,-4.31,1012,...,10000,3.09,,280,803,Clouds,broken clouds,04n,,
4,51.5072,0.1276,Europe/London,0,1705359600,1705305532,1705335481,-1.30,-4.05,1012,...,10000,2.06,,250,803,Clouds,broken clouds,04n,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,51.5072,0.1276,Europe/London,0,1708743600,1708757816,1708795741,2.07,-0.11,991,...,10000,2.06,,220,804,Clouds,overcast clouds,04n,,
945,51.5072,0.1276,Europe/London,0,1708747200,1708757816,1708795741,1.40,-0.90,991,...,10000,2.06,,220,803,Clouds,broken clouds,04n,,
946,51.5072,0.1276,Europe/London,0,1708750800,1708757816,1708795741,1.22,-1.65,992,...,10000,2.57,,230,803,Clouds,broken clouds,04n,,
947,51.5072,0.1276,Europe/London,0,1708754400,1708757816,1708795741,1.09,-0.57,992,...,10000,1.54,,210,802,Clouds,scattered clouds,03n,,


First plot

In [26]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

def plot_temp_feels_like(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Feels like temp }^\circ C\]')

    p.circle(x='temp',
             y='feels_like', source=source, fill_color='blue')

    show(p)

plot_temp_feels_like(df)

In [27]:
def plot_temp_pressure(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Pressure }hPa\]')

    p.circle(x='temp',
             y='pressure', source=source, fill_color='blue')

    show(p)

plot_temp_pressure(df)

In [33]:
def plot_wind_s_d(data):
 
    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Wind Direction }\]',
                y_axis_label=r'\[\text{ Wind Speed }ms^{-1}\]')

    p.circle(x='wind_deg',
            y='wind_speed', source=source, fill_color='blue')

    show(p)

plot_wind_s_d(df)

In [1]:
def load_old_data():
     df = read_sql_query('''SELECT * FROM weather''', conn)
     first_date = df['date_time'][0]
     old_date = first_date - 3600 * 950
     return old_date

load_old_data()

NameError: name 'df' is not defined

Code so far - run from here to avoid issues

In [3]:
import requests
import sqlite3
import time
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource


conn = sqlite3.connect('weather.db')
cursor = conn.cursor()


def last_executed():

        try:
                with open('last_executed.txt') as f:
                        last_execution_date = int(f.read())
        except FileNotFoundError:
                last_execution_date = int(time.time())

        return last_execution_date


def update_executed():
    with open('last_executed.txt', 'w') as f:
        f.write(str(int(time.time())))


def get_links(last_execution_date):

    if last_execution_date // 86400 ==  time.time() // 86400:
        return []

    links = []

    def build_link(date):
        lat = '51.5072'
        lon = '0.1276'
        from key import key
        link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'
        links.append(link)

    nearest_hour = int(time.time() - time.time() % 3600)
    nearest_hour_old = last_execution_date - last_execution_date % 3600

    for x in range(nearest_hour_old, nearest_hour, 3600):
        build_link(x)
    
    return links


def get_links_past(x):

    links = []

    def build_link(date):
        lat = '51.5072'
        lon = '0.1276'
        from key import key
        link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'
        links.append(link)

    df = pd.read_sql_query('''SELECT date_time FROM weather''', conn)
    first_date = df['date_time'].min()
    past_date = first_date - 3600 * x

    for x in range(past_date, first_date, 3600):
        build_link(x)
    
    return links


def clean_data(data):

    d1 = {
         'lat': data['lat'],
         'lon': data['lon'],
         'timezone': data['timezone'],
         'timezone_offset': data['timezone_offset']
          }

    for i in data['data']:
        d1.update(i)

    for i in d1['weather']:
        d1.update(i)

    d1.pop('weather')

    data = d1

    if 'rain' in data.keys():
        data['rain'] = [*data['rain'].values()][0]

    if 'snow' in data.keys():
        data['snow'] = [*data['snow'].values()][0]

    all_api_keys = [
         'lat', 'lon', 'timezone', 'timezone_offset', 'dt', 
         'sunrise', 'sunset', 'temp', 'feels_like', 'pressure', 
         'humidity', 'dew_point', 'uvi', 'clouds', 'visibility', 
         'wind_speed', 'wind_deg', 'wind_gust', 'id', 'main', 
         'description', 'icon', 'rain', 'snow'
                    ]
    
    for i in all_api_keys:
        if i not in data.keys():
            i = {i: 'N/A'}
            data.update(i)

    return data


def extract_load(links):

    cursor.executescript('''

            CREATE TABLE IF NOT EXISTS weather (
                     lat,
                     lon,
                     timezone,
                     timezone_offset,
                     date_time,
                     sunrise,
                     sunset,
                     temp,
                     feels_like,
                     pressure,
                     humidity,
                     dew_point,
                     uvi,
                     clouds,
                     visibility,
                     wind_speed,
                     wind_gust,
                     wind_deg,
                     weather_id,
                     main,
                     description,
                     icon,
                     rain,
                     snow     
            )
        ''')

    for link in links:

        response = requests.get(link)

        data = clean_data(response.json())

        cursor.execute('''INSERT INTO weather VALUES (:lat, :lon, :timezone, :timezone_offset,
                       :dt, :sunrise, :sunset, :temp, :feels_like, :pressure, :humidity, 
                       :dew_point, :uvi, :clouds, :visibility, :wind_speed, :wind_gust, :wind_deg, 
                       :id, :main, :description, :icon, :rain, :snow)''', data
                       )
        
        conn.commit()


def plot_temp_feels_like(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Feels like temp }^\circ C\]')

    p.circle(x='temp',
             y='feels_like', source=source, fill_color='blue')

    show(p)


def plot_temp_pressure(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Pressure }hPa\]')

    p.circle(x='temp',
             y='pressure', source=source, fill_color='blue')

    show(p)


def plot_wind_s_d(data):
 
    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Wind Direction }\]',
               y_axis_label=r'\[\text{ Wind Speed }ms^{-1}\]')

    p.circle(x='wind_deg',
             y='wind_speed', source=source, fill_color='blue')

    show(p)

def plot_temp_humidity(data):
 
    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Humidity }\%\]')

    p.circle(x='temp',
             y='humidity', source=source, fill_color='blue')

    #p.circle(x='feels_like',
             #y='humidity', source=source, fill_color='green')
    
    show(p)

In [12]:
extract_load(get_links_past(900))

In [5]:
extract_load(get_links(last_executed()))

In [6]:
update_executed()

In [9]:
df[df.duplicated() == True]

Unnamed: 0,index,lat,lon,timezone,timezone_offset,date_time,sunrise,sunset,temp,feels_like,...,visibility,wind_speed,wind_gust,wind_deg,weather_id,main,description,icon,rain,snow


In [8]:
df = pd.read_sql_query('''SELECT * FROM weather''', conn)
df = df.sort_values('date_time').reset_index()
df.head(10)

Unnamed: 0,index,lat,lon,timezone,timezone_offset,date_time,sunrise,sunset,temp,feels_like,...,visibility,wind_speed,wind_gust,wind_deg,weather_id,main,description,icon,rain,snow
0,7713,51.5072,0.1276,Europe/London,0,1676548800,1676531547,1676567699,11.24,10.69,...,10000,4.63,,230,804,Clouds,overcast clouds,04d,,
1,7714,51.5072,0.1276,Europe/London,0,1676552400,1676531547,1676567699,11.5,10.98,...,10000,5.14,,220,804,Clouds,overcast clouds,04d,,
2,7715,51.5072,0.1276,Europe/London,0,1676556000,1676531547,1676567699,12.01,11.56,...,10000,4.63,,240,804,Clouds,overcast clouds,04d,,
3,7716,51.5072,0.1276,Europe/London,0,1676559600,1676531547,1676567699,12.01,11.59,...,10000,4.12,,240,804,Clouds,overcast clouds,04d,,
4,7717,51.5072,0.1276,Europe/London,0,1676563200,1676531547,1676567699,12.67,12.21,...,10000,3.6,,240,804,Clouds,overcast clouds,04d,,
5,7718,51.5072,0.1276,Europe/London,0,1676566800,1676531547,1676567699,12.97,12.46,...,10000,5.14,,250,803,Clouds,broken clouds,04d,,
6,7719,51.5072,0.1276,Europe/London,0,1676570400,1676531547,1676567699,12.12,11.66,...,10000,5.14,,250,500,Rain,light rain,10n,0.1,
7,7720,51.5072,0.1276,Europe/London,0,1676574000,1676531547,1676567699,11.84,11.38,...,10000,4.63,,240,803,Clouds,broken clouds,04n,,
8,7721,51.5072,0.1276,Europe/London,0,1676577600,1676531547,1676567699,11.73,11.18,...,10000,5.7,0.0,250,801,Clouds,few clouds,02n,,
9,7722,51.5072,0.1276,Europe/London,0,1676581200,1676531547,1676567699,10.96,10.33,...,10000,5.14,,240,800,Clear,clear sky,01n,,


In [10]:
df.corr()

  df.corr()


Unnamed: 0,index,lat,lon,timezone_offset,date_time,sunrise,sunset,temp,feels_like,pressure,humidity,dew_point,clouds,wind_speed,wind_deg,weather_id
index,1.0,,,0.414326,-0.875671,-0.875694,-0.87565,0.105886,0.106502,0.133359,-0.148398,0.041644,-0.035595,-0.094326,-0.189763,-0.017542
lat,,,,,,,,,,,,,,,,
lon,,,,,,,,,,,,,,,,
timezone_offset,0.414326,,,1.0,-0.533801,-0.533972,-0.533376,0.668725,0.674619,0.211765,-0.354455,0.60197,-0.173006,-0.107549,-0.135553,0.073032
date_time,-0.875671,,,-0.533801,1.0,0.999997,0.999997,-0.22582,-0.229126,-0.186138,0.203979,-0.155604,0.08037,0.071488,0.128266,0.00717
sunrise,-0.875694,,,-0.533972,0.999997,1.0,1.0,-0.226444,-0.22971,-0.186136,0.204895,-0.15585,0.080508,0.071053,0.128308,0.007207
sunset,-0.87565,,,-0.533376,0.999997,1.0,1.0,-0.225898,-0.229163,-0.186069,0.204629,-0.155348,0.080394,0.071004,0.128213,0.007289
temp,0.105886,,,0.668725,-0.22582,-0.226444,-0.225898,1.0,0.993246,0.045309,-0.547242,0.882717,-0.12909,0.179057,0.006981,0.040641
feels_like,0.106502,,,0.674619,-0.229126,-0.22971,-0.229163,0.993246,1.0,0.044803,-0.503351,0.899898,-0.118448,0.136199,-0.001363,0.038729
pressure,0.133359,,,0.211765,-0.186138,-0.186136,-0.186069,0.045309,0.044803,1.0,-0.23765,-0.068967,-0.058208,-0.189009,-0.169042,0.371107


In [11]:
plot_temp_feels_like(df)
plot_temp_pressure(df)
plot_wind_s_d(df)
plot_temp_humidity(df)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4862 entries, 0 to 4861
Data columns (total 25 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   index            4862 non-null   int64  
 1   lat              4862 non-null   float64
 2   lon              4862 non-null   float64
 3   timezone         4862 non-null   object 
 4   timezone_offset  4862 non-null   int64  
 5   date_time        4862 non-null   int64  
 6   sunrise          4862 non-null   int64  
 7   sunset           4862 non-null   int64  
 8   temp             4862 non-null   float64
 9   feels_like       4862 non-null   float64
 10  pressure         4862 non-null   int64  
 11  humidity         4862 non-null   int64  
 12  dew_point        4862 non-null   float64
 13  uvi              4862 non-null   object 
 14  clouds           4862 non-null   int64  
 15  visibility       4862 non-null   object 
 16  wind_speed       4862 non-null   float64
 17  wind_gust     

In [16]:
def build_link_mistake(date):
    lat = '51.5072'
    lon = '0.1276'
    from python.key import key
    link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'

    return link

link_mistake = build_link_mistake(1708966800)

In [17]:
response = requests.get(link_mistake)

data_mistake = clean_data(response.json())

data_mistake

{'lat': 51.5072,
 'lon': 0.1276,
 'timezone': 'Europe/London',
 'timezone_offset': 0,
 'dt': 1708966800,
 'sunrise': 1708930365,
 'sunset': 1708968756,
 'temp': 7.75,
 'feels_like': 4.72,
 'pressure': 1014,
 'humidity': 63,
 'dew_point': 1.16,
 'uvi': 0,
 'clouds': 20,
 'visibility': 10000,
 'wind_speed': 5.14,
 'wind_deg': 30,
 'wind_gust': 12.35,
 'id': 801,
 'main': 'Clouds',
 'description': 'few clouds',
 'icon': '02d',
 'rain': 'N/A',
 'snow': 'N/A'}

In [18]:
cursor.execute('''INSERT INTO weather VALUES (:lat, :lon, :timezone, :timezone_offset,
                :dt, :sunrise, :sunset, :temp, :feels_like, :pressure, :humidity, 
                :dew_point, :uvi, :clouds, :visibility, :wind_speed, :wind_gust, :wind_deg, 
                :id, :main, :description, :icon, :rain, :snow)''', data_mistake
                )

conn.commit()

In [4]:
from datetime import datetime

datetime.utcfromtimestamp(last_execution_date).strftime('%Y-%m-%dT%H:%M:%SZ')

'2024-02-24T09:51:01Z'

Changing from SQLite to MySQL

In [11]:
import mysql.connector
#from key import sql_pass

db = mysql.connector.connect(
    host='localhost',
    user='root',
    passwd='DYxKavh#4S29%SQqQj&@',
    database='test'
)
cursor = db.cursor()


In [47]:
import requests
import mysql.connector
import time
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
#from key import sql_pass
from key import key


db = mysql.connector.connect(
    host='localhost',
    user='root',
    passwd='DYxKavh#4S29%SQqQj&@',
    database='test'
)
cursor = db.cursor()


def last_executed():

        try:
                with open('last_executed.txt') as f:
                        last_execution_date = int(f.read())
        except FileNotFoundError:
                last_execution_date = int(time.time())

        return last_execution_date


def update_executed():
    with open('last_executed.txt', 'w') as f:
        f.write(str(int(time.time())))


def get_links(last_execution_date):

    if last_execution_date // 86400 ==  time.time() // 86400:
        return []

    links = []

    def build_link(date):
        lat = '51.5072'
        lon = '0.1276'
        link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'
        links.append(link)

    nearest_hour = int(time.time() - time.time() % 3600)
    nearest_hour_old = last_execution_date - last_execution_date % 3600

    for x in range(nearest_hour_old, nearest_hour, 3600):
        build_link(x)
    
    return links


def get_links_past(x):

    links = []

    def build_link(date):
        lat = '51.5072'
        lon = '0.1276'
        from python.key import key
        link = f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={date}&appid={key}&units=metric'
        links.append(link)

    df = pd.read_sql_query('''SELECT date_time FROM weather''', db)
    first_date = df['date_time'].min()
    past_date = first_date - 3600 * x

    for x in range(past_date, first_date, 3600):
        build_link(x)
    
    return links


def clean_data(data):

    d1 = {
         'lat': data['lat'],
         'lon': data['lon'],
         'timezone': data['timezone'],
         'timezone_offset': data['timezone_offset']
          }

    for i in data['data']:
        d1.update(i)

    for i in d1['weather']:
        d1.update(i)

    d1.pop('weather')

    data = d1

    if 'rain' in data.keys():
        data['rain'] = [*data['rain'].values()][0]

    if 'snow' in data.keys():
        data['snow'] = [*data['snow'].values()][0]

    all_api_keys = [
         'lat', 'lon', 'timezone', 'timezone_offset', 'dt', 
         'sunrise', 'sunset', 'temp', 'feels_like', 'pressure', 
         'humidity', 'dew_point', 'uvi', 'clouds', 'visibility', 
         'wind_speed', 'wind_deg', 'wind_gust', 'id', 'main', 
         'description', 'icon', 'rain', 'snow'
                    ]
    
    for i in all_api_keys:
        if i not in data.keys():
            i = {i: 'N/A'}
            data.update(i)

    return data


def extract_load(links):

    cursor.execute('''

            CREATE TABLE IF NOT EXISTS weather (
                     lat FLOAT(4),
                     lon FLOAT(4),
                     timezone VARCHAR(15),
                     timezone_offset INT(3),
                     dt INT(15),
                     sunrise INT(15),
                     sunset INT(15),
                     temp FLOAT(2),
                     feels_like FLOAT(2),
                     pressure INT(4),
                     humidity INT(3),
                     dew_point FLOAT(2),
                     uvi INT(2),
                     clouds INT(3),
                     visibility INT(5),
                     wind_speed FLOAT(2),
                     wind_gust FLOAT(2),
                     wind_deg INT(3),
                     weather_id INT(3),
                     main VARCHAR(20),
                     description VARCHAR(30),
                     icon VARCHAR(3),
                     rain FLOAT(2),
                     snow FLOAT(2)     
            )
        ''')

    for link in links:

        response = requests.get(link)

        data = clean_data(response.json())
        placeholders = ', '.join(['%s']) * len(data)
        columns = ', '.join(data.keys())
        sql = f"INSERT INTO weather {columns} VALUES {placeholders}"
        cursor.execute(sql, list(data.values()))
        
        #cursor.execute('''INSERT INTO weather (lat,lon,timezone,timezone_offset,
                       #date_time,sunrise,sunset,temp,feels_like,pressure,humidity,
                      # dew_point,uvi,clouds,visibility,wind_speed,wind_gust,
                      # wind_deg,weather_id,main,description,icon,rain,snow) 
                      # VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,
                       #%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''', values
                      # )
        
        db.commit()


def plot_temp_feels_like(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Feels like temp }^\circ C\]')

    p.circle(x='temp',
             y='feels_like', source=source, fill_color='blue')

    show(p)


def plot_temp_pressure(data):

    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Pressure }hPa\]')

    p.circle(x='temp',
             y='pressure', source=source, fill_color='blue')

    show(p)


def plot_wind_s_d(data):
 
    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Wind Direction }\]',
               y_axis_label=r'\[\text{ Wind Speed }ms^{-1}\]')

    p.circle(x='wind_deg',
             y='wind_speed', source=source, fill_color='blue')

    show(p)

def plot_temp_humidity(data):
 
    source = ColumnDataSource(data)

    p = figure(x_axis_label=r'\[\text{ Temperature }^\circ C\]',
               y_axis_label=r'\[\text{ Humidity }\%\]')

    p.circle(x='temp',
             y='humidity', source=source, fill_color='blue')

    #p.circle(x='feels_like',
             #y='humidity', source=source, fill_color='green')
    
    show(p)

In [48]:
extract_load(get_links(last_executed()))

ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'lat, lon, timezone, timezone_offset, dt, sunrise, sunset, temp, feels_like, pres' at line 1