In [1]:
%pip install pyaurn



In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import requests
import pyaurn
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [30, 15]
import os

from concurrent.futures import ThreadPoolExecutor, wait
#from google.colab import drive
from shutil import make_archive, move
from math import sin, cos
from IPython.display import clear_output

In [40]:
# Sanity check: list the GPU devices TensorFlow can see before any training starts.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Fetch the AURN site metadata table and index it by site_id so sites can be looked up by id.
metadata = pyaurn.importMeta().set_index("site_id")

Downloading meta data:: 32.0kB [00:00, 75.0kB/s]                            


In [4]:
display(metadata)
# Drop the descriptive/administrative columns; what remains is later merged
# into each site's readings by prepare_site_data.
filtered_metadata = metadata.drop(
    columns=[
        "site_name",
        "parameter",
        "Parameter_name",
        "start_date",
        "end_date",
        "ratified_to",
        "zone",
        "agglomeration",
        "local_authority",
    ]
)

Unnamed: 0_level_0,site_name,location_type,latitude,longitude,parameter,Parameter_name,start_date,end_date,ratified_to,zone,agglomeration,local_authority
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ABD,Aberdeen,Urban Background,57.157360,-2.094278,O3,Ozone,2003-08-01,2021-09-20,2021-09-20,North East Scotland,,Aberdeen City
ABD9,Aberdeen Erroll Park,Urban Background,57.157400,-2.094770,O3,Ozone,2021-10-01,ongoing,2022-09-30,North East Scotland,,Aberdeen City
ABD7,Aberdeen Union Street Roadside,Urban Traffic,57.144555,-2.106472,NO,Nitric oxide,2008-01-01,ongoing,2022-09-30,North East Scotland,,Aberdeen City
ABD8,Aberdeen Wellington Road,Urban Traffic,57.133888,-2.094198,NO,Nitric oxide,2016-02-09,ongoing,2022-09-30,North East Scotland,,Aberdeen City
ARM6,Armagh Roadside,Urban Traffic,54.353728,-6.654558,NO,Nitric oxide,2009-01-01,ongoing,2022-09-30,Northern Ireland,,Armagh
...,...,...,...,...,...,...,...,...,...,...,...,...
WRAY,Wray,Rural Background,54.104666,-2.584182,O3,Ozone,1985-04-01,1988-02-29,1988-02-29,North West & Merseyside,,Lancaster
WREX,Wrexham,Urban Traffic,53.042282,-3.002829,NO,Nitric oxide,2002-03-06,ongoing,2022-09-30,North Wales,,Wrexham
YW,Yarner Wood,Rural Background,50.597600,-3.716510,O3,Ozone,1987-06-26,ongoing,2022-09-30,South West,,Teignbridge
YK10,York Bootham,Urban Background,53.967513,-1.086514,NO,Nitric oxide,2016-08-16,ongoing,2022-09-30,Yorkshire & Humberside,,York


In [5]:
def get_raw_data(name, year_range):
    """Download AURN readings for one site and tag every row with its site id.

    Args:
        name: Site identifier, passed to ``pyaurn.importAURN``.
        year_range: Years to request, passed through to ``pyaurn.importAURN``.

    Returns:
        A single-entry dict mapping ``name`` to the downloaded frame, with the
        original index moved into a column and a ``site_id`` column added.
    """
    site_frame = pyaurn.importAURN(name, year_range)
    site_frame = site_frame.reset_index()
    site_frame["site_id"] = name
    return {name: site_frame}

In [6]:
def prepare_site_data(data):
  """Turn one site's raw readings into a model-ready frame.

  Sites without a ``temp`` column are skipped. For the rest, ``date`` is
  converted to a numeric timestamp, rows containing missing values are
  dropped, the site's entry from ``filtered_metadata`` is merged in on
  ``site_id``, and the identifier columns are removed.

  Args:
      data: Frame of readings for a single site.

  Returns:
      The prepared frame, or ``None`` when there is no ``temp`` column or an
      expected column is missing (``KeyError``).
  """
  try:
    if "temp" not in data.columns:
      return None
    prepared = data.copy()
    prepared["date"] = prepared["date"].apply(lambda stamp: stamp.timestamp())
    prepared = prepared.dropna()
    merged = prepared.merge(filtered_metadata, on="site_id")
    return merged.drop(["site_id", "site", "code"], axis=1)
  except KeyError:
    # Best-effort: a site lacking any of the expected columns is treated as unusable.
    return None

In [7]:
display(metadata.index)
# Download every site concurrently. os.cpu_count() can return None when the
# CPU count cannot be determined, so fall back to 1 instead of raising a
# TypeError on `None + 4`.
with ThreadPoolExecutor(min(32, (os.cpu_count() or 1) + 4)) as executor:
  all_data = [executor.submit(get_raw_data, name, range(2021, 2022)) for name in metadata.index]
  # wait() blocks until every future has finished and returns (done, not_done) sets;
  # all_data is rebound to that result and read via `.done` in a later cell.
  all_data = wait(all_data)
  # Clear the per-site download progress output.
  clear_output()

In [10]:
# Unwrap the finished futures; .result() re-raises any exception from the worker.
raw_data = [finished.result() for finished in all_data.done]
# `done` is a set, so index 2 is simply an arbitrary sample site.
display(raw_data[2])

{'MH':                     date        O3     wd    ws  temp       site code site_id
 0    2021-01-01 00:00:00  61.22808    1.6  10.8   7.6  Mace Head   MH      MH
 1    2021-01-01 01:00:00  60.92872    6.3  10.8   7.7  Mace Head   MH      MH
 2    2021-01-01 02:00:00  60.84889   11.0  10.3   7.4  Mace Head   MH      MH
 3    2021-01-01 03:00:00  62.16606    4.9  10.2   7.3  Mace Head   MH      MH
 4    2021-01-01 04:00:00  62.92442    2.4  10.4   7.5  Mace Head   MH      MH
 ...                  ...       ...    ...   ...   ...        ...  ...     ...
 8755 2021-12-31 19:00:00  61.94653  179.5  12.8  11.5  Mace Head   MH      MH
 8756 2021-12-31 20:00:00  58.53388  175.5  13.5  11.7  Mace Head   MH      MH
 8757 2021-12-31 21:00:00  54.08347  167.4  13.9  11.7  Mace Head   MH      MH
 8758 2021-12-31 22:00:00  53.36502  167.2  14.0  11.9  Mace Head   MH      MH
 8759 2021-12-31 23:00:00  52.84614  172.9  14.1  11.9  Mace Head   MH      MH
 
 [8760 rows x 8 columns]}

In [11]:
# Shallow copy of the list: the per-site dicts and frames are shared with raw_data.
usable_data = list(raw_data)
display(usable_data[0])

{'COV2': Empty DataFrame
 Columns: [index, site_id]
 Index: []}

In [12]:
# Flatten the list of single-entry dicts into one {site_id: prepared frame or None} mapping.
retrieved_data = {
    site_id: prepare_site_data(site_frame)
    for site_entry in usable_data
    for site_id, site_frame in site_entry.items()
}
display(retrieved_data)

{'COV2': None,
 'ISL': None,
 'MH':               date        O3     wd    ws  temp     location_type   latitude  \
 0     1.609459e+09  61.22808    1.6  10.8   7.6  Rural Background  53.326444   
 1     1.609463e+09  60.92872    6.3  10.8   7.7  Rural Background  53.326444   
 2     1.609466e+09  60.84889   11.0  10.3   7.4  Rural Background  53.326444   
 3     1.609470e+09  62.16606    4.9  10.2   7.3  Rural Background  53.326444   
 4     1.609474e+09  62.92442    2.4  10.4   7.5  Rural Background  53.326444   
 ...            ...       ...    ...   ...   ...               ...        ...   
 8381  1.640977e+09  61.94653  179.5  12.8  11.5  Rural Background  53.326444   
 8382  1.640981e+09  58.53388  175.5  13.5  11.7  Rural Background  53.326444   
 8383  1.640984e+09  54.08347  167.4  13.9  11.7  Rural Background  53.326444   
 8384  1.640988e+09  53.36502  167.2  14.0  11.9  Rural Background  53.326444   
 8385  1.640992e+09  52.84614  172.9  14.1  11.9  Rural Background  53.326

In [36]:
def save_fig(model_name: str, figname: str):
    """Save the current matplotlib figure to ``./models/<model_name>/<figname>.png``.

    The target folder is created if needed, and the figure is closed after
    saving so successive plots do not accumulate on the same axes.

    Args:
        model_name: Name of the model; used as the sub-folder under ``./models``.
        figname: File name of the figure, without the ``.png`` extension.

    Raises:
        FileExistsError: If ``./models/<model_name>`` exists but is not a directory.
    """
    folder_path = f"./models/{model_name}"
    # exist_ok=True replaces the try/except: an existing directory is fine,
    # while a non-directory at that path is reported instead of being hidden.
    os.makedirs(folder_path, exist_ok=True)
    plt.savefig(f"{folder_path}/{figname}.png")
    plt.close()

In [13]:
def split_dataset(dataset):
    """Split a frame, in row order, into train / validation / test parts.

    The first 70% of the rows go to training, the next 15% to validation and
    the remainder to test. Rows are not shuffled, and each part is an
    independent copy of its slice.

    Args:
        dataset: Frame to split.

    Returns:
        Dict with the keys ``"train"``, ``"val"`` and ``"test"``.
    """
    row_count = len(dataset)
    first_cut = round(row_count * 0.7)
    second_cut = round(row_count * 0.85)

    boundaries = {
        "train": (0, first_cut),
        "val": (first_cut, second_cut),
        "test": (second_cut, row_count),
    }
    return {
        label: dataset.iloc[start:stop].copy()
        for label, (start, stop) in boundaries.items()
    }

In [54]:
def compile_model(train_dataset: pd.DataFrame, val_dataset: pd.DataFrame, test_dataset: pd.DataFrame, name: str) -> tf.keras.Model:
    """Build, train and evaluate a small dense network that predicts ``temp``.

    Despite its name, this function does more than compile: it builds the
    network, fits it on ``train_dataset`` (validating on ``val_dataset``),
    predicts on ``test_dataset`` and saves two figures through ``save_fig``
    (``loss`` and ``predictions``).

    Args:
        train_dataset: Training rows; also used to adapt the normalisation layer.
        val_dataset: Validation rows passed to ``fit``.
        test_dataset: Rows used only for the prediction plot.
        name: Model name; also the folder name used by ``save_fig``.

    Returns:
        The fitted Keras model.
    """
    # Define the columns we know all datasets have
    target = "temp"
    common_numeric = ["date", "latitude", "longitude"]
    common_text = ["location_type", "code"]
    # Work out what other columns/metrics are in this dataset, assume they are numeric
    other_metrics = train_dataset.columns.difference(common_numeric + common_text + [target])
    numeric_cols = common_numeric + other_metrics.to_list()
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() only added a stray "None" line.
    train_dataset.info()
    print(numeric_cols, train_dataset.columns)

    # Pass the input shape as an explicit tuple (the documented form) rather
    # than relying on a bare int being coerced.
    numeric_inputs = tf.keras.layers.Input(shape=(len(numeric_cols),), name="_".join(numeric_cols))
    normaliser = tf.keras.layers.Normalization()
    # Statistics come from the training split only.
    normaliser.adapt(train_dataset[numeric_cols])
    normalised = normaliser(numeric_inputs)

    dense = tf.keras.layers.Dense(32, activation="relu")(normalised)
    dense_2 = tf.keras.layers.Dense(64, activation="relu")(dense)
    dense_3 = tf.keras.layers.Dense(32, activation="relu")(dense_2)

    # Single linear unit: regression output.
    outputs = tf.keras.layers.Dense(1)(dense_3)

    model = tf.keras.Model(inputs=numeric_inputs, outputs=outputs, name=name)
    model.summary()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
        loss="mean_squared_error",
        metrics = ["mean_absolute_error"]
    )

    # NOTE(review): EarlyStopping watches the training loss while
    # ReduceLROnPlateau watches val_loss; confirm this asymmetry is intended.
    history = model.fit(
        train_dataset[numeric_cols],
        train_dataset[target],
        epochs=10,
        callbacks=[
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=1, min_lr=0.001),
        tf.keras.callbacks.EarlyStopping(monitor='loss', patience=1)
        ],
        validation_data=(val_dataset[numeric_cols], val_dataset[target])
    )

    # Training curves; val_loss is recorded because validation_data is supplied.
    plt.plot(history.history["loss"], label="Training loss")
    plt.plot(history.history["val_loss"], label="Validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Mean squared error")
    plt.title(f"{name}: loss")
    plt.legend()
    save_fig(name, "loss")

    predictions = model.predict(test_dataset[numeric_cols]).flatten()

    display(predictions)
    plt.plot(test_dataset[target].to_list(), label="Truth")
    plt.plot(predictions, label="Predictions")
    plt.xlabel("Test sample")
    plt.ylabel(target)
    plt.title(f"{name}: predictions vs truth")
    # Without legend() the labels above are never rendered.
    plt.legend()
    save_fig(name, "predictions")

    return model



In [44]:
def get_trained_model(name, dataset: pd.DataFrame):
    """Train and save a model for one site, skipping sites with no usable data.

    Args:
        name: Site/model name; also used in the save path.
        dataset: Prepared frame for the site, or ``None``.

    Returns:
        The trained model, saved under ``models/temperature/<name>/model``, or
        ``None`` when ``dataset`` is ``None`` or empty.
    """
    if dataset is None or dataset.empty:
        return None

    parts = split_dataset(dataset)
    trained = compile_model(parts["train"], parts["val"], parts["test"], name)
    trained.save("models/temperature/" + name + "/model")
    return trained
    

In [None]:
# One entry per site; the value is None for sites that had no usable data.
models = {
    site_id: get_trained_model(site_id, site_frame)
    for site_id, site_frame in retrieved_data.items()
}

In [68]:
def get_model_serving_config(model: tf.keras.Model):
    """Render one ``config { ... }`` entry of a model server config for ``model``.

    The entry uses ``model.name`` both as the served name and inside the base
    path ``/models/temperature/<name>/model``.

    NOTE(review): TensorFlow Serving normally looks for numeric version
    sub-directories under ``base_path``; confirm the deployment provides one.

    Args:
        model: Model whose ``name`` attribute is used.

    Returns:
        The config entry as text, ending with a newline.
    """
    model_name = model.name
    # The leading spaces are part of the emitted text and must stay as they are.
    entry_lines = [
        "config {",
        f"        name: '{model_name}'",
        f"        base_path: '/models/temperature/{model_name}/model'",
        "        model_platform: 'tensorflow'",
        "    }",
        "",
    ]
    return "\n".join(entry_lines)

In [69]:
# Created config following https://www.tensorflow.org/tfx/serving/serving_config#model_server_configuration
# Sites without a trained model (None) are left out of the config.
serving_config = (
    "model_config_list {\n"
    "    "
    + "    ".join(
        get_model_serving_config(trained)
        for trained in models.values()
        if trained is not None
    )
    + "\n}"
)
print(serving_config)
with open("models/models.config", "w") as config_file:
    config_file.write(serving_config)

model_config_list {
    config {
        name: 'MH'
        base_path: '/models/temperature/MH/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'SOUT'
        base_path: '/models/temperature/SOUT/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'STKR'
        base_path: '/models/temperature/STKR/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'MAN3'
        base_path: '/models/temperature/MAN3/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'NTN4'
        base_path: '/models/temperature/NTN4/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'PT4'
        base_path: '/models/temperature/PT4/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'SEND'
        base_path: '/models/temperature/SEND/model'
        model_platform: 'tensorflow'
    }
    config {
        name: 'STOK'
        base_path: '/models/temperature/STOK/mode

In [70]:
# Zip the whole models/ folder into models.zip in the current working directory.
make_archive(base_name="models", format="zip", root_dir="models")

'c:\\Users\\ltrowbridge\\Documents\\COMP3000-Project-Machine-Learning\\models.zip'

In [17]:
# NOTE(review): this cell looks like a leftover from a Google Colab session.
# `model` is not defined in any visible cell (only `models`, a dict, is), so
# it will fail on Restart & Run All unless `model` comes from elsewhere.
# The /content/... paths are absolute Colab paths; confirm whether this cell
# is still needed now that the models/ folder is archived above.
#drive.mount("/content/gdrive", force_remount=True)
model.save("/content/comp3000model")
zip_location = make_archive("model", "zip", "/content/comp3000model")
#move(zip_location, "/content/gdrive/MyDrive/comp3000model/model.zip")
#drive.flush_and_unmount()

INFO:tensorflow:Assets written to: /content/comp3000model\assets
