In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder


In [None]:
# Read the dataset CSV into the working frame and preview the first rows
df = pd.read_csv(filepath_or_buffer='data/AQbench_dataset.csv')
df.head(n=5)


In [None]:
# Label Encoding
# Integer-encode every object-dtype column except 'dataset', which must stay
# textual because the split cell compares it against 'train'/'test'/'val'.
str_columns = [
    col
    for col in df.select_dtypes(include=['object']).columns.tolist()
    if col != 'dataset'
]
label_encoders = {}  # column name -> fitted LabelEncoder, kept for inverse transforms
for col in str_columns:
    le = LabelEncoder()
    # Fill missing values BEFORE casting to str: astype(str) turns NaN into
    # the literal string 'nan', after which fillna() has nothing left to fill
    # and the intended 'NaN' category would never be created.
    df[col] = le.fit_transform(df[col].fillna('NaN').astype(str))
    label_encoders[col] = le
df.head()


In [None]:
def sine_cosine_encode(values, period=None):
    """Map cyclic values onto the unit circle as a sine/cosine pair.

    Args:
        values: Array-like of numbers to encode.
        period: Length of one full cycle. When ``None``, the maximum of
            ``values`` is used as the period.

    Returns:
        A ``(sin_values, cos_values)`` tuple computed element-wise from
        ``2 * pi * values / period``.
    """
    data = np.array(values)
    cycle = data.max() if period is None else period
    # Same expression order as before so the floating-point results match
    angle = 2 * np.pi * data / cycle
    return np.sin(angle), np.cos(angle)
# Replace raw longitude with its sine ('lonx') and cosine ('lony') on a
# 360-unit cycle, then drop the original column.
df['lonx'], df['lony'] = sine_cosine_encode(values=df['lon'], period=360)
df = df.drop(columns=['lon'])
df.head(n=5)


In [None]:
# Build the feature/target column lists from the variable metadata, then cut
# the frame into train/test/val tables using the 'dataset' column.
var_df = pd.read_csv('data/AQbench_variables.csv')
is_input = var_df['input_target'] == 'input'
is_target = var_df['input_target'] == 'target'
# Raw 'lon' is filtered out here (it was dropped from df) and replaced by the
# 'lonx'/'lony' columns, so no further removal step is needed.
input_cols = var_df.loc[is_input & (var_df['column_name'] != 'lon'), 'column_name'].tolist()
input_cols += ['lonx', 'lony']
target_cols = var_df.loc[is_target, 'column_name'].tolist()

# Filter each split once instead of recomputing the same mask per x/y table
train_rows = df[df['dataset'] == 'train']
test_rows = df[df['dataset'] == 'test']
val_rows = df[df['dataset'] == 'val']
x_train, y_train = train_rows[input_cols], train_rows[target_cols]
x_test, y_test = test_rows[input_cols], test_rows[target_cols]
x_val, y_val = val_rows[input_cols], val_rows[target_cols]

# NOTE: dropping 'dataset' makes this cell non-idempotent; re-running it
# requires re-executing the cells above that rebuild df.
df = df.drop('dataset', axis=1)
x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape


In [None]:
# Kaggle TPU-compatible TensorFlow model generator
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Per-target hyperparameters; rows are looked up by their 'column_name' value
hyper_df = pd.read_csv('data/hyperparameters.csv')
# Connect to TPU; if any step fails, fall back to TensorFlow's current
# default distribution strategy so the rest of the notebook still runs.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Prefer the stable TPUStrategy and only fall back to the experimental
    # alias on TensorFlow builds that do not expose the stable name.
    _tpu_strategy_cls = getattr(tf.distribute, 'TPUStrategy', None) or tf.distribute.experimental.TPUStrategy
    tpu_strategy = _tpu_strategy_cls(tpu)
    print('Running on TPU')
except Exception:
    tpu_strategy = tf.distribute.get_strategy()
    # Previously printed the undefined name `strategy`, which raised
    # NameError in exactly the branch meant to recover from a missing TPU.
    print('TPU not found, defaulting to', tpu_strategy)
def build_model(target):
    """Build and compile a fully connected Keras model for one target column.

    Hyperparameters come from the first ``hyper_df`` row whose
    ``column_name`` equals ``target``: hidden layer sizes, activation, loss,
    learning rate and L2 lambda. The input width is taken from
    ``x_train.shape[1]`` and the model is created inside
    ``tpu_strategy.scope()``.

    Args:
        target: Value to match against ``hyper_df['column_name']``.

    Returns:
        A compiled ``keras.Sequential`` model ending in a single-unit Dense
        layer.

    Raises:
        IndexError: If no row in ``hyper_df`` matches ``target``.
    """
    params = hyper_df.loc[hyper_df['column_name'] == target].iloc[0]
    # NOTE(review): eval() executes whatever text is stored in this CSV cell.
    # If the cell only ever holds a literal list/tuple, ast.literal_eval
    # would be the safer choice - confirm the file format before switching.
    layer_sizes = eval(params['hidden layers'])
    act_fn = params['activation']
    loss_name = params['loss']
    learning_rate = float(params['learning rate'])
    l2_lambda = float(params['L2 lambda'])
    n_features = x_train.shape[1]
    with tpu_strategy.scope():
        model = keras.Sequential()
        for width in layer_sizes:
            # A fresh regularizer instance per hidden layer
            penalty = keras.regularizers.l2(l2_lambda)
            model.add(layers.Dense(width, activation=act_fn, kernel_regularizer=penalty))
        model.add(layers.Dense(1))
        model.build((None, n_features))
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss=loss_name)
    return model
# Example: pick a target and build/fit using its hyperparameters
target_name = 'o3_average_values'
# Select the hyperparameter row once, taking the first match exactly as
# build_model does. Calling int() on a one-element Series is deprecated in
# recent pandas, so the scalars are read from the row instead.
target_params = hyper_df.loc[hyper_df['column_name'] == target_name].iloc[0]
model = build_model(target_name)
history = model.fit(
    x_train, y_train[target_name],
    batch_size=int(target_params['batch size']),
    epochs=int(target_params['epochs']),
    validation_data=(x_val, y_val[target_name]),
)
