In [None]:
!pip install tensorflow

In [None]:
pip install tf-keras

In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model
from tensorflow.keras.layers import Lambda
from transformers import TFViTModel, ViTImageProcessor
import json
import os
from tensorflow.keras.utils import register_keras_serializable



In [None]:
# Mount Google Drive at /content/drive; every data/model path below lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder holding the pickled training shards; the trailing slash matters
# because file names are appended to this string directly.
train_dir = '/content/drive/MyDrive/Colab Notebooks/Masters/BigDataAnalysisProject/osv5m/processed/train/'

In [None]:
# Load several consecutive shard files and merge them into one training frame.
file_start = 0    # index of the first shard to load
number_files = 3  # number of consecutive shards to load

# Shard names are built as "0<i>.pkl", i.e. a literal leading zero followed by
# the index (not a general zero-padding scheme).
# NOTE(security): pd.read_pickle can execute arbitrary code while unpickling;
# only load shard files that you produced yourself.
shard_frames = [
    pd.read_pickle(f'{train_dir}0{i}.pkl')
    for i in range(file_start, file_start + number_files)
]

# Concatenate once instead of growing the frame inside the loop: repeated
# pd.concat re-copies all previously loaded rows on every iteration, and
# seeding with an empty DataFrame can interfere with dtype inference.
# An empty shard range still yields an empty frame.
df_train = pd.concat(shard_frames, ignore_index=True) if shard_frames else pd.DataFrame()
print(len(df_train))
print(df_train.columns)

150000
Index(['latitude', 'longitude', 'country', 'city', 'image', 'time',
       'year_percent'],
      dtype='object')


In [None]:
# Where trained models and their training histories are stored
# (kept with a trailing slash because names are appended directly).
model_dir = '/content/drive/MyDrive/Colab Notebooks/Masters/BigDataAnalysisProject/models/'
# Base name shared by the saved model file and its history file
model_name = 'transformer_v2_model' # Change this to change model
# <model_dir><model_name>.keras holds the model, <model_dir><model_name>_history.json its metrics
model_path = model_dir + model_name + '.keras'
history_path = model_dir + model_name + '_history.json'

In [None]:
def cnn_model():
    """Build the baseline convolutional regressor.

    Three identical stages (Conv2D with 64 3x3 filters and ReLU, batch
    normalisation, max pooling) are followed by a flattened 128-unit dense
    layer. Those image features are concatenated with the 2-value
    ``time_date`` input, passed through a 64-unit dense layer and mapped to
    a 2-unit output layer with no activation.

    Returns:
        An uncompiled Keras ``Model`` taking ``image_input`` (224, 224, 3)
        and ``time_date`` (2,) and producing 2 values per sample.
    """
    image_input = Input(shape=(224, 224, 3), name='image_input')
    time_date_input = Input(shape=(2,), name='time_date')

    # Convolutional feature extractor: the same stage repeated three times.
    features = image_input
    for _ in range(3):
        features = layers.Conv2D(64, (3, 3), activation='relu')(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D()(features)

    features = layers.Flatten()(features)
    features = layers.Dense(128, activation='relu')(features)
    features = layers.BatchNormalization()(features)

    # Fuse image features with the time/date features before the head.
    merged = layers.concatenate([features, time_date_input])

    head = layers.Dense(64, activation='relu')(merged)
    head = layers.BatchNormalization()(head)

    prediction = layers.Dense(2)(head)

    return Model(inputs=[image_input, time_date_input], outputs=prediction)

def cv_trig_model():
    """Build a ViT-backed regressor with a 3-value output.

    The image is transposed to channels-first, rescaled and normalised with
    the ViT image processor's mean/std, and passed through a pretrained
    ``TFViTModel``. The token sequence is average-pooled, concatenated with
    the 2-value ``time_date`` input, and fed to a stack of
    Dense -> BatchNorm -> ReLU blocks (128, 128, 64, 64 units) followed by a
    3-unit output layer with no activation.

    Returns:
        An uncompiled Keras ``Model`` taking ``image_input`` (224, 224, 3)
        and ``time_date`` (2,) and producing 3 values per sample.
    """
    image_input = Input(shape=(224, 224, 3), name='image_input')
    time_date_input = Input(shape=(2,), name='time_date')

    vit = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
    vit.trainable = True
    processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

    def vit_preprocess(images):
        # `images` is channels-first here, so the per-channel statistics are
        # reshaped to broadcast along axis 1.
        mean = tf.reshape(tf.constant(processor.image_mean, dtype=tf.float32), (1, 3, 1, 1))
        std = tf.reshape(tf.constant(processor.image_std, dtype=tf.float32), (1, 3, 1, 1))
        # Assumes raw pixel values in [0, 255] — TODO confirm against the data pipeline.
        return (images / 255.0 - mean) / std

    image_cf = Lambda(lambda x: tf.transpose(x, perm=[0, 3, 1, 2]), name="to_channels_first")(image_input)

    # The tensor is already channels-first at this point, so the declared
    # per-sample output shape is (3, 224, 224), not (224, 224, 3).
    normalized = Lambda(vit_preprocess, output_shape=(3, 224, 224))(image_cf)
    # NOTE(review): training=True is hard-coded, so the ViT also runs in
    # training mode during evaluation/prediction — confirm this is intended.
    x = Lambda(lambda x: vit(x, training=True).last_hidden_state, output_shape=(197, 768))(normalized)
    x = layers.GlobalAveragePooling1D()(x)

    # Fuse image features with the time/date features. The head must consume
    # `combined`; feeding it `x` would leave the time_date input unused.
    combined = layers.concatenate([x, time_date_input])

    z = combined
    for units in (128, 128, 64, 64):
        z = layers.Dense(units)(z)
        z = layers.BatchNormalization()(z)
        z = layers.ReLU()(z)

    # Three regression outputs; presumably (latitude / 90, cos(longitude),
    # sin(longitude)) to match the optional target transform — confirm.
    outputs = layers.Dense(3)(z)

    returned_model = Model(inputs=[image_input, time_date_input], outputs=outputs)
    return returned_model

def fc_model():
    """Build a fully connected baseline on the flattened image.

    The image is flattened and passed through ReLU dense layers of 256, 128,
    64 and 32 units, then a 2-unit output layer with no activation.

    NOTE(review): ``time_date`` is declared as a model input but is not
    connected to the output, so it has no influence on predictions — confirm
    this image-only baseline is intentional.

    Returns:
        An uncompiled Keras ``Model`` taking ``image_input`` (224, 224, 3)
        and ``time_date`` (2,) and producing 2 values per sample.
    """
    image_input = Input(shape=(224, 224, 3), name='image_input')
    time_date_input = Input(shape=(2,), name='time_date')

    hidden = layers.Flatten()(image_input)
    for width in (256, 128, 64, 32):
        hidden = layers.Dense(width, activation='relu')(hidden)

    prediction = layers.Dense(2)(hidden)

    return Model(inputs=[image_input, time_date_input], outputs=prediction)

def cnn_up_trig_model():
    """Build a widening CNN regressor with a 3-value output.

    Three Conv2D -> BatchNorm -> ReLU -> MaxPool stages with 32, 64 and 128
    filters feed a 128-unit Dense -> BatchNorm -> ReLU layer. The result is
    concatenated with the 2-value ``time_date`` input, passed through a
    64-unit Dense -> BatchNorm -> ReLU block and mapped to a 3-unit output
    layer with no activation.

    Returns:
        An uncompiled Keras ``Model`` taking ``image_input`` (224, 224, 3)
        and ``time_date`` (2,) and producing 3 values per sample.
    """
    image_input = Input(shape=(224, 224, 3), name='image_input')
    time_date_input = Input(shape=(2,), name='time_date')

    # Convolutional stages with a growing number of filters.
    features = image_input
    for n_filters in (32, 64, 128):
        features = layers.Conv2D(n_filters, (3, 3))(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU()(features)
        features = layers.MaxPooling2D()(features)

    features = layers.Flatten()(features)
    features = layers.Dense(128)(features)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # Fuse image features with the time/date features before the head.
    merged = layers.concatenate([features, time_date_input])

    head = layers.Dense(64)(merged)
    head = layers.BatchNormalization()(head)
    head = layers.ReLU()(head)

    # Three regression outputs; presumably (latitude / 90, cos(longitude),
    # sin(longitude)) to match the optional target transform — confirm.
    prediction = layers.Dense(3)(head)

    return Model(inputs=[image_input, time_date_input], outputs=prediction)

In [None]:
# Custom layers used by transformer v2; they are registered as Keras
# serializables so that a saved model containing them can be loaded again.
@register_keras_serializable()
class TransposeCHW(tf.keras.layers.Layer):
    """Reorder a 4-D batch from axes (0, 1, 2, 3) to (0, 3, 1, 2).

    Used to turn channels-last image batches into channels-first ones.
    """

    def call(self, inputs):
        channels_first = tf.transpose(inputs, perm=[0, 3, 1, 2])
        return channels_first

@register_keras_serializable()
class ViTPoolerOutput(tf.keras.layers.Layer):
    """Keras layer wrapping a pretrained ``TFViTModel`` and returning its pooler output.

    Only ``model_name`` is stored in the layer config, so rebuilding the
    layer through ``from_config`` calls ``from_pretrained`` again.
    NOTE(review): whether fine-tuned ViT weights are restored on top of that
    when a saved model is loaded is not visible from this class — confirm.

    Args:
        model_name: Checkpoint identifier passed to ``TFViTModel.from_pretrained``.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, model_name="google/vit-base-patch16-224-in21k", **kwargs):
        super().__init__(**kwargs)
        # Imported locally so the class does not rely on a module-level import.
        from transformers import TFViTModel

        self.model_name = model_name
        self.vit = TFViTModel.from_pretrained(model_name)
        self.vit.trainable = True

    def call(self, inputs):
        """Run the ViT on ``inputs`` (passed as ``pixel_values``) and return ``pooler_output``."""
        vit_outputs = self.vit({"pixel_values": inputs})
        return vit_outputs.pooler_output

    def get_config(self):
        """Return the base layer config extended with ``model_name``."""
        return {**super().get_config(), "model_name": self.model_name}

    @classmethod
    def from_config(cls, config):
        """Recreate the layer by passing the config entries to the constructor."""
        return cls(**config)

# Transformer V2
def cv_v2_model():
    """Build the second ViT-based regressor (pooler output + dense head).

    The image is transposed to channels-first and passed through
    ``ViTPoolerOutput``. The pooled embedding is concatenated with the
    2-value ``time_date`` input and fed to a stack of
    Dense -> BatchNorm -> ReLU blocks (256, 128, 64, 32 units) followed by a
    2-unit output layer with no activation.

    Because the head consumes the concatenated features, its first Dense
    layer is two inputs wider than in a model whose head is fed the image
    embedding alone.

    NOTE(review): unlike ``cv_trig_model``, no pixel rescaling/normalisation
    is applied before the ViT — confirm the images are normalised upstream.

    Returns:
        An uncompiled Keras ``Model`` taking ``image_input`` (224, 224, 3)
        and ``time_date`` (2,) and producing 2 values per sample.
    """
    image_input = Input(shape=(224, 224, 3), name='image_input')
    time_date_input = Input(shape=(2,), name='time_date')

    x = TransposeCHW()(image_input)
    x = ViTPoolerOutput()(x)

    # Fuse image features with the time/date features. The head must consume
    # `combined`; feeding it `x` would leave the time_date input unused.
    combined = layers.concatenate([x, time_date_input])

    z = combined
    for units in (256, 128, 64, 32):
        z = layers.Dense(units)(z)
        z = layers.BatchNormalization()(z)
        z = layers.ReLU()(z)

    # Two regression outputs; presumably raw (latitude, longitude), matching
    # the untransformed y_train used for training — confirm.
    outputs = layers.Dense(2)(z)

    returned_model = Model(inputs=[image_input, time_date_input], outputs=outputs)
    return returned_model

In [None]:
# Resume from the saved model when one exists; otherwise build and compile a new one.
if os.path.exists(model_path):
    print("Loading existing model...")
    model = tf.keras.models.load_model(model_path)
    # A saved model can exist without its history file (e.g. the history was
    # deleted or never written); fall back to an empty history instead of
    # failing with FileNotFoundError.
    if os.path.exists(history_path):
        with open(history_path, "r") as f:
            old_history = json.load(f)
    else:
        print("No saved history found for existing model.")
        old_history = None
else:
    print("Creating new model...")
    model = cv_v2_model()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mean_squared_error', metrics=['mae'])
    old_history = None

Loading existing model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
All PyTorch model weights were used when initializing TFViTModel.

All the weights of TFViTModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [None]:
# Split the loaded frame into model inputs and regression targets
# Each entry of the "image" column is stacked along a new leading axis.
X_train_image = np.stack(df_train["image"].to_numpy())
# Auxiliary inputs: the "time" and "year_percent" columns as float32.
X_train_time_date = df_train[["time", "year_percent"]].to_numpy().astype(np.float32)

# Targets: the "latitude" and "longitude" columns as float32, in that order.
y_train = df_train[["latitude", "longitude"]].to_numpy().astype(np.float32)
# Drop the frame once the arrays are extracted; re-running this cell
# therefore requires reloading the data first.
del df_train

In [None]:
# OPTIONAL BLOCK TO TRANSFORM DATA
# Re-express the targets as (lat / 90, cos(long), sin(long)).
# The cosine/sine pair lies in [-1, 1]; the scaled latitude does too,
# assuming latitudes are given in degrees within [-90, 90] — TODO confirm.
lat_scaled = y_train[:, 0] / 90.0
long_rad = np.radians(y_train[:, 1])
long_cos, long_sin = np.cos(long_rad), np.sin(long_rad)

# Use this as y in model.fit if you want to transform the data
# (one row per sample, three columns).
y_transformed = np.column_stack((lat_scaled, long_cos, long_sin))

In [None]:
# Train one epoch at a time so that the model and the accumulated history are
# written to disk after every epoch.
num_epochs = 9

for _ in range(num_epochs):
    # Trains on the untransformed y_train (two target columns).
    new_history = model.fit(
        x={"image_input": X_train_image, "time_date": X_train_time_date},
        y=y_train,
        epochs=1,
        batch_size=64,
        validation_split=0.2)

    model.save(model_path)

    # Start from everything recorded so far, so metrics that exist only in
    # the old history are kept, then append this epoch's values. Values are
    # cast to plain floats so they are always JSON-serialisable.
    combined_history = {
        key: [float(value) for value in values]
        for key, values in (old_history or {}).items()
    }
    for key, values in new_history.history.items():
        combined_history.setdefault(key, []).extend(float(value) for value in values)
    old_history = combined_history

    with open(history_path, "w") as f:
        json.dump(combined_history, f)
    print("Saved History")

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11973s[0m 6s/step - loss: 1729.6740 - mae: 25.4684 - val_loss: 1154.9860 - val_mae: 20.0157
Saved History
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12094s[0m 6s/step - loss: 1525.0836 - mae: 24.2657 - val_loss: 1144.3341 - val_mae: 20.0502
Saved History
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12195s[0m 7s/step - loss: 1428.1615 - mae: 23.5272 - val_loss: 1087.0917 - val_mae: 19.5058
Saved History
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11961s[0m 6s/step - loss: 1315.4471 - mae: 22.6788 - val_loss: 1144.1581 - val_mae: 20.1372
Saved History
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12032s[0m 6s/step - loss: 1236.7421 - mae: 22.0593 - val_loss: 1389.7229 - val_mae: 22.8025
Saved History
[1m1481/1875[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m33:20[0m 5s/step - loss: 1179.5049 - mae: 21.6298

KeyboardInterrupt: 

In [None]:
# Combine history if old history exists
# Latest fit results as plain floats (JSON-serialisable).
latest_history = {
    key: [float(value) for value in values]
    for key, values in new_history.history.items()
}

if old_history is None:
    print("No old history found.")
    combined_history = latest_history
elif all(
    values and old_history.get(key, [])[-len(values):] == values
    for key, values in latest_history.items()
):
    # The training loop folds every finished epoch into old_history, so the
    # latest results are usually already at its tail. Appending them again
    # would record the last epoch twice.
    print("New history is already part of old history.")
    combined_history = {
        key: [float(value) for value in values]
        for key, values in old_history.items()
    }
else:
    print("Combining new history with old history.")
    combined_history = {
        key: [float(value) for value in values]
        for key, values in old_history.items()
    }
    # setdefault tolerates metrics present in only one of the two histories.
    for key, values in latest_history.items():
        combined_history.setdefault(key, []).extend(values)

In [None]:
# Save Model
# Persists the current in-memory model to model_path.
# NOTE(review): if training was interrupted mid-epoch, the weights saved here
# may be newer than the last entry recorded in the history — confirm intended.
model.save(model_path)
print("Saved Model")

In [None]:
# Save History
# Writes combined_history as JSON to history_path (opened in "w" mode, so an
# existing file is replaced).
with open(history_path, "w") as f:
    json.dump(combined_history, f)
    print("Saved History")

In [None]:
# Imported here, next to its only use in this notebook.
from google.colab import runtime
# End session to save compute time
# (run only after the model and history have been saved above).
runtime.unassign()