# data_world.csv

Energia grupowana po regionie i aaczasie + informacje o regionie, energia dyskretna, encoder bez atencji. Tylko transformer.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import warnings
import tqdm
warnings.filterwarnings('ignore')

2023-12-15 21:48:39.093090: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-15 21:48:39.093257: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-15 21:48:39.093603: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-15 21:48:39.154996: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True) 
        self.pos_encoding = tf.keras.layers.Embedding(100, d_model)

    def compute_mask(self, *args, **kwargs):
        return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        x = self.embedding(x)
        x_pos = tf.range(length, dtype=tf.int32)
        x_pos = self.pos_encoding(x_pos)
        x = x + x_pos
        return x

In [3]:
class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

In [4]:
class CrossAttention(BaseAttention):
    def call(self, x, context):
        attn_output = self.mha(
          query=x,
          key=context,
          value=context)

        x = self.add([x, attn_output])
        x = self.layernorm(x)

        return x

In [5]:
class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

In [6]:
class CausalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x,
            use_causal_mask = True)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

In [7]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model),
        tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x) 
        return x

In [8]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self,*, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()

        self.self_attention = GlobalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x):
        x = self.self_attention(x)
        x = self.ffn(x)
        return x

In [9]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads,
                dff, vocab_size, dropout_rate=0.1):
        super().__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.emb_x = tf.keras.layers.Embedding(vocab_size, d_model)
        self.emb_y = tf.keras.layers.Embedding(vocab_size, d_model)
        self.conc = tf.keras.layers.Concatenate(axis=-2)
        self.dense = tf.keras.layers.Dense(4 * d_model)
        self.dense2 = tf.keras.layers.Dense(d_model)

    def call(self, features):
        pos = features
        x = self.emb_x(pos[:,0][:, tf.newaxis])
        y = self.emb_y(pos[:,1][:, tf.newaxis])
        x = self.conc([x,y])
        x = self.dense(x)
        x = self.dense2(x)
        return x  # Shape `(batch_size, seq_len, d_model)`.

In [10]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self,
                *,
                d_model,
                num_heads,
                dff,
                dropout_rate=0.1):
        super(DecoderLayer, self).__init__()

        self.causal_self_attention = CausalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.cross_attention = CrossAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x, context):
        x = self.causal_self_attention(x=x)
        x = self.cross_attention(x=x, context=context)

        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

In [11]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size,
                dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
                                                d_model=d_model,)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_layers = [
            DecoderLayer(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

        self.last_attn_scores = None

    def call(self, x, context):
        # `x` is token-IDs shape (batch, target_seq_len)
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

        x = self.dropout(x)

        for i in range(self.num_layers):
            x  = self.dec_layers[i](x, context)

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

In [12]:
class Transformer(tf.keras.Model):
    def __init__(self, *, num_layers, d_model, num_heads,
                dropout_rate=0.1, vocab_size):
        super().__init__()
        self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=4*d_model,
                            vocab_size=vocab_size,
                            dropout_rate=dropout_rate)

        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=4*d_model,
                            vocab_size=vocab_size,
                            dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        # To use a Keras model with `.fit` you must pass all your inputs in the
        # first argument.
        x, context  = inputs
        context = self.encoder(context)  # (batch_size, context_len, d_model)
        x = self.decoder(x, context) # (batch_size, target_len, d_model)
        x = x[:, -1, :]
        logits = self.final_layer(x)  # (batch_size, 1, target_vocab_size)

        return logits

In [13]:
df = pd.read_csv('../../data/usgs_data_small.csv')
df.head()

Unnamed: 0,time,longitude,latitude,depth,mag
0,1940-01-06 20:04:35.170000,25.814,35.373,15.0,5.84
1,1940-01-06 15:03:40.340000,171.134,-22.015,180.0,
2,1940-01-06 09:15:39.210000,151.498,45.077,25.0,6.07
3,1940-01-05 09:42:55.570000,-116.367333,33.173167,6.0,3.42
4,1940-01-05 07:20:50.460000,-119.442,32.929333,6.0,3.97


In [14]:
df = df[["time", "latitude", "longitude", "mag"]]

In [15]:
df = df[df["time"] > '1973-01-01']

In [16]:
df["time"] = pd.to_datetime(df["time"], format="mixed")
df

Unnamed: 0,time,latitude,longitude,mag
69377,1973-01-07 00:12:09.960,19.771500,-155.432333,2.34
69378,1973-01-06 23:15:49.900,-15.496000,167.487000,5.30
69379,1973-01-06 21:01:50.900,33.098000,48.256000,4.80
69380,1973-01-06 17:26:40.100,20.207667,-157.021500,3.23
69381,1973-01-06 16:52:41.900,-14.665000,166.384000,6.10
...,...,...,...,...
4329143,2023-09-24 02:20:39.520,34.071333,-117.314167,1.09
4329144,2023-09-24 02:11:22.160,58.195167,-155.259000,0.14
4329145,2023-09-24 02:10:46.071,40.223400,-119.668800,0.70
4329146,2023-09-24 02:06:35.040,39.279500,-123.223333,1.91


In [17]:
time_split = 30
geo_split = 1

In [18]:
df["energy"] = 10**(1.44*df["mag"]+5.24)
df["label"] = np.where(df["mag"] > 5, 1, 0)
df["time"] = df["time"].dt.floor('d')
df["time"] = df["time"] - np.min(df["time"])
df["time"] = df["time"].dt.days // time_split
df["time"] = df["time"].astype(int)
df["latitude"] = df["latitude"] // geo_split
df["latitude"] = df["latitude"] + np.abs(np.min(df["latitude"]))
df["longitude"] = df["longitude"] // geo_split
df["longitude"] = df["longitude"] + np.abs(np.min(df["longitude"]))
df["geo"] = df["latitude"].astype(str) + "_" + df["longitude"].astype(str)

In [23]:
df_agg = df.groupby(["latitude", "longitude", "time"]).agg({"energy": "sum", "label": "sum", "geo": "max"}).reset_index()
df_agg

Unnamed: 0,latitude,longitude,time,energy,label,geo
0,0.0,221.0,482,3.767038e+11,0,0.0_221.0
1,0.0,312.0,473,7.311391e+11,0,0.0_312.0
2,1.0,312.0,375,5.248075e+11,0,1.0_312.0
3,2.0,136.0,268,1.018591e+12,0,2.0_136.0
4,2.0,291.0,580,1.940886e+11,0,2.0_291.0
...,...,...,...,...,...,...
497119,172.0,227.0,511,1.000000e+11,0,172.0_227.0
497120,172.0,232.0,313,1.000000e+11,0,172.0_232.0
497121,172.0,235.0,359,2.703958e+11,0,172.0_235.0
497122,172.0,236.0,245,2.703958e+11,0,172.0_236.0


In [24]:
df_agg["label"] = np.where(df_agg["label"] > 0, 1, 0)

In [29]:
def make_ds(df_agg, block_size, vocab_size):
    dfs_train, dfs_val = [], []
    for pos in tqdm.tqdm(df_agg["geo"].unique()):
        i, j = pos.split("_")
        i, j = float(i), float(j)
        tmp = df_agg[(df_agg["latitude"] == i) & (df_agg["longitude"] == j)]
        if not tmp.empty:
            start = tmp["time"].min()
            end = tmp["time"].max()
            tmp = tmp.set_index("time").reindex(range(start, end)).fillna(0).rename_axis('time').reset_index()
            tmp["label"] = tmp["label"].shift(-1)
            tmp["latitude"] = i
            tmp["longitude"] = j
            n = int(0.8 * len(tmp))
            df_train = tmp[:n]
            df_val = tmp[n:]
            bins_train = np.linspace(df_train["energy"].min(), df_train["energy"].max(), vocab_size)
            bins_val = np.linspace(df_train["energy"].min(), df_train["energy"].max(), vocab_size)
            df_train["energy"] = np.digitize(df_train["energy"], bins_train)
            df_val["energy"] = np.digitize(df_val["energy"], bins_val)
            for idx in range(1, block_size):
                df_train["energy" + str(idx)] = df_train["energy"].shift(idx)
                df_val["energy" + str(idx)] = df_val["energy"].shift(idx)
            dfs_train.append(df_train)
            dfs_val.append(df_val)
    df_final_train = pd.concat(dfs_train)
    df_final_val = pd.concat(dfs_val)
    return df_final_train, df_final_val

In [30]:
block_size = 16
vocab_size = 64
df_train, df_val = make_ds(df_agg, block_size, vocab_size)
df_train.dropna(inplace=True)
df_val.dropna(inplace=True)

100%|██████████| 13390/13390 [04:13<00:00, 52.73it/s]


In [31]:
df_train.head()

Unnamed: 0,time,latitude,longitude,energy,label,geo,energy1,energy2,energy3,energy4,...,energy6,energy7,energy8,energy9,energy10,energy11,energy12,energy13,energy14,energy15
15,444,11.0,44.0,1,0.0,0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,64.0
16,445,11.0,44.0,1,0.0,0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
17,446,11.0,44.0,1,0.0,0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
18,447,11.0,44.0,1,0.0,0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
19,448,11.0,44.0,1,0.0,0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [39]:
x_train = df_train.drop(["label", "latitude", "longitude", "time", "geo"], axis=1)[::-1].to_numpy()
y_train = df_train["label"].to_numpy()
x_val = df_val.drop(["label", "latitude", "longitude", "time", "geo"], axis=1)[::-1].to_numpy()
y_val = df_val["label"].to_numpy()

In [40]:
x_pos_train = df_train[["longitude", "latitude"]].to_numpy()
x_pos_val = df_val[["longitude", "latitude"]].to_numpy()

In [41]:
# add anoother column whith 1 - label
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)
# y_train = np.concatenate((y_train, 1 - y_train), axis=1)
# y_val = np.concatenate((y_val, 1 - y_val), axis=1)

In [51]:
df_train["label"].value_counts()

label
0.0    2957147
1.0      35562
Name: count, dtype: int64

In [52]:
pos = 35562
neg = 2957147
total = pos + neg

In [56]:
model = Transformer(num_layers=2, d_model=128, num_heads=2, dropout_rate=0.1, vocab_size=vocab_size)

In [57]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                metrics=['accuracy', tf.keras.metrics.Recall(), tf.keras.metrics.Precision()])

In [60]:
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)
class_weight = {0: weight_for_0, 1: weight_for_1}
model.fit([x_train, x_pos_train], y_train, epochs=40, batch_size=2048, validation_data=([x_val, x_pos_val], y_val), class_weight=class_weight)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40

KeyboardInterrupt: 

In [59]:
model.build(input_shape=[(None, block_size), (None, 2)])
model.count_params()

962049

In [None]:
model.summary()

Model: "transformer_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_2 (Encoder)         multiple                  3175424   
                                                                 
 decoder_2 (Decoder)         multiple                  4247552   
                                                                 
 dense_50 (Dense)            multiple                  514       
                                                                 
Total params: 7423490 (28.32 MB)
Trainable params: 7423490 (28.32 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
