# data_world.csv

Energia grupowana po regionie i czasie + informacje o regionie + liczba trzęsień, energia ciągła (LSTM embeddings), encoder bez atencji. Tylko transformer.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import warnings
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings('ignore')

2023-10-04 15:39:13.875985: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super().__init__()
        self.d_model = d_model
        self.lstm = tf.keras.layers.LSTM(d_model, return_sequences=True)
        self.pos_encoding = tf.keras.layers.Embedding(100, d_model)

    def call(self, x):
        length = tf.shape(x)[1]
        x = self.lstm(x)
        x_pos = tf.range(length, dtype=tf.int32)
        x_pos = self.pos_encoding(x_pos)
        x = x + x_pos
        return x

In [3]:
class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

In [4]:
class CrossAttention(BaseAttention):
    def call(self, x, context):
        attn_output = self.mha(
          query=x,
          key=context,
          value=context)

        x = self.add([x, attn_output])
        x = self.layernorm(x)

        return x

In [5]:
class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

In [6]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model),
        tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x) 
        return x

In [7]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads,
                dff, vocab_size, dropout_rate=0.1):
        super().__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.emb_x = tf.keras.layers.Embedding(vocab_size, d_model)
        self.emb_y = tf.keras.layers.Embedding(vocab_size, d_model)
        self.conc = tf.keras.layers.Concatenate(axis=-2)
        self.dense = tf.keras.layers.Dense(4 * d_model)
        self.dense2 = tf.keras.layers.Dense(d_model)

    def call(self, features):
        pos = features
        x = self.emb_x(pos[:,0][:, tf.newaxis])
        y = self.emb_y(pos[:,1][:, tf.newaxis])
        x = self.conc([x,y])
        x = self.dense(x)
        x = self.dense2(x)
        return x  # Shape `(batch_size, seq_len, d_model)`.

In [8]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self,
                *,
                d_model,
                num_heads,
                dff,
                dropout_rate=0.1):
        super(DecoderLayer, self).__init__()

        self.causal_self_attention = GlobalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.cross_attention = CrossAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x, context):
        x = self.causal_self_attention(x=x)
        x = self.cross_attention(x=x, context=context)

        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

In [9]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, dff,
                dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(d_model=d_model,)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_layers = [
            DecoderLayer(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

        self.last_attn_scores = None

    def call(self, x, context):
        # `x` is token-IDs shape (batch, target_seq_len)
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

        x = self.dropout(x)

        for i in range(self.num_layers):
            x  = self.dec_layers[i](x, context)

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

In [10]:
class Transformer(tf.keras.Model):
    def __init__(self, *, num_layers, d_model, num_heads,
                dropout_rate=0.1):
        super().__init__()
        self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=4*d_model,
                            dropout_rate=dropout_rate)

        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=4*d_model,
                            dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(2)

    def call(self, inputs):
        # To use a Keras model with `.fit` you must pass all your inputs in the
        # first argument.
        x, context  = inputs
        context = self.encoder(context)  # (batch_size, context_len, d_model)
        x = self.decoder(x, context) # (batch_size, target_len, d_model)
        x = x[:, -1, :]
        logits = self.final_layer(x)  # (batch_size, 1, target_vocab_size)

        return logits

In [11]:
df = pd.read_csv('../../data/data_world.csv')
df.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,1930-12-08T08:01:02.000Z,23.261,120.277,15.0,6.3,mw,,,,,...,2015-05-13T18:52:43.000Z,,,,,,,,,
1,1930-12-03T18:51:47.000Z,18.233,96.298,10.0,7.4,mw,,,,,...,2015-05-13T18:52:43.000Z,,,,,,,,,
2,1930-12-02T07:01:30.000Z,25.854,98.356,35.0,6.2,mw,,,,,...,2015-05-13T18:52:43.000Z,,,,,,,,,
3,1930-11-28T07:32:56.000Z,18.779,-106.767,15.0,6.3,mw,,,,,...,2015-05-13T18:52:43.000Z,,,,,,,,,
4,1930-11-25T19:02:53.000Z,35.05,139.129,15.0,6.9,mw,,,,,...,2015-05-13T18:52:43.000Z,,,,,,,,,


In [12]:
df = df[["time", "latitude", "longitude", "mag"]]

In [13]:
df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%dT%H:%M:%S.%fZ")
df

Unnamed: 0,time,latitude,longitude,mag
0,1930-12-08 08:01:02.000,23.2610,120.2770,6.3
1,1930-12-03 18:51:47.000,18.2330,96.2980,7.4
2,1930-12-02 07:01:30.000,25.8540,98.3560,6.2
3,1930-11-28 07:32:56.000,18.7790,-106.7670,6.3
4,1930-11-25 19:02:53.000,35.0500,139.1290,6.9
...,...,...,...,...
797041,2018-09-01 01:14:38.230,-30.4830,-177.9279,4.3
797042,2018-09-01 01:07:59.120,-10.7558,124.3621,4.0
797043,2018-09-01 01:00:13.810,-5.5167,147.1735,4.6
797044,2018-09-01 00:27:11.440,46.8819,155.6566,4.3


In [14]:
time_split = 30
time_cut = dt.datetime(1973, 1, 1)
geo_split = 10

In [15]:
df = df[df["time"] > time_cut]
df["energy"] = 10**(1.44*df["mag"]+5.24)
df["label"] = np.where(df["mag"] > 5, 1, 0)
df["time"] = df["time"].dt.floor('d')
df["time"] = df["time"] - np.min(df["time"])
df["time"] = df["time"].dt.days // time_split
df["time"] = df["time"].astype(int)
df["latitude"] = df["latitude"] // geo_split
df["latitude"] = df["latitude"] + np.abs(np.min(df["latitude"]))
df["longitude"] = df["longitude"] // geo_split
df["longitude"] = df["longitude"] + np.abs(np.min(df["longitude"]))

In [16]:
df_agg = df.groupby(["latitude", "longitude", "time"]).agg({"energy": "sum", "label": "sum"}).reset_index()
df_agg["count"] = df.groupby(["latitude", "longitude", "time"]).count().reset_index()["mag"]
df_agg

Unnamed: 0,latitude,longitude,time,energy,label,count
0,0.0,13.0,268,1.018591e+12,0,1
1,0.0,22.0,482,3.767038e+11,0,1
2,0.0,31.0,375,5.248075e+11,0,1
3,0.0,31.0,473,7.311391e+11,0,1
4,0.0,33.0,280,2.703958e+11,0,1
...,...,...,...,...,...,...
71216,17.0,30.0,487,1.018591e+12,0,1
71217,17.0,30.0,489,3.767038e+11,0,1
71218,17.0,30.0,509,7.311391e+11,0,1
71219,17.0,30.0,538,3.767038e+11,0,1


In [17]:
df_agg["label"] = np.where(df_agg["label"] > 0, 1, 0)

In [18]:
def make_ds(df_agg, block_size):
    dfs_train, dfs_val = [], []
    for i in df_agg["latitude"].unique():
        for j in df_agg["longitude"].unique():
            tmp = df_agg[(df_agg["latitude"] == i) & (df_agg["longitude"] == j)]
            if not tmp.empty:
                start = max(tmp["time"].min() - block_size, 0)
                end = min(tmp["time"].max() + block_size, df_agg["time"].max())
                tmp = tmp.set_index("time").reindex(range(start, end)).fillna(0).rename_axis('time')
                tmp["label"] = tmp["label"].shift(-1)
                tmp["latitude"] = i
                tmp["longitude"] = j
                n = int(0.8 * len(tmp))
                df_train = tmp[:n]
                df_val = tmp[n:]    
                dfs_train.append(df_train)
                dfs_val.append(df_val)
    df_final_train = pd.concat(dfs_train)
    df_final_val = pd.concat(dfs_val)
    return df_final_train, df_final_val

In [19]:
block_size = 16
df_train, df_val = make_ds(df_agg, block_size)

In [20]:
dfs_train, dfs_val = [], []
for i in df_train["latitude"].unique():
    for j in df_train["longitude"].unique():
            tmp1 = df_train[(df_train["latitude"] == i) & (df_train["longitude"] == j)]
            tmp2 = df_val[(df_val["latitude"] == i) & (df_val["longitude"] == j)]
            if not tmp1.empty and not tmp2.empty:
                scaler = MinMaxScaler()
                tmp1["energy0"] = scaler.fit_transform(tmp1[["energy"]])
                tmp2["energy0"] = scaler.transform(tmp2[["energy"]])
                for idx in range(1, block_size):
                    tmp1["count" + str(idx)] = tmp1["count"].shift(idx)
                    tmp2["count" + str(idx)] = tmp2["count"].shift(idx)
                    tmp1["energy" + str(idx)] = tmp1["energy0"].shift(idx)
                    tmp2["energy" + str(idx)] = tmp2["energy0"].shift(idx)
                dfs_train.append(tmp1)
                dfs_val.append(tmp2)
df_final_train = pd.concat(dfs_train)
df_final_val = pd.concat(dfs_val)

In [21]:
df_final_train.head()

Unnamed: 0_level_0,latitude,longitude,energy,label,count,energy0,count1,energy1,count2,energy2,...,count11,energy11,count12,energy12,count13,energy13,count14,energy14,count15,energy15
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
252,0.0,13.0,0.0,0.0,0.0,0.0,,,,,...,,,,,,,,,,
253,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,,,,,
254,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
255,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
256,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [22]:
df_final_train.dropna(inplace=True)
df_final_val.dropna(inplace=True)

In [23]:
x_train = df_final_train.drop(["label", "longitude", "latitude", "energy"], axis=1).to_numpy()
y_train = df_final_train["label"].to_numpy()
x_val = df_final_val.drop(["label", "longitude", "latitude", "energy"], axis=1).to_numpy()
y_val = df_final_val["label"].to_numpy()

In [24]:
x_train = x_train.reshape(-1, block_size, 2)
x_val = x_val.reshape(-1, block_size, 2)
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)

In [25]:
x_pos_train = df_final_train[["longitude", "latitude"]].to_numpy()
x_pos_val = df_final_val[["longitude", "latitude"]].to_numpy()

In [26]:
# add anoother column whith 1 - label
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)
y_train = np.concatenate((y_train, 1 - y_train), axis=1)
y_val = np.concatenate((y_val, 1 - y_val), axis=1)

In [27]:
model = Transformer(num_layers=4, d_model=256, num_heads=4, dropout_rate=0)

2023-10-04 15:40:22.326759: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-04 15:40:22.515267: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-04 15:40:22.515406: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-04 15:40:22.529113: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-04 15:40:22.529225: I tensorflow/compile

In [28]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy', tf.keras.metrics.F1Score()])

In [29]:
model.fit([x_train, x_pos_train], y_train, epochs=10, batch_size=512, validation_data=([x_val, x_pos_val], y_val))

Epoch 1/10


2023-10-04 15:40:56.586224: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
Could not load symbol cublasGetSmCountTarget from libcublas.so.11. Error: /home/majkel/miniconda3/lib/libcublas.so.11: undefined symbol: cublasGetSmCountTarget
2023-10-04 15:41:00.973905: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4592fb50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-10-04 15:41:00.974062: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2060 with Max-Q Design, Compute Capability 7.5
2023-10-04 15:41:01.153676: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-10-04 15:41:02.508999: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f065fc2e830>

In [41]:
model.count_params()

7423490

In [42]:
model.summary()

Model: "transformer_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_2 (Encoder)         multiple                  3175424   
                                                                 
 decoder_2 (Decoder)         multiple                  4247552   
                                                                 
 dense_50 (Dense)            multiple                  514       
                                                                 
Total params: 7423490 (28.32 MB)
Trainable params: 7423490 (28.32 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
