# Categorical Embedding

## Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import functools

from keras import backend as K
from itertools import chain
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.metrics import top_k_categorical_accuracy
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Input, Embedding,Reshape, Concatenate, Conv1D, BatchNormalization, GlobalMaxPooling1D, MaxPooling1D
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, recall_score, precision_score, accuracy_score

## Data

In [2]:
data = pd.read_csv("./data/splited_full_RASFF_DATA.csv", sep=";", header=0, index_col=0)
data = data.sample(frac=1)

data.head(1)

Unnamed: 0,DATE_CASE,NOT_COUNTRY,PROD_CAT,TYPE,RISK_DECISION,ACTION_TAKEN,DISTRIBUTION_STAT,HAZARDS_CAT,COUNT_ORIGEN,COUNT_DESTIN,COUNT_CONCERN
122787,2009-10-15,Poland,herbs and spices,food,undecided,official detention,no distribution,mycotoxins,India,,Poland


In [3]:
class Stage:
	def __init__(self, input, output):
		self.input = input
		self.output = output

		self.x = data.iloc[:, input]
		self.y = data.iloc[:, output]

		self.x_train, self.y_train = None, None
		self.x_val, self.y_val = None, None
		self.x_test, self.y_test = None, None

		self.__transform()

	def __transform(self):
		strategy_x = OneHotEncoder(handle_unknown="ignore")
		strategy_y = OneHotEncoder(handle_unknown="ignore", sparse=False)

		strategy_x.fit(self.x.values)
		strategy_y.fit(self.y.values)

		self.x = strategy_x.transform(self.x.values)
		self.y = strategy_y.transform(self.y.values)

		self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(self.x, self.y, test_size=0.2)
		self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.x_train, self.y_train, test_size=0.25, random_state=42, shuffle=True)

	def get_metrics(self):
		result = model.predict(self.x_test, batch_size=64)
		result = np.argmax(result, axis=-1)

		print(f"- Accuracy: {round(accuracy_score(np.argmax(self.y_test, axis=-1), result)*100, 2)}%")
		print(f"- Specifity: {round(get_specifity(np.argmax(self.y_test, axis=-1), result)*100, 2)}%")
		print(f"- Sensitivity: {round(recall_score(np.argmax(self.y_test, axis=-1), result, average='macro', zero_division=0)*100, 2)}%")
		print(f"- Precision: {round(precision_score(np.argmax(self.y_test, axis=-1), result, average='macro', zero_division=0)*100, 2)}%")

		print(classification_report(np.argmax(self.y_test, axis=-1), result, zero_division=True))

		cm = confusion_matrix(np.argmax(self.y_test, axis=-1), result)
		cm = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(range(0, cm.shape[0])))

		_, ax = plt.subplots(figsize=(10, 10))
		cm.plot(ax=ax)

		plt.show()

## Preprocessing

In [4]:
data.DATE_CASE = data.DATE_CASE.astype(str)
data.HAZARDS_CAT = data.HAZARDS_CAT.astype(str)

In [5]:
def get_specifity(y_actual, y_pred):
    TN = []
    FP = []

    for index ,_id in enumerate(np.union1d(y_actual, y_pred)):
        FP.append(0)
        TN.append(0)

        for i in range(len(y_pred)):
            if y_pred[i] == _id and y_actual[i] != y_pred[i]:
                FP[index] += 1
            if y_actual[i] == y_pred[i] != _id:
                TN[index] += 1

    TN = sum(TN)
    FP = sum(FP)

    return TN/(TN + FP)

## Data Mining

In [6]:
stage1 = Stage(
	input=[0, 1, 6, 8],
	output=[2]
)

In [7]:
K.clear_session()

model = Sequential()
model.add(Reshape(input_shape=(6051, ), target_shape=(6051, 1)))
model.add(Conv1D(filters=128,kernel_size=4, activation = "relu"))
model.add(Conv1D(filters=128,kernel_size=4, activation = "relu"))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=256,kernel_size=3, activation = "relu"))
model.add(Conv1D(filters=256,kernel_size=3, activation = "relu"))
model.add(BatchNormalization())
model.add(GlobalMaxPooling1D())
model.add(Dense(512, activation = "relu"))
model.add(Dense(256, activation = "relu"))
model.add(Dense(43, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['categorical_accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 6051, 1)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 6048, 128)         640       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 6045, 128)         65664     
_________________________________________________________________
batch_normalization (BatchNo (None, 6045, 128)         512       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 3022, 128)         0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 3020, 256)         98560     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 3018, 256)         1

In [8]:
hist = model.fit(stage1.x_train, stage1.y_train, epochs=170, validation_data=(stage1.x_val, stage1.y_val), batch_size=500)

Epoch 1/170


TypeError: in user code:

    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py:787 train_step
        y_pred = self(x, training=True)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\sequential.py:369 call
        return super(Sequential, self).call(inputs, training=training, mask=mask)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\functional.py:414 call
        return self._run_internal_graph(
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\functional.py:550 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\engine\base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\keras\layers\core.py:534 call
        result = tf.reshape(
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\array_ops.py:196 reshape
        result = gen_array_ops.reshape(tensor, shape, name)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\gen_array_ops.py:8402 reshape
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\op_def_library.py:525 _apply_op_helper
        raise err
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\op_def_library.py:517 _apply_op_helper
        values = ops.convert_to_tensor(
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\profiler\trace.py:163 wrapped
        return func(*args, **kwargs)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:1566 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\constant_op.py:346 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\constant_op.py:271 constant
        return _constant_impl(value, dtype, shape, name, verify_shape=False,
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\constant_op.py:288 _constant_impl
        tensor_util.make_tensor_proto(
    C:\Users\ferna\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\tensor_util.py:551 make_tensor_proto
        raise TypeError("Failed to convert object of type %s to Tensor. "

    TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=Tensor("DeserializeSparse:0", shape=(None, 2), dtype=int64), values=Tensor("DeserializeSparse:1", shape=(None,), dtype=float32), dense_shape=Tensor("stack:0", shape=(2,), dtype=int64)). Consider casting elements to a supported type.


In [None]:
stage1.get_metrics()

Error: Kernel is dead