# Multi-Class Single-Label classification

The natural extension of binary classification is a multi-class classification task.
We first approach multi-class single-label classification, which makes the assumption that each example is assigned to one and only one label.

We use the Intellizenz data set, which consists of a classification into three mutually-exclusive classes; call these $A(0-50€)$, $B(50-100€)$ and $C(>100€)$.

While one could train three unary predicates $A(x)$, $B(x)$ and $C(x)$, it turns out to be more effective if this problem is modelled by a single binary predicate $P(x,l)$, where $l$ is a variable denoting a multi-class label, in this case classes $A$, $B$ or $C$.
- This syntax allows one to write statements quantifying over the classes, e.g. $\forall x ( \exists l ( P(x,l)))$.
- Since the classes are mutually-exclusive in this case, the output layer of the $\mathtt{MLP}$ representing $P(x,l)$ will be a $\mathtt{softmax}$ layer, instead of a $\mathtt{sigmoid}$ function, to learn the probability of $A$, $B$ and $C$. This avoids writing additional constraints $\lnot (A(x) \land B(x))$, $\lnot (A(x) \land C(x))$, ...

In [None]:
import ltn
import logging; logging.basicConfig(level=logging.INFO)
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Data

Load the Intellizenz dataset: 1.7M samples from each of three classes of veranstaltung segments (0-50€, 50-100€, >100€).

In [None]:
# Candidate columns of the export, grouped by origin. The last group holds
# 'veranst_segment', which the notebook reads as the classification target.
features = [
    # Venue keyword indicators
    "place_kirche", "place_hotel", "place_cafe",
    "place_theater", "place_club", "place_halle",
    "place_gaststaette", "place_festhalle", "place_kulturzentrum",
    "place_festzelt", "place_schloss", "place_pub",
    "place_stadthalle", "place_park", "place_gasthof",
    "place_kabarett", "place_arena", "place_schlachthof",
    "place_wandelhalle", "place_turnhalle", "place_buergerhaus",
    "place_museum", "place_rathaus", "place_staatsbad",
    "place_zelt", "place_jazz", "place_forum",
    "place_gymnasium", "place_schule", "place_sporthalle",

    # 30 bands

    # Federal-state indicators
    "state_bavaria", "state_rhineland-palatinate",
    "state_baden-wuerttemberg", "state_north rhine-westphalia",
    "state_thuringia", "state_hesse",
    "state_brandenburg", "state_schleswig-holstein",
    "state_berlin", "state_mecklenburg-western pomerania",
    "state_lower saxony", "state_hamburg",
    "state_saarland", "state_saxony-anhalt",
    "state_saxony", "state_bremen",

    # Date-derived columns
    "vg_datum_year", "vg_datum_month", "vg_datum_day_of_week", "vg_datum_season",

    # Segment and 'vg_inkasso' columns
    "veranst_segment", "vg_inkasso",
]

In [None]:
# Features used - 'VG_RAUM_KEYWORDS', 'VG_DATUM_VON', 'vg_state', 'BAND', 'PROMOTER'
# This is the column selection that the notebook feeds to the model.
features_v3 = [
    # Venue keyword indicators
    "place_kirche", "place_hotel", "place_cafe",
    "place_theater", "place_club", "place_halle",
    "place_gaststaette", "place_festhalle", "place_kulturzentrum",
    "place_festzelt", "place_schloss", "place_pub",
    "place_stadthalle", "place_park", "place_gasthof",
    "place_kabarett", "place_arena", "place_schlachthof",
    "place_wandelhalle", "place_turnhalle", "place_buergerhaus",
    "place_museum", "place_rathaus", "place_staatsbad",
    "place_zelt", "place_jazz", "place_forum",
    "place_gymnasium", "place_schule", "place_sporthalle",

    # 30 bands

    # Federal-state indicators
    "state_bavaria", "state_rhineland-palatinate",
    "state_baden-wuerttemberg", "state_north rhine-westphalia",
    "state_thuringia", "state_hesse",
    "state_brandenburg", "state_schleswig-holstein",
    "state_berlin", "state_mecklenburg-western pomerania",
    "state_lower saxony", "state_hamburg",
    "state_saarland", "state_saxony-anhalt",
    "state_saxony", "state_bremen",

    # Date-derived columns
    "vg_datum_year", "vg_datum_month", "vg_datum_day_of_week", "vg_datum_season",

    # 30 promoters
]

In [None]:
# Read the exported feature table from the local parquet file.
df = pd.read_parquet(
    "C:/Users/sgopalakrish/Downloads/intellizenz-model-training/data/export_features_2016_2020_v3.parquet.gzip"
)

# Model inputs are the columns listed in `features_v3`.
X = df[features_v3]

# Target: the segment column cast to int, then re-encoded by LabelEncoder
# into integer class indices.
l_enc = LabelEncoder()
y = l_enc.fit_transform(df['veranst_segment'].astype('int'))

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=4,
)

In [None]:
# Lift the column limit so that wide frames are displayed in full.
pd.options.display.max_columns = None

In [None]:
# Show the training feature frame as the cell output for a visual check.
# Scratch note: to inspect a single date-derived column use e.g.
# X_train['vg_datum_month']; the date-derived columns are
# 'vg_datum_year', 'vg_datum_month', 'vg_datum_day_of_week', 'vg_datum_season'.
X_train

In [None]:
batch_size = 1024

# Print the encoded label arrays of both splits for a quick look.
print(y_train)
print(y_test)

# Wrap each split as a dataset of (features, label) pairs, then group the
# pairs into batches of `batch_size`.
ds_train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
ds_train = ds_train.batch(batch_size)

ds_test = tf.data.Dataset.from_tensor_slices((X_test, y_test))
ds_test = ds_test.batch(batch_size)

# LTN

Predicate with softmax `P(x,class)`

In [None]:
class MLP(tf.keras.Model):
    """Feed-forward network that outputs one raw logit per class.

    Args:
        n_classes: Number of units of the final dense layer, which has no
            activation.
        hidden_layer_sizes: Number of units of each ReLU hidden layer, in order.
    """

    def __init__(self, n_classes, hidden_layer_sizes=(16,16,8)):
        super().__init__()
        hidden_layers = []
        for units in hidden_layer_sizes:
            hidden_layers.append(tf.keras.layers.Dense(units, activation="relu"))
        self.denses = hidden_layers
        self.dense_class = tf.keras.layers.Dense(n_classes)
        # A single dropout layer (rate 0.2) is reused after every hidden layer.
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs, training=False):
        """Return the logits for `inputs`; `training` is forwarded to dropout."""
        hidden = inputs
        for layer in self.denses:
            hidden = self.dropout(layer(hidden), training=training)
        return self.dense_class(hidden)

# The MLP argument is the number of output classes (A, B, C), not the number
# of input features: the network must emit exactly one logit per class.
logits_model = MLP(3)
# Wrap the logits model as the predicate P(x, class). single_label=True is
# used because the three classes are mutually exclusive.
p = ltn.Predicate(ltn.utils.LogitsToPredicateModel(logits_model,single_label=True))

Constants to index/iterate on the classes

In [None]:
# Non-trainable constants holding the class indices 0, 1 and 2; they are the
# second argument of the predicate P(x, class).
class_A, class_B, class_C = (
    ltn.Constant(class_index, trainable=False) for class_index in range(3)
)

Operators and axioms

In [None]:
# Connectives: each one wraps an operator taken from ltn.fuzzy_ops.
Not = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Not_Std()
)
And = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.And_Prod()
)
Or = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Or_ProbSum()
)
Implies = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Implies_Reichenbach()
)

# Universal quantifier: aggregates with the p-mean-error operator (p=2).
Forall = ltn.Wrapper_Quantifier(
    ltn.fuzzy_ops.Aggreg_pMeanError(p=2),
    semantics="forall",
)

In [None]:
# Combines the truth values of the individual axioms into one value.
formula_aggregator = ltn.Wrapper_Formula_Aggregator(
    ltn.fuzzy_ops.Aggreg_pMeanError(p=2)
)


@tf.function
def axioms(features, labels, training=False):
    """Return the aggregated satisfaction level of the knowledge base.

    The knowledge base states, for each class, that every sample carrying
    that label satisfies the predicate `p` for that class.

    Args:
        features: Batch of feature rows.
        labels: Class index (0, 1 or 2) of each row in `features`.
        training: Forwarded to the predicate `p`.

    Returns:
        The tensor of the aggregated formula.
    """
    # One variable per class, grounded with the rows that carry that label.
    grounded = (
        (ltn.Variable("x_A", features[labels == 0]), class_A),
        (ltn.Variable("x_B", features[labels == 1]), class_B),
        (ltn.Variable("x_C", features[labels == 2]), class_C),
    )
    formulas = [
        Forall(samples, p([samples, class_const], training=training))
        for samples, class_const in grounded
    ]
    return formula_aggregator(formulas).tensor

Initialize all layers and the static graph

In [None]:
# Evaluate the axioms once on the first test batch, which builds the layers
# and traces the tf.function. The loop variables carry their own names so
# that the module-level `features` column list is not overwritten by a tensor.
for init_features, init_labels in ds_test.take(1):
    print("Initial sat level %.5f"%axioms(init_features,init_labels))

# Training

Define the metrics. While training, we measure:
1. The level of satisfiability of the Knowledge Base of the training data.
2. The level of satisfiability of the Knowledge Base of the test data.
3. The training accuracy.
4. The test accuracy.

In [None]:
# Running metrics keyed by their own name: a mean for the two satisfaction
# levels and a categorical accuracy for the two classification scores.
metrics_dict = {
    **{
        metric_name: tf.keras.metrics.Mean(name=metric_name)
        for metric_name in ("train_sat_kb", "test_sat_kb")
    },
    **{
        metric_name: tf.keras.metrics.CategoricalAccuracy(name=metric_name)
        for metric_name in ("train_accuracy", "test_accuracy")
    },
}

Define the training and test step

In [None]:
# Adam optimizer with a learning rate of 0.02.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.02)


@tf.function
def train_step(features, labels):
    """Run one optimisation step on a batch and update the training metrics.

    The loss is one minus the satisfaction level of the axioms, evaluated
    with `training=True`.

    Args:
        features: Batch of feature rows.
        labels: Class index of each row in `features`.
    """
    with tf.GradientTape() as tape:
        loss = 1. - axioms(features, labels, training=True)
    weights = p.trainable_variables
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    # Satisfaction level is recorded from a second pass with the default
    # training=False, i.e. without dropout.
    metrics_dict['train_sat_kb'](axioms(features, labels))

    # Accuracy compares the one-hot labels against the raw logits.
    metrics_dict['train_accuracy'](tf.one_hot(labels,3), logits_model(features))
    
@tf.function
def test_step(features, labels):
    """Update the test metrics for one batch; no weights are changed.

    Args:
        features: Batch of feature rows.
        labels: Class index of each row in `features`.
    """
    # Satisfaction level of the axioms with the default training=False.
    metrics_dict['test_sat_kb'](axioms(features, labels))

    # Accuracy compares the one-hot labels against the raw logits.
    metrics_dict['test_accuracy'](tf.one_hot(labels,3), logits_model(features))

Train

In [None]:
# `commons` is a helper module that is not defined in this notebook.
import commons

# Number of epochs handed to the training loop.
EPOCHS = 500

# NOTE(review): presumably runs train_step over ds_train and test_step over
# ds_test in every epoch, and writes the metrics in metrics_dict to csv_path
# every `track_metrics` epochs; confirm against commons.train.
commons.train(
    EPOCHS,
    metrics_dict,
    ds_train,
    ds_test,
    train_step,
    test_step,
    csv_path="intellizenz_results.csv",
    track_metrics=20
)

## Plot Satisfiability and Accuracy

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the results CSV (same file name as the csv_path given to commons.train).
file = pd.read_csv('intellizenz_results.csv')

# Pull out the x axis and the four tracked series.
epochs = file['Epoch']
sat_train_acc, sat_test_acc = file['train_sat_kb'], file['test_sat_kb']
train_acc, test_acc = file['train_accuracy'], file['test_accuracy']

# Satisfaction level of the knowledge base over the epochs, per split.
plt.plot(epochs, sat_train_acc, 'g', label='train')
plt.plot(epochs, sat_test_acc, 'b', label='test')
plt.xlabel('Epochs')
plt.ylabel('Satisfiability accuracy')
plt.title('Training and testing set satisfiability performance')
plt.legend()
plt.show()


In [None]:
# Classification accuracy over the epochs, per split.
plt.plot(epochs, train_acc, 'g', label='train')
plt.plot(epochs, test_acc, 'b', label='test')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and testing classification performance')
plt.legend()
plt.show()