# Transformer - without Metadata
## Installation of Modules

In [1]:
!pip install -r requirements.txt

ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'


## Loading of the Modules

In [2]:
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
import autokeras as ak

## Definition of Project Information
### Constants

In [3]:
# Seed used both for shuffling the data before the train/test split and for
# the AutoKeras search.
RANDOM_STATE = 43
# Name of the CSV file holding the e-mail data (columns `text` and `spam`).
INPUT_FILE_NAME = "emails.csv"
# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it is still needed.
BASE_MODEL_NAME = "distilbert-base-uncased"
# Name of the folder (below the notebook directory) that receives the saved model.
OUTPUT_FOLDER_NAME = "without meta"

### Paths

In [4]:
# Every path is anchored at the directory the notebook kernel was started in.
NOTEBOOK_ROOT_PATH = pathlib.Path.cwd()
# Location of the input CSV file.
INPUT_FILE_PATH = NOTEBOOK_ROOT_PATH / INPUT_FILE_NAME
# Folder the trained model is written to.
OUTPUT_FOLDER_PATH = NOTEBOOK_ROOT_PATH / OUTPUT_FOLDER_NAME

### GPU

In [5]:
# Display the GPU devices visible to TensorFlow (shown as the cell output).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
# Display whether this TensorFlow build reports CUDA support and GPU support.
tf.test.is_built_with_cuda(), tf.test.is_built_with_gpu_support()

(True, True)

## Load File to Dataframe

In [7]:
# Read the e-mail data through the resolved INPUT_FILE_PATH so that the path
# constant from the "Paths" cell is the single place defining the input
# location (previously the bare file name was used and the path constant
# was never read).
dataframe = pd.read_csv(INPUT_FILE_PATH)
# Preview the first rows as the cell output.
dataframe.head()

Unnamed: 0,text,spam
0,Subject: naturally irresistible your corporate...,1
1,Subject: the stock trading gunslinger fanny i...,1
2,Subject: unbelievable new homes made easy im ...,1
3,Subject: 4 color printing special request add...,1
4,"Subject: do not have money , get software cds ...",1


# Model Generation
## Definition of the AutoKeras Model
### Input Objects

In [8]:
# Input node of the AutoKeras graph; it is fed with the e-mail text column
# when the model is fitted.
content_txt_input = ak.TextInput()

## Metrics

In [9]:
# Metrics passed to the classification head.
# NOTE(review): the performance printout at the end of the notebook indexes
# the evaluate() result positionally as accuracy, precision, recall —
# presumably relying on this order; confirm before reordering.
model_metrics = [
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]

### Processing Block

In [10]:
# Text processing stage: a transformer-type text block applied to the input node.
transformer_block = ak.TextBlock(block_type="transformer")
content_txt_output = transformer_block(content_txt_input)

# Output stage: single-label, two-class head reporting the metrics defined above.
head_block = ak.ClassificationHead(
    num_classes=2,
    multi_label=False,
    metrics=model_metrics,
)
classification_head = head_block(content_txt_output)

### Build Complete Model

In [11]:
# Wire input and output nodes into an AutoKeras search.
# The search is scored on validation binary accuracy, limited to 10 trials
# and seeded with RANDOM_STATE.
model = ak.AutoModel(
    inputs=content_txt_input,
    outputs=classification_head,
    objective="val_binary_accuracy",
    max_trials=10,
    overwrite=True,
    seed=RANDOM_STATE,
)

## Callbacks

In [12]:
# Callbacks passed to model.fit().
model_callbacks = [
    # Append the per-epoch metrics to a CSV log file.
    tf.keras.callbacks.CSVLogger(
        'meta-without.log', separator=',', append=True
    ),
    # Stop early on the *validation* metric — the same quantity the search
    # objective ("val_binary_accuracy") optimises. Monitoring the training
    # metric instead would not detect overfitting. mode='max' states
    # explicitly that higher accuracy is better.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_accuracy', mode='max', patience=3
    ),
    tf.keras.callbacks.History(),
    # Write TensorBoard event files to ./logs-without.
    tf.keras.callbacks.TensorBoard(log_dir="./logs-without"),
]

## Preparation of the Datasets

In [13]:
# Shuffle all rows once with a fixed seed, then cut at 80 %: the first part
# is used for training, the remainder for testing.
# Plain .iloc slicing replaces np.split() on a DataFrame, which relies on the
# deprecated DataFrame.swapaxes and emits a FutureWarning on recent pandas.
shuffled_df = dataframe.sample(frac=1, random_state=RANDOM_STATE)
train_size = int(0.8 * len(dataframe))
train_df, test_df = shuffled_df.iloc[:train_size], shuffled_df.iloc[train_size:]

## Training of the Model

In [14]:
# Convert the training columns to NumPy arrays before handing them to AutoKeras.
train_texts = np.array(train_df.text)
train_labels = np.array(train_df.spam)

# Run the search; 15 % of the training data is held back for validation and
# every trial trains for at most 5 epochs.
history = model.fit(
    x=train_texts,
    y=train_labels,
    validation_split=0.15,
    epochs=5,
    callbacks=model_callbacks,
)

Trial 10 Complete [00h 00m 15s]
val_binary_accuracy: 0.9891641139984131

Best val_binary_accuracy So Far: 0.99071204662323
Total elapsed time: 00h 02m 42s
INFO:tensorflow:Oracle triggered exit
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: .\auto_model\best_model\assets


## Save the best Model

In [15]:
# Fetch the best model found by the search and store it below the output folder.
best_model = model.tuner.get_best_model()
saved_model_path = OUTPUT_FOLDER_PATH / 'transformer-metadata.tf'
best_model.save(saved_model_path)

INFO:tensorflow:Assets written to: D:\NLP-22-23\Transformer\without meta\transformer-metadata.tf\assets


# Evaluate the Model Performance
## Load the best Model from Drive

In [16]:
# Reload the model saved above. Passing ak.CUSTOM_OBJECTS follows the
# AutoKeras guidance for loading exported models, so Keras can resolve
# AutoKeras-specific layers when deserialising.
best_model = tf.keras.models.load_model(
    OUTPUT_FOLDER_PATH / 'transformer-metadata.tf',
    custom_objects=ak.CUSTOM_OBJECTS,
)

In [17]:
 results = best_model.evaluate(
    x= np.array(test_df.text),
    y= np.array(test_df.spam),
    verbose= 0
)

In [18]:
# Build the report from the evaluation values, read positionally as
# loss, accuracy, precision, recall, then print it.
performance_report = f'''
--- Model Performance ---
loss = {results[0]}
accuracy = {results[1]}
precision = {results[2]}
recall = {results[3]}
'''
print(performance_report)


--- Model Performance ---
loss = 0.04976167529821396
accuracy = 0.9834206104278564
precision = 0.980988621711731
recall = 0.9485294222831726

