# Transformer - without Metadata
## Installation of Modules

In [1]:
!pip install -r requirements.txt



You should consider upgrading via the 'D:\Final DEP\venv\Scripts\python.exe -m pip install --upgrade pip' command.


## Loading of the Modules

In [1]:
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
import autokeras as ak

## Definition of Project Information
### Constants

In [2]:
# File and folder names, resolved against the notebook directory in the Paths cell.
INPUT_FILE_NAME: str = "emails.csv"
OUTPUT_FOLDER_NAME: str = "output_model"

# Name of the pretrained base model.
BASE_MODEL_NAME: str = "distilbert-base-uncased"

# Single seed reused for data shuffling and for the AutoKeras search.
RANDOM_STATE: int = 43

### Paths

In [3]:
# All paths are anchored at the directory the notebook is started from.
NOTEBOOK_ROOT_PATH = pathlib.Path.cwd()
# Location of the input CSV.
INPUT_FILE_PATH = NOTEBOOK_ROOT_PATH / INPUT_FILE_NAME
# Folder that receives the exported model.
OUTPUT_FOLDER_PATH = NOTEBOOK_ROOT_PATH / OUTPUT_FOLDER_NAME

### GPU

In [4]:
# Sanity check: show the GPUs TensorFlow can see (an empty list means CPU only).
tf.config.list_physical_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
# Show as a pair: (built with CUDA?, built with GPU support?).
(
    tf.test.is_built_with_cuda(),
    tf.test.is_built_with_gpu_support(),
)

(True, True)

## Load File to Dataframe

In [6]:
# Read the input CSV through the resolved path constant rather than the bare
# file name, so the load uses the same location as every other path in the notebook.
dataframe = pd.read_csv(INPUT_FILE_PATH)
# Preview the first rows.
dataframe.head()

Unnamed: 0,text,spam
0,Subject: naturally irresistible your corporate...,1
1,Subject: the stock trading gunslinger fanny i...,1
2,Subject: unbelievable new homes made easy im ...,1
3,Subject: 4 color printing special request add...,1
4,"Subject: do not have money , get software cds ...",1


# Model Generation
## Definition of the AutoKeras Model
### Input Objects

In [8]:
# Text input node of the AutoKeras graph; the processing block below is chained onto it.
content_txt_input = ak.TextInput()

### Processing Block

In [22]:
# Run the text input through a transformer-type text block.
content_txt_output = ak.TextBlock(
    block_type="transformer",
)(content_txt_input)

# Two-class, single-label classification head on top of the text block,
# tracked with accuracy.
classification_head = ak.ClassificationHead(
    num_classes=2,
    multi_label=False,
    metrics="accuracy",
)(content_txt_output)

### Build Complete Model

In [26]:
# Assemble the searchable model from the input node and the classification head.
# The search maximises validation accuracy over at most 10 trials; `overwrite=True`
# discards any earlier project of the same name, and the seed is the notebook-wide
# RANDOM_STATE.
model = ak.AutoModel(
    inputs=content_txt_input,
    outputs=classification_head,
    project_name="Spam-Classificator",
    objective="val_accuracy",
    max_trials=10,
    seed=RANDOM_STATE,
    overwrite=True,
)

## Preparation of the Datasets

In [28]:
# Shuffle all rows once with the fixed seed, then cut at the 80 % mark:
# the first 80 % become the training set, the remaining 20 % the test set.
# Plain `.iloc` slicing is used instead of `np.split(DataFrame, ...)`, which
# relies on the deprecated `DataFrame.swapaxes` in recent pandas versions.
shuffled_df = dataframe.sample(frac=1, random_state=RANDOM_STATE)
split_index = int(0.8 * len(shuffled_df))
train_df = shuffled_df.iloc[:split_index]
test_df = shuffled_df.iloc[split_index:]

# The two parts must partition the data exactly.
assert len(train_df) + len(test_df) == len(dataframe)

## Training of the Model

In [29]:
# Run the architecture search and training on the training split.
# 15 % of the training data is held out for validation (the search objective),
# and every trial trains for 5 epochs.
model.fit(
    x=np.array(train_df["text"]),
    y=np.array(train_df["spam"]),
    epochs=5,
    validation_split=0.15,
)

Trial 10 Complete [00h 00m 18s]
val_accuracy: 0.7678018808364868

Best val_accuracy So Far: 0.9938080310821533
Total elapsed time: 00h 03m 26s
INFO:tensorflow:Oracle triggered exit
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: .\Spam-Classificator\best_model\assets


<keras.callbacks.History at 0x238295db2b0>

## Save the best Model

In [30]:
# Fetch the best model found by the tuner ...
best_model = model.tuner.get_best_model()
# ... and write it into the output folder.
best_model.save(OUTPUT_FOLDER_PATH / "transformer-metadata.tf")

INFO:tensorflow:Assets written to: D:\NLP-22-23\Transformer\output_model\transformer-metadata.tf\assets


# Evaluate the Model Performance
## Load the best Model from Drive

In [33]:
# Reload the exported model from the output folder.
# `custom_objects=ak.CUSTOM_OBJECTS` registers AutoKeras' custom layers with the
# Keras loader, so the model also deserialises in a fresh kernel where those
# layers have not been used yet.
best_model = tf.keras.models.load_model(
    pathlib.Path.joinpath(OUTPUT_FOLDER_PATH, 'transformer-metadata.tf'),
    custom_objects=ak.CUSTOM_OBJECTS,
)

In [37]:
 results = best_model.evaluate(
    x= np.array(test_df.text),
    y= np.array(test_df.spam),
    verbose= 0
)
print(f'''
--- Model Performance ---
loss = {results[0]}
accuracy = {results[1]}
''')


--- Model Performance ---
loss = 0.028793204575777054
accuracy = 0.9895287752151489

