In [1]:
import os
import sys
from typing import Any, Callable

import spacy
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import TextVectorization

# Make the project's ``src`` directory importable for the local imports below.
# The path is built with os.path.join so the separator matches the host OS: a
# hard-coded backslash is only a separator on Windows, and on POSIX it would
# add a non-existent entry literally named "..\src". The entry is relative, so
# it resolves against the working directory in effect when the import runs.
sys.path.append(os.path.join(os.pardir, 'src'))

from data_loading import DataLoading
from helper import Helper

### Data Loading

In [2]:
def load_data(config: dict[str, Any], script_dir: str) -> tuple:
    """
    Load every artifact needed for prediction.

    Each required entry of ``config['paths']`` is joined onto ``script_dir``
    and handed to the matching ``DataLoading`` loader.

    Args:
        config (dict[str, Any]): Configuration parameters. Must contain a
            ``'paths'`` mapping with the keys ``'intents'``, ``'nlp'``,
            ``'contractions'``, ``'preprocessing_functions'``,
            ``'vectorizer'``, ``'label_encoder'`` and ``'model'``.
        script_dir (str): Directory the configured paths are resolved against.

    Returns:
        tuple: ``(intents, trained_nlp, contractions, preprocessing_functions,
        job_queue_vectorizer, job_queue_label_encoder, job_queue_model)``,
        in exactly that order.

    Raises:
        KeyError: If ``config`` has no ``'paths'`` entry, or if one or more of
            the required path keys are missing (every missing key is named in
            the message).
    """
    required_keys: tuple[str, ...] = (
        'intents',
        'nlp',
        'contractions',
        'preprocessing_functions',
        'vectorizer',
        'label_encoder',
        'model',
    )

    configured_paths = config['paths']

    # Report all missing entries at once instead of failing on the first one,
    # so a broken config can be fixed in a single pass.
    missing_keys: list[str] = [key for key in required_keys if key not in configured_paths]
    if missing_keys:
        raise KeyError(
            f"config['paths'] is missing required entries: {', '.join(missing_keys)}"
        )

    # Resolve every configured path against script_dir before any loader runs.
    resolved: dict[str, str] = {
        key: os.path.join(script_dir, configured_paths[key]) for key in required_keys
    }

    intents: dict[str, list[str]] = DataLoading.load_intents(resolved['intents'])
    trained_nlp: spacy.Language = DataLoading.load_trained_nlp(resolved['nlp'])
    contractions: dict[str, str] = DataLoading.load_contractions(resolved['contractions'])
    preprocessing_functions: dict[str, Callable[[str], str]] = (
        DataLoading.load_preprocessing_functions(resolved['preprocessing_functions'])
    )
    # Callers index this mapping with the string keys "config" and "weights".
    job_queue_vectorizer: dict[str, Any] = DataLoading.load_job_queue_vectorizer(resolved['vectorizer'])
    job_queue_label_encoder: LabelEncoder = DataLoading.load_label_encoder(resolved['label_encoder'])
    job_queue_model: tf.keras.models.Sequential = DataLoading.load_keras_model(resolved['model'])

    return (
        intents,
        trained_nlp,
        contractions,
        preprocessing_functions,
        job_queue_vectorizer,
        job_queue_label_encoder,
        job_queue_model,
    )

In [3]:
# The parent of the kernel's working directory is used as the base directory:
# config.json is looked up there and passed on to load_data as its script_dir.
# NOTE(review): this assumes the notebook is started from a direct
# sub-directory of the project root — confirm for other launch setups.
parent_script_dir: str = os.path.dirname(os.getcwd())
config_path: str = os.path.join(parent_script_dir, 'config.json')
config: dict[str, Any] = DataLoading.load_config(config_path)

# load_data returns its artifacts as a positional tuple; the unpacking order
# below must match the order of its return statement.
(
    intents,
    trained_nlp,
    contractions,
    preprocessing_functions,
    job_queue_vectorizer,
    job_queue_label_encoder,
    job_queue_model,
) = load_data(config, parent_script_dir)

### Utterance Preprocessing

In [4]:
# Fix TensorFlow's global random seed so repeated runs of the notebook start
# from the same random state.
SEED: int = 42
tf.random.set_seed(SEED)

# Rebuild the text-vectorization layer from its stored form: first recreate
# the layer from the saved config, then restore the saved weights into it.
vectorizer_config = job_queue_vectorizer["config"]
vectorizer: TextVectorization = TextVectorization.from_config(vectorizer_config)

vectorizer_weights = job_queue_vectorizer["weights"]
vectorizer.set_weights(vectorizer_weights)

In [5]:
helper = Helper()

# Sample job-queue error message used to exercise the pipeline end to end.
job_queue_error: str = "Die E-Mail-Adresse 'test.bobl@axians-infoma.com' ist ungültig."

# Run the raw message through the project's preprocessing step.
# NOTE(review): the recorded output renders "E-Mail" as "e mal" — confirm
# that this is the intended result of the preprocessing functions.
preprocessed_job_queue_error: str = helper.process_job_queue_error(
    job_queue_error,
    trained_nlp,
    preprocessing_functions,
    contractions,
    config,
)

# Bare last expression so the notebook displays the preprocessed text.
preprocessed_job_queue_error

'e mal Adresse -- entitätsemail -- ungültig'

### Model Prediction

In [6]:
# Classify the preprocessed message. The return value is not used; the
# results are read back from attributes on ``helper`` afterwards
# (presumably populated by calculate_prediction — verify in Helper).
helper.calculate_prediction(
    preprocessed_job_queue_error,
    intents,
    vectorizer,
    job_queue_label_encoder,
    job_queue_model,
)

# Print the raw prediction array, then the selected response text, then its
# confidence, one per line and in that order.
for result_attribute in ("prediction_array", "prediction", "confidence"):
    print(getattr(helper, result_attribute))

[[0.         0.         0.         0.         0.         0.
  0.         0.         0.00000001 0.         0.         0.
  0.00001021 0.0000053  0.         0.         0.         0.
  0.         0.         0.         0.00000159 0.         0.
  0.         0.         0.         0.         0.00006058 0.
  0.99992    0.         0.         0.00000234 0.         0.        ]]
Diese Fehlermeldung tritt auf, wenn eine E-Mail-Adresse ungültig ist. Eine gültige E-Mail-Adresse ist nicht leer und enthält nur ASCII-Zeichen.
 Um die Fehlermeldung zu beheben, stellen Sie sicher, das keine E-Mail-Adresse eines der folgenden Zeichen enthält: ß, ä, ö, ü, Ä, Ö, Ü.
0.99992
