## TensorFlow Model Creation

In [1]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


In [2]:
current = current = os.getcwd()
parent = parent_dir = os.path.dirname(current)
df = pd.read_csv(parent_dir+"/data/joint_data_collection_8targets.csv")
print(f"Dataset Shape: {df.shape}")
print(f"Spalten: {df.columns.tolist()}")

Dataset Shape: (17400, 25)
Spalten: ['Unnamed: 0', 'Engine type_In-line four, four-stroke', 'Engine type_In-line three, four-stroke', 'Engine type_Single cylinder, four-stroke', 'Engine type_Single cylinder, two-stroke', 'Engine type_Twin, four-stroke', 'Engine type_Twin, two-stroke', 'Engine type_Two cylinder boxer, four-stroke', 'Engine type_V2, four-stroke', 'Engine type_V4, four-stroke', 'Transmission type,final drive_Belt', 'Transmission type,final drive_Chain', 'Transmission type,final drive_Shaft drive (cardan)', 'Front brakes_Double disc', 'Front brakes_Dual disc', 'Front brakes_Expanding brake', 'Front brakes_Expanding brake (drum brake)', 'Front brakes_Single disc', 'Rear brakes_Expanding brake', 'Rear brakes_Expanding brake (drum brake)', 'Rear brakes_Single disc', 'Displacement ccm', 'Fuel capacity liters', 'Power HP', 'Category']


In [3]:
target_column = 'Category'  # <-- HIER ANPASSEN!

# Features (alle numerischen Spalten) und Target (Text) trennen
X = df.drop(target_column, axis=1).values
y_text = df[target_column].values

print(f"\nFeatures Shape: {X.shape}")
print(f"Target (Text) Shape: {y_text.shape}")
print(f"Unique Target Values: {len(np.unique(y_text))}")
print(f"Target Klassen: {np.unique(y_text)}")

# Text-Target zu numerischen Labels konvertieren
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_text)

print(f"\nEncoded Target Shape: {y.shape}")
print(f"Label Mapping:")
for i, label in enumerate(label_encoder.classes_):
    print(f"  {i}: {label}")

num_classes = len(label_encoder.classes_)

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"\nTraining Samples: {len(X_train)}")
print(f"Test Samples: {len(X_test)}")



Features Shape: (17400, 24)
Target (Text) Shape: (17400,)
Unique Target Values: 8
Target Klassen: ['Classic' 'Cross / motocross' 'Naked bike' 'Scooter' 'Sport'
 'Super motard' 'Touring' 'Unspecified category']

Encoded Target Shape: (17400,)
Label Mapping:
  0: Classic
  1: Cross / motocross
  2: Naked bike
  3: Scooter
  4: Sport
  5: Super motard
  6: Touring
  7: Unspecified category

Training Samples: 13920
Test Samples: 3480


In [4]:
model = keras.Sequential([
    keras.layers.Dense(512, activation='softmax', input_shape=(X_train.shape[1],)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(512, activation='softmax'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(512, activation='softmax'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(512, activation='softmax'),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Model Summary
print("\n" + "="*60)
print("MODEL ARCHITEKTUR")
print("="*60)
model.summary()



MODEL ARCHITEKTUR


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
def plotTrainingAndValidationPerformance(epochs, accuracy, val_accuracy, loss, val_loss):
    """
    This function creates and stores the (1) accuracy plot and (2) loss plot
    for training and validation performances and stores them as (a) png and (b) pdf file
    at the 'learningBase' of docker volume 'ai_system'.
    - loss: categorical_crossentropy
    Computes the crossentropy loss between the labels and predictions.
    This loss is the crossentropy metric class to be used when there are multiple label classes (2 or more). 
    Here it is assumed that labels are given as a `one_hot` representation. 
    For instance, when `labels` are [2, 0, 1], `y_true` = [[0, 0, 1], [1, 0, 0], [0, 1, 0]].
    remember: a lower validation loss indicates a better model.
    - accuracy: 
    This metric creates two local variables, `total` and `count` 
    that are used to compute the frequency with which `y_pred` matches `y_true`. 
    This frequency is ultimately returned as `binary accuracy`: an idempotent operation that simply divides `total` by `count`.
    remember: a higher validation accuracy indicates a better model.
    """
    
    # initialize figure
    plt.figure(figsize=(15, 15))
    
    # characterize accuracy plot
    plt.subplot(2, 2, 1)
    plt.plot(epochs, accuracy, label="Training Accuracy")
    plt.plot(epochs, val_accuracy, label="Validation Accuracy")
    plt.legend(loc="lower right")
    plt.title("Training and Validation Accuracy")
    
    # characterize loss plot
    plt.subplot(2, 2, 2)
    plt.plot(epochs, loss, label="Training Loss")
    plt.plot(epochs, val_loss, label="Validation Loss")
    plt.legend(loc="upper right")
    plt.title("Training and Validation Loss")
    
    # indicate performance by showing plot generated (having displays connected)
    #plt.show()
    
    # indicate performance by storing the plot as png and pdf file
    plt.savefig('TrainingPerformance.png')
    plt.savefig('TrainingPerformance.pdf')

In [None]:
epochen=3000
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=100,
        restore_best_weights=True,
        verbose=1,
        min_delta=0.0001
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=10,
        min_lr=1e-7,
        verbose=1,
        min_delta=0.0001
    )
   
]

# Training starten
print(f"\n{'='*60}")
print(f"TRAINING STARTED")
print(f"{'='*60}")

start_time = datetime.now()

history = model.fit(
    X_train, y_train,
    epochs=epochen,
    batch_size=256,
    validation_split=0.2,
    verbose=1
)

#callbacks=callbacks,
end_time = datetime.now()
training_duration = (end_time - start_time).total_seconds()

# Training Metriken extrahieren
final_epoch = len(history.history['loss'])
final_train_loss = history.history['loss'][-1]
final_train_accuracy = history.history['accuracy'][-1]
final_val_loss = history.history['val_loss'][-1]
final_val_accuracy = history.history['val_accuracy'][-1]

# Best Epoch finden
best_epoch = np.argmin(history.history['val_loss']) + 1
best_val_loss = np.min(history.history['val_loss'])
best_val_accuracy = history.history['val_accuracy'][best_epoch - 1]

# Training Summary ausgeben
print(f"\n{'='*60}")
print(f"TRAINING SUMMARY")
print(f"{'='*60}")
print(f"Total Training Iterations (Epochs): {final_epoch}")
print(f"Training Duration: {training_duration:.2f} seconds ({training_duration/60:.2f} minutes)")
print(f"\nFinal Epoch ({final_epoch}):")
print(f"  - Training Loss: {final_train_loss:.6f}")
print(f"  - Training Accuracy: {final_train_accuracy:.6f} ({final_train_accuracy*100:.2f}%)")
print(f"  - Validation Loss: {final_val_loss:.6f}")
print(f"  - Validation Accuracy: {final_val_accuracy:.6f} ({final_val_accuracy*100:.2f}%)")
print(f"\nBest Epoch ({best_epoch}):")
print(f"  - Validation Loss: {best_val_loss:.6f}")
print(f"  - Validation Accuracy: {best_val_accuracy:.6f} ({best_val_accuracy*100:.2f}%)")

# Test Set Evaluation
print(f"\n{'='*60}")
print(f"TEST SET EVALUATION")
print(f"{'='*60}")

test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.6f}")
print(f"Test Accuracy: {test_accuracy:.6f} ({test_accuracy*100:.2f}%)")

# Predictions
y_pred_probs = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred_probs, axis=1)

# Zurück zu Text-Labels
y_test_text = label_encoder.inverse_transform(y_test)
y_pred_text = label_encoder.inverse_transform(y_pred)
plotTrainingAndValidationPerformance(range(epochen), history.history["accuracy"], history.history["val_accuracy"],history.history["loss"], history.history["val_loss"])
model.save("/models/bestmodel.h5", save_format="h5")
    


TRAINING STARTED
Epoch 1/3000
