In [1]:
# SVM Training & Anwendung auf Punktwolken (Jupyter Notebook Style)

## 🧪 Teil 1: Training

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
import joblib
import pickle
import os
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import time

start_time = time.time()
# 1. Daten einlesen (ohne Header)
input_datei = r"arbeitspakete\01_klassifizierung\07_SDG\input\PW_Klass_P3A1_gesamt_normalisiert.txt"
output_path = r"arbeitspakete\01_klassifizierung\07_SDG\output"
df = pd.read_csv(input_datei, sep=";", header=None, decimal=".")

# 1.1 Spaltennamen zuweisen
columns = [
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir",
    "Label"
]
df.columns = columns

# 2. Label-Encoding
df["Label"] = df["Label"].astype("category")
df["LabelEncoded"] = df["Label"].cat.codes
label_mapping = dict(enumerate(df["Label"].cat.categories))

# Optional: Visualisierung der Label-Verteilung
df["Label"].value_counts().plot(kind='bar', title="Label Verteilung")
plt.tight_layout()
plt.savefig("label_verteilung.png")
plt.close()

# 3. Features & Ziel definieren
X = df[[
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir"
]]
# X = df[[
#     "Hue", "Z_dir"
# ]]
y = df["LabelEncoded"]

# 3.1 Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 4. Skalierung (fit mit DataFrame!)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Modell trainieren mit Optuna und SGDClassifier
import optuna
from sklearn.model_selection import cross_val_score

# 6. Klassifikationsbericht & Confusion Matrix
model_name = "v3_H_nz"
result_dir = f"Resultate_SGDClassifier_{model_name}"
os.makedirs(result_dir, exist_ok=True)
parts2 = result_dir.split("_")
classifier, model_name = parts2[-2], parts2[-1] 



# Optuna-Ziel-Funktion definieren
def objective(trial):
    alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
    loss = trial.suggest_categorical("loss", ["hinge", "log_loss", "modified_huber"])
    penalty = trial.suggest_categorical("penalty", ["l2", "elasticnet"])

    clf = SGDClassifier(
        alpha=alpha,
        loss=loss,
        penalty=penalty,
        max_iter=1000,
        early_stopping=True,
        n_iter_no_change=5,
        tol=1e-3,
        random_state=42,
        verbose=0
    )
    score = cross_val_score(clf, X_train_scaled, y_train, cv=3).mean()
    return 1.0 - score  # Fehler minimieren

# Optuna-Studie ausführen
study = optuna.create_study()
study.optimize(objective, n_trials=30)

# Optimierungshistorie plotten
import optuna.visualization as vis
fig = vis.plot_optimization_history(study)
fig.write_image(os.path.join(f"Resultate_{classifier}_{model_name}", "optuna_optimization_history.png"))
print("📊 Optimierungshistorie gespeichert.")

# Interaktive HTML-Version speichern
vis.plot_optimization_history(study).write_html(os.path.join(f"Resultate_{classifier}_{model_name}", "optuna_optimization_history.html"))
vis.plot_param_importances(study).write_html(os.path.join(f"Resultate_{classifier}_{model_name}", "optuna_param_importance.html"))
print("🧠 Parameter-Visualisierungen gespeichert (HTML)")

# Bestes Modell mit besten Parametern trainieren
print("🔍 Beste Parameter:", study.best_params)
model = SGDClassifier(**study.best_params, max_iter=1000, early_stopping=True, n_iter_no_change=5, tol=1e-3, random_state=42)
model.fit(X_train_scaled, y_train)

# 6. Trainingsverlust & Score visualisieren
# Trainings- und Test-Score anzeigen und speichern
training_score = model.score(X_train_scaled, y_train)
test_score = model.score(X_test_scaled, y_test)

score_info = (
    f"Trainings-Score: {training_score:.4f}\n"
    f"Test-Score: {test_score:.4f}\n"
)

print("✅ Training abgeschlossen mit folgenden Scores:")
print(score_info)

# Speichern der Scores
with open(os.path.join(result_dir, "training_scores.txt"), "w") as f:
    f.write(score_info)

# Aber wir können den Score pro Epoche über 'validation_scores_' visualisieren, falls vorhanden
if hasattr(model, "validation_scores_"):
    plt.plot(model.validation_scores_)
    plt.title("Validation Score pro Epoche")
    plt.xlabel("Epoche")
    plt.ylabel("Score")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, "validation_score_plot.png"))
    plt.close()
    print("📈 Validation Score Verlauf gespeichert.")



# Klassifikationsbericht speichern
y_pred = model.predict(X_test_scaled)
report = classification_report(y_test, y_pred, target_names=df["Label"].cat.categories)
with open(os.path.join(result_dir, "klassifikationsbericht.txt"), "w", encoding="utf-8") as f:
    f.write(report)
print("📋 Klassifikationsbericht gespeichert.")

# Confusion Matrix speichern
disp = ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=df["Label"].cat.categories, cmap="Blues", xticks_rotation=45)
disp.figure_.savefig(os.path.join(result_dir, "confusion_matrix.png"))
plt.close()
print("📊 Confusion Matrix gespeichert.")

# 7. Modell, Scaler, Mapping speichern
joblib.dump(model, os.path.join(result_dir, "sgd_model.pkl"))
joblib.dump(scaler, os.path.join(result_dir, "scaler.pkl"))
with open(os.path.join(result_dir, "label_mapping.pkl"), "wb") as f:
    pickle.dump(label_mapping, f)

print("✅ Training abgeschlossen und alles gespeichert.")

# 8. Zeit anzeigen
elapsed = time.time() - start_time
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"⏱️ Laufzeit: {int(h):02d}:{int(m):02d}:{s:05.2f} (Std:Min:Sek)")


## 🚀 Teil 2: Anwendung auf neue Punktwolke

import time
from pathlib import Path

start_time = time.time()

# 1. Modell und Scaler laden
model = joblib.load(os.path.join(result_dir, "sgd_model.pkl"))
scaler = joblib.load(os.path.join(result_dir, "scaler.pkl"))
with open(os.path.join(result_dir, "label_mapping.pkl"), "rb") as f:
    label_mapping = pickle.load(f)

# 2. Neue Punktwolke laden
input_file = Path("PW_P3A2_normalisiert.txt")
df = pd.read_csv(input_file, sep=";", header=None)

# 3. Dateiname auslesen
file_name = input_file.stem
parts = file_name.split("_")
objekt, klass = parts[0], parts[1] if len(parts) >= 2 else ("Unknown", "Unknown")
parts2 = result_dir.split("_")
classifier, model_name = parts2[-2], parts2[-1] 
file_name_neu = f"{objekt}_{klass}_{classifier}_{model_name}"

# 4. Spaltennamen definieren (ohne Label!)
df.columns = [
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir"
]

# 5. Feature-Auswahl
X_new = df[[
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir"
]]
# X_new = df[[
#     "Hue", "Z_dir"
# ]]

# 6. Skalieren & Klassifizieren
X_new_scaled = scaler.transform(X_new)
predicted_labels = model.predict(X_new_scaled)
df["Predicted_Label"] = predicted_labels
df["Label_Decoded"] = pd.Series(predicted_labels).map(label_mapping)

# 7. Speichern
# 7.1 Gesamte Punktwolke mit Vorhersage als TXT
output_txt = os.path.join(result_dir, f"{file_name_neu}.txt")
df.to_csv(output_txt, sep=";", index=False)
print(f"✅ Punktwolke mit Vorhersage gespeichert als {output_txt}")

# 7.2 Punktwolke nach Klassen aufteilen und speichern
for label_value, gruppe in df.groupby("Label_Decoded"):
    klass_datei = os.path.join(result_dir, f"{file_name_neu}_klasse_{label_value}.txt")
    gruppe.to_csv(klass_datei, sep=";", index=False)
    print(f"✅ Klasse '{label_value}' gespeichert als {klass_datei}")

# 8. Zeit anzeigen
elapsed = time.time() - start_time
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"⏱️ Laufzeit: {int(h):02d}:{int(m):02d}:{s:05.2f} (Std:Min:Sek)")


[I 2025-05-23 09:22:34,122] A new study created in memory with name: no-name-c805d795-f2ed-44cf-9678-6020531d0ccd
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
[I 2025-05-23 09:23:30,357] Trial 0 finished with value: 0.10122320457819634 and parameters: {'alpha': 0.001600613163369407, 'loss': 'hinge', 'penalty': 'l2'}. Best is trial 0 with value: 0.10122320457819634.
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
[I 2025-05-23 09:24:23,795] Trial 1 finished with value: 0.12167256684864636 and parameters: {'alpha': 0.013773442043334794, 'loss': 'modified_huber', 'penalty': 'l2'}. Best is trial 0 with value: 0.10122320457819634.
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
[I 2025-05-23 09:25:21,257] Trial 2 finished with value: 0.0863361693797553 and parameters: {'alpha': 3.0494647032765408e-06, 'loss': 'log_loss', 'penalty': 'l2'}. Best is trial 2 with value: 0.0863361693797553.
  alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
[I 2025-05-23 09:26:23,

📊 Optimierungshistorie gespeichert.
🧠 Parameter-Visualisierungen gespeichert (HTML)
🔍 Beste Parameter: {'alpha': 8.277288173723573e-06, 'loss': 'log_loss', 'penalty': 'l2'}
✅ Training abgeschlossen mit folgenden Scores:
Trainings-Score: 0.9124
Test-Score: 0.9117

📋 Klassifikationsbericht gespeichert.
📊 Confusion Matrix gespeichert.
✅ Training abgeschlossen und alles gespeichert.
⏱️ Laufzeit: 00:30:23.08 (Std:Min:Sek)
✅ Punktwolke mit Vorhersage gespeichert als Resultate_SGDClassifier_v\PW_P3A2_SGDClassifier_v.txt
✅ Klasse 'Bäume' gespeichert als Resultate_SGDClassifier_v\PW_P3A2_SGDClassifier_v_klasse_Bäume.txt
✅ Klasse 'Fassade' gespeichert als Resultate_SGDClassifier_v\PW_P3A2_SGDClassifier_v_klasse_Fassade.txt
✅ Klasse 'Ground' gespeichert als Resultate_SGDClassifier_v\PW_P3A2_SGDClassifier_v_klasse_Ground.txt
✅ Klasse 'Schrägdach' gespeichert als Resultate_SGDClassifier_v\PW_P3A2_SGDClassifier_v_klasse_Schrägdach.txt
✅ Klasse 'Strasse' gespeichert als Resultate_SGDClassifier_v\PW_P