# Lineares SVM-Training (SGDClassifier) & Anwendung auf Punktwolken (Jupyter Notebook Style)

## Teil 1: Training

#### Import der Bibliotheken

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib
import pickle
import os
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import time
from pathlib import Path

#### Training des SGDClassifier mit Abbruchschranke

In [5]:

start_time = time.time()

# 1. Read the labelled point cloud: semicolon-separated text, no header row,
#    "." as decimal separator.
input_datei = "PW_Klass_P3A1_gesamt_normalisiert.txt"
df = pd.read_csv(input_datei, sep=";", header=None, decimal=".")

# 1.1 Assign column names: twelve feature columns followed by the class label.
columns = (
    ["X", "Y", "Z"]
    + ["Red", "Green", "Blue"]
    + ["Hue", "Saturation", "Value"]
    + ["X_dir", "Y_dir", "Z_dir"]
    + ["Label"]
)
df.columns = columns

# 2. Label encoding: convert the label column to a categorical and keep both
#    the integer codes (training target) and the code -> name mapping
#    (used later to decode predictions).
df["Label"] = df["Label"].astype("category")
label_mapping = {code: name for code, name in enumerate(df["Label"].cat.categories)}
df["LabelEncoded"] = df["Label"].cat.codes

# Optional: bar chart of the class frequencies, written to disk instead of
# being shown inline.
label_counts = df["Label"].value_counts()
label_counts.plot(kind='bar', title="Label Verteilung")
plt.tight_layout()
plt.savefig("label_verteilung.png")
plt.close()

# 3. Feature matrix (all twelve point attributes) and encoded target.
feature_columns = [
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir",
]
X = df[feature_columns]
y = df["LabelEncoded"]

# 3.1 Hold out 30 % of the points for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scaling: the scaler is fitted on the training DataFrame only and then
#    applied to both splits, so no test-set statistics enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train the model with SGDClassifier
from sklearn.linear_model import SGDClassifier

# Output directory for this model version. The application cell derives the
# classifier name and the version from this directory name, so keep the
# "Resultate_<classifier>_<version>" pattern.
model_name = "v3"
result_dir = f"Resultate_SGDClassifier_{model_name}"
os.makedirs(result_dir, exist_ok=True)

# Parameter overview (matches the call below):
# - loss="hinge": linear SVM loss
# - penalty="l2", alpha=1e-5: L2 regularisation and its strength
# - max_iter=5000: upper bound on the number of passes over the training data (epochs)
# - tol=1e-4: stopping tolerance; with early_stopping=False convergence is
#   checked against the training loss (n_iter_no_change keeps its default)
# - early_stopping=False: no validation split is held back during training
# - random_state=42: fixes the per-epoch shuffling so that repeated runs give
#   the same model (same seed as the train/test split)
# - verbose=1: print the training progress per epoch
model = SGDClassifier(
    loss="hinge",
    penalty="l2",
    alpha=1e-5,
    max_iter=5000,
    tol=1e-4,
    early_stopping=False,
    random_state=42,
    verbose=1
)

model.fit(X_train_scaled, y_train)

# 6. Training and test score
# `score` returns the mean accuracy of the classifier on the given split.
training_score = model.score(X_train_scaled, y_train)
test_score = model.score(X_test_scaled, y_test)

# Both scores belong to one message. Adjacent f-strings inside the parentheses
# are concatenated; written as two separate statements the second line would
# be evaluated and thrown away, and the test score would never be reported.
score_info = (
    f"Trainings-Score: {training_score:.4f}\n"
    f"Test-Score: {test_score:.4f}"
)

# Print once and persist once next to the other training artefacts.
print("✅" + score_info)
with open(os.path.join(result_dir, "training_scores.txt"), "w", encoding="utf-8") as f:
    f.write(score_info)

# Plot the per-epoch validation score only if the fitted model exposes it.
# NOTE(review): the scikit-learn documentation for SGDClassifier does not list
# a `validation_scores_` attribute, so this branch is presumably never taken
# for this model -- confirm, and remove it if so.
if hasattr(model, "validation_scores_"):
    plt.plot(model.validation_scores_)
    plt.title("Validation Score pro Epoche")
    plt.xlabel("Epoche")
    plt.ylabel("Score")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, "validation_score_plot.png"))
    plt.close()
    print("📈 Validation Score Verlauf gespeichert.")



# Classification report on the held-out test split, written as UTF-8 text.
y_pred = model.predict(X_test_scaled)
report = classification_report(
    y_test,
    y_pred,
    target_names=df["Label"].cat.categories,
)
with open(os.path.join(result_dir, "klassifikationsbericht.txt"), "w", encoding="utf-8") as report_file:
    report_file.write(report)
print("📋 Klassifikationsbericht gespeichert.")

# Confusion matrix of the same predictions, saved as an image.
disp = ConfusionMatrixDisplay.from_predictions(
    y_test,
    y_pred,
    display_labels=df["Label"].cat.categories,
    cmap="Blues",
    xticks_rotation=45,
)
disp.figure_.savefig(os.path.join(result_dir, "confusion_matrix.png"))
plt.close()
print("📊 Confusion Matrix gespeichert.")

# 7. Persist everything the application cell needs: model and scaler via
#    joblib, the code -> label-name mapping via pickle.
for artifact, artifact_name in ((model, "sgd_model.pkl"), (scaler, "scaler.pkl")):
    joblib.dump(artifact, os.path.join(result_dir, artifact_name))
with open(os.path.join(result_dir, "label_mapping.pkl"), "wb") as mapping_file:
    pickle.dump(label_mapping, mapping_file)

print("✅ Training abgeschlossen und alles gespeichert.")

# 8. Report the wall-clock time since `start_time` as hours:minutes:seconds.
elapsed = time.time() - start_time
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"⏱️ Laufzeit: {int(h):02d}:{int(m):02d}:{s:05.2f} (Std:Min:Sek)")


-- Epoch 1
Norm: 11.76, NNZs: 12, Bias: -2.180890, T: 1969499, Avg. loss: 0.305758
Total training time: 0.54 seconds.
-- Epoch 2
Norm: 10.20, NNZs: 12, Bias: -2.123487, T: 3938998, Avg. loss: 0.173694
Total training time: 1.06 seconds.
-- Epoch 3
Norm: 9.75, NNZs: 12, Bias: -2.010385, T: 5908497, Avg. loss: 0.167487
Total training time: 1.58 seconds.
-- Epoch 4
Norm: 9.57, NNZs: 12, Bias: -2.005424, T: 7877996, Avg. loss: 0.164973
Total training time: 2.10 seconds.
-- Epoch 5
Norm: 9.45, NNZs: 12, Bias: -1.864002, T: 9847495, Avg. loss: 0.163574
Total training time: 2.62 seconds.
-- Epoch 6
Norm: 9.43, NNZs: 12, Bias: -1.911308, T: 11816994, Avg. loss: 0.162793
Total training time: 3.14 seconds.
-- Epoch 7
Norm: 9.56, NNZs: 12, Bias: -1.906823, T: 13786493, Avg. loss: 0.162194
Total training time: 3.66 seconds.
-- Epoch 8
Norm: 9.29, NNZs: 12, Bias: -1.947269, T: 15755992, Avg. loss: 0.161758
Total training time: 4.19 seconds.
-- Epoch 9
Norm: 9.42, NNZs: 12, Bias: -1.943258, T: 177254


## Teil 2: Anwendung auf neue Punktwolke


In [6]:
start_time = time.time()

# 1. Load the trained model, the fitted scaler and the label mapping from the
#    result directory of the training run.
# Note: joblib/pickle files can execute arbitrary code when loaded -- only
# load artefacts that were produced by a trusted training run.
result_dir = "Resultate_SGDClassifier_v3"
model, scaler = (
    joblib.load(os.path.join(result_dir, artifact_name))
    for artifact_name in ("sgd_model.pkl", "scaler.pkl")
)
with open(os.path.join(result_dir, "label_mapping.pkl"), "rb") as mapping_file:
    label_mapping = pickle.load(mapping_file)

# 2. Load the new, unlabelled point cloud (semicolon-separated, no header row).
input_file = Path("PW_P3A2_normalisiert.txt")
df = pd.read_csv(input_file, sep=";", header=None)

# 3. Build the output file stem from the input file name and the result directory.
# The first two underscore-separated parts of the input stem are used as
# object and class identifiers.
file_name = input_file.stem
parts = file_name.split("_")
# Use an explicit if/else: in `a, b if cond else c` the conditional expression
# binds only to `b`, so a stem without "_" would have put the whole fallback
# tuple into `klass` instead of falling back for both names.
if len(parts) >= 2:
    objekt, klass = parts[0], parts[1]
else:
    objekt, klass = "Unknown", "Unknown"
# The last two underscore-separated parts of the result directory are taken as
# classifier name and model version.
parts2 = result_dir.split("_")
classifier, model_name = parts2[-2], parts2[-1]
file_name_neu = f"{objekt}_{klass}_{classifier}_{model_name}"

# 4. Column names of the unlabelled point cloud (no label column!).
# Defined once so that the column assignment and the feature selection cannot
# drift apart.
feature_columns = [
    "X", "Y", "Z",
    "Red", "Green", "Blue",
    "Hue", "Saturation", "Value",
    "X_dir", "Y_dir", "Z_dir"
]
df.columns = feature_columns

# 5. Feature selection (taken before the prediction columns are added).
X_new = df[feature_columns]

# 6. Scale with the scaler loaded from the training run, then classify.
X_new_scaled = scaler.transform(X_new)
predicted_labels = model.predict(X_new_scaled)
df["Predicted_Label"] = predicted_labels
# Decode by mapping the column that already lives in `df`. Building a separate
# pd.Series from the raw array gives it a fresh 0..n-1 index, and the
# assignment would then be aligned on index -- which is only correct as long
# as `df` still has its default index.
df["Label_Decoded"] = df["Predicted_Label"].map(label_mapping)

# 7. Save the results
# 7.1 Whole point cloud including both prediction columns as one text file.
output_txt = os.path.join(result_dir, f"{file_name_neu}.txt")
df.to_csv(output_txt, sep=";", index=False)
print(f"✅ Punktwolke mit Vorhersage gespeichert als {output_txt}")

# 7.2 One additional file per predicted class; the decoded label is appended
#     to the file stem.
per_class = df.groupby("Label_Decoded")
for label_value, gruppe in per_class:
    klass_datei = os.path.join(
        result_dir,
        f"{file_name_neu}_{label_value}.txt",
    )
    gruppe.to_csv(klass_datei, sep=";", index=False)
    print(f"✅ Klasse '{label_value}' gespeichert als {klass_datei}")

# 8. Report the wall-clock time since `start_time` as hours:minutes:seconds.
elapsed = time.time() - start_time
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"⏱️ Laufzeit: {int(h):02d}:{int(m):02d}:{s:05.2f} (Std:Min:Sek)")


✅ Punktwolke mit Vorhersage gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3.txt
✅ Klasse 'Bäume' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Bäume.txt
✅ Klasse 'Fassade' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Fassade.txt
✅ Klasse 'Flachdach' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Flachdach.txt
✅ Klasse 'Ground' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Ground.txt
✅ Klasse 'Schrägdach' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Schrägdach.txt
✅ Klasse 'Strasse' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Strasse.txt
✅ Klasse 'Trottoir' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Trottoir.txt
✅ Klasse 'Wasser' gespeichert als Resultate_SGDClassifier_v3\PW_P3A2_SGDClassifier_v3_Wasser.txt
⏱️ Laufzeit: 00:01:07.82 (Std:Min:Sek)
