In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

import numpy as np

In [2]:
def load_data(file_path, reshape=True):
    """Read a text file of bracketed matrices, one sample per line.

    Each non-blank line is stripped, its first and last characters are
    dropped (presumably the enclosing brackets), and the remainder is split
    on ';' into rows whose whitespace-separated tokens are parsed as floats.

    Blank lines (for example an empty line at the end of the file) are
    skipped; otherwise they would become empty samples and break the
    stacking into a rectangular array.

    Args:
        file_path: Path of the text file to read.
        reshape: If True (default), flatten every sample matrix and stack
            the results into one NumPy array with one row per sample.
            If False, return the list of per-sample 2-D arrays.

    Returns:
        A NumPy array of flattened samples when ``reshape`` is True,
        otherwise a list of NumPy arrays (one per sample).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token cannot be parsed as a float.
    """
    processed_data = []
    with open(file_path, 'r') as file:
        # Stream line by line instead of materialising the whole file first.
        for line in file:
            sample = line.strip()
            if not sample:
                # Blank line: carries no sample.
                continue
            processed_data.append(np.array([
                list(map(float, row.split())) for row in sample[1:-1].split(';')
            ]))

    if reshape:
        processed_data = np.array([matrix.flatten() for matrix in processed_data])

    return processed_data

def load_labels(file_path):
    """Collect integer labels from a text file into a NumPy array.

    Every line is stripped and its first and last characters are dropped
    (presumably enclosing brackets); the rest is split on whitespace.
    Only tokens made up purely of digits are kept and converted to int,
    so any other token (e.g. a signed or decimal value) is ignored.

    Args:
        file_path: Path of the label file to read.

    Returns:
        A 1-D NumPy array with the labels in file order.
    """
    with open(file_path, 'r') as handle:
        raw_lines = handle.readlines()

    collected = []
    for raw in raw_lines:
        inner = raw.strip()[1:-1]
        for token in inner.split():
            if token.isdigit():
                collected.append(int(token))

    return np.array(collected)

# Example call of the loader functions with reshape enabled (the default)
train_data_file, train_labels_file = 'pems+sf/PEMS_train', 'pems+sf/PEMS_trainlabels'
test_data_file, test_labels_file = 'pems+sf/PEMS_test', 'pems+sf/PEMS_testlabels'

# Training split: flattened samples and their labels
train_data = load_data(train_data_file)
train_labels = load_labels(train_labels_file)

# Test split: flattened samples and their labels
test_data = load_data(test_data_file)
test_labels = load_labels(test_labels_file)

print(f"Train Data Shape: {train_data.shape} samples")
print(f"Test Data Shape: {test_data.shape} samples")

Train Data Shape: (267, 138672) samples
Test Data Shape: (173, 138672) samples


In [3]:
# Number of principal components to keep (for example 10)
n_components = 10

# Fixed seed for reproducibility: with a matrix this large and this few
# components, scikit-learn's default solver selection can pick the
# randomized SVD, whose result otherwise varies from run to run.
random_seed = 42

# Fit PCA on the training data only and project the training data
pca = PCA(n_components=n_components, random_state=random_seed)
train_data_pca = pca.fit_transform(train_data)

# Project the test data with the transformation fitted on the training data
test_data_pca = pca.transform(test_data)

# Check the dimensions of the transformed data
print(f"Train Data Shape after PCA: {train_data_pca.shape}")
print(f"Test Data Shape after PCA: {test_data_pca.shape}")

# Explained variance ratio of each kept component
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")

Train Data Shape after PCA: (267, 10)
Test Data Shape after PCA: (173, 10)
Explained variance ratio: [0.17262425 0.1493978  0.08044775 0.06843066 0.05305088 0.01986957
 0.01500646 0.01230996 0.01191902 0.01049317]


In [4]:
# Train the classifier (logistic regression) on the PCA-projected training data;
# fit() returns the fitted estimator, so construction and training are chained
logreg = LogisticRegression(max_iter=1000).fit(train_data_pca, train_labels)

# Predict labels for the PCA-projected test data
predictions = logreg.predict(test_data_pca)

# Compute the accuracy of the model on the test labels
accuracy = accuracy_score(y_true=test_labels, y_pred=predictions)
print(f"Accuracy of Logistic Regression model: {accuracy * 100:.2f}%")

Accuracy of Logistic Regression model: 72.25%
