In [1]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [2]:
def load_data(train_sequences_path, train_labels_path, test_sequences_path, test_labels_path):
    """Read the four dataset arrays from disk with ``np.load``.

    Parameters
    ----------
    train_sequences_path, train_labels_path, test_sequences_path, test_labels_path
        Locations of the files to read, each passed unchanged to ``np.load``.

    Returns
    -------
    tuple
        ``(train_sequences, train_labels, test_sequences, test_labels)``,
        in the same order as the path arguments.
    """
    ordered_paths = (
        train_sequences_path,
        train_labels_path,
        test_sequences_path,
        test_labels_path,
    )
    # Files are read one after another, in argument order.
    return tuple(np.load(path) for path in ordered_paths)

In [3]:
def apply_pca(train_sequences, test_sequences, n_components=4, random_state=None):
    """Reduce both feature sets with a PCA fitted on the training set only.

    The projection is learned from ``train_sequences`` alone and then applied
    to ``test_sequences``, so the test data never influences the components.

    Parameters
    ----------
    train_sequences
        Training feature matrix; used both to fit the PCA and to be projected.
    test_sequences
        Test feature matrix; projected with the already-fitted PCA.
    n_components : int, default 4
        Forwarded to ``PCA(n_components=...)``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``PCA(random_state=...)``. Pass an integer to make the
        result reproducible when scikit-learn selects a randomized SVD solver.
        The default ``None`` keeps the previous behaviour.

    Returns
    -------
    tuple
        ``(train_reduced, test_reduced)`` — the projected training and test data.
    """
    pca = PCA(n_components=n_components, random_state=random_state)
    # Use new names rather than rebinding the inputs, so it stays obvious
    # which arrays are raw and which are projected.
    train_reduced = pca.fit_transform(train_sequences)
    test_reduced = pca.transform(test_sequences)
    return train_reduced, test_reduced

In [4]:
def split_data(data, num_splits):
    """Cut ``data`` into ``num_splits`` pieces using ``np.array_split``.

    Parameters
    ----------
    data
        Array-like to be divided.
    num_splits : int
        Number of pieces requested.

    Returns
    -------
    list
        The list of sub-arrays produced by ``np.array_split``.
    """
    pieces = np.array_split(data, indices_or_sections=num_splits)
    return pieces

In [5]:
def train_and_predict_svc(train_sequences, train_labels, test_sequences, test_labels):
    """Fit a default-configured ``SVC`` and return its predictions for the test data.

    Parameters
    ----------
    train_sequences
        Feature matrix the classifier is fitted on.
    train_labels
        Targets corresponding to ``train_sequences``.
    test_sequences
        Feature matrix to classify.
    test_labels
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    The value returned by ``SVC.predict(test_sequences)``.
    """
    # ``fit`` returns the estimator itself, so training and prediction chain.
    classifier = SVC().fit(train_sequences, train_labels)
    return classifier.predict(test_sequences)

In [6]:
# Read the preprocessed train/test arrays from the working directory
train_sequences, train_labels, test_sequences, test_labels = load_data(
    'train_sequences.npy', 'train_labels.npy', 'test_sequences.npy', 'test_labels.npy'
)

# Project both feature sets with PCA (fitted inside apply_pca)
train_sequences, test_sequences = apply_pca(train_sequences, test_sequences, n_components=4)

# Cut every array into the same number of parts (15)
num_splits = 15
split_train_sequences, split_train_labels, split_test_sequences, split_test_labels = (
    split_data(array, num_splits)
    for array in (train_sequences, train_labels, test_sequences, test_labels)
)

# Accumulators for the predictions and ground-truth labels of every part
all_predictions = []
all_true_labels = []

# Each of the four lists holds exactly num_splits parts, so zipping them
# pairs the k-th training part with the k-th test part.
paired_parts = zip(
    split_train_sequences, split_train_labels, split_test_sequences, split_test_labels
)
for train_seq_part, train_lbl_part, test_seq_part, test_lbl_part in paired_parts:
    # A separate classifier is trained and evaluated for every pair of parts
    predictions = train_and_predict_svc(
        train_seq_part, train_lbl_part, test_seq_part, test_lbl_part
    )
    all_predictions.extend(predictions)
    all_true_labels.extend(test_lbl_part)

# Score the pooled predictions against the pooled true labels
accuracy = accuracy_score(all_true_labels, all_predictions)
report = classification_report(all_true_labels, all_predictions)

# Show the overall accuracy followed by the per-class report
print(f'Accuracy: {accuracy}')
print(f'Classification report:\n{report}')

Accuracy: 0.76668
Classification report:
              precision    recall  f1-score   support

           0       0.84      0.66      0.74     12500
           1       0.72      0.88      0.79     12500

    accuracy                           0.77     25000
   macro avg       0.78      0.77      0.76     25000
weighted avg       0.78      0.77      0.76     25000

