# 🧬 SVM Mutation Prediction
This notebook performs mutation label prediction using a classical Support Vector Machine (SVM) algorithm.

In [None]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Read the combined mutation dataset (path is relative to the working
# directory) and preview the first rows to confirm the load worked.
dataset_path = 'final_combined_mutation_dataset.csv'
df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Preprocess: normalize column names and expand 'encoded_sequence' into one
# column per sequence position.
df.columns = [col.strip().lower() for col in df.columns]  # trim whitespace, lowercase

# Each 'encoded_sequence' cell is stored as a string; parse it back into a
# Python object. literal_eval only evaluates literals, never arbitrary code.
encoded_sequences = df['encoded_sequence'].apply(ast.literal_eval)

# Build the wide frame with a single constructor call. This avoids
# `.apply(pd.Series)`, which creates one Series object per row and becomes
# very slow on large datasets. Reusing df.index keeps the rows aligned for
# the concat below.
sequence_df = pd.DataFrame(encoded_sequences.tolist(), index=df.index)
sequence_df.columns = [f'encoded_sequence_{i}' for i in range(sequence_df.shape[1])]

# Final dataset: the original columns minus the raw sequence, plus the
# expanded per-position columns.
df = pd.concat([df.drop(columns=['encoded_sequence']), sequence_df], axis=1)
df.head()

In [None]:
# Separate the target column from the predictors.
# NOTE(review): every remaining column is used as a feature -- confirm none of
# them are identifiers or non-numeric, since the features are standardized
# before training.
target_column = 'mutation_label'
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Train-test split and normalization.
# Hold out 30% of the rows for testing. Stratifying on y keeps the class
# proportions of the full dataset in both partitions, so the reported test
# metrics are not distorted by an unlucky draw on an imbalanced label.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training partition only, then reuse those statistics
# on the test partition so no test information leaks into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fit an RBF-kernel support vector classifier on the standardized training
# features. The hyperparameters are collected in one dict so they are easy to
# spot and tune.
svm_params = {'kernel': 'rbf', 'C': 1.0, 'gamma': 'scale'}
svm_model = SVC(**svm_params)
svm_model.fit(X_train_scaled, y_train)

In [None]:
# Score the held-out test set: predict a label for every standardized test
# row, then report the fraction of predictions that match the true labels.
y_pred = svm_model.predict(X_test_scaled)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'SVM Test Accuracy: {acc:.4f}')

In [None]:
# Confusion matrix.
# Take the label set from the fitted model rather than hard-coding [0, 1].
# Passing the same labels to both the matrix and the display keeps the matrix
# square and its tick labels correct even if the labels are not exactly 0/1
# or a class happens to be absent from the test split.
class_labels = svm_model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap='Blues')
# Title the axes the display actually drew on, instead of relying on pyplot's
# implicit "current axes".
disp.ax_.set_title('Confusion Matrix - SVM')
plt.show()

In [None]:
# Persist true vs. predicted test labels side by side. Rows are written in the
# positional order of the test split; the original row index is not saved.
predictions_path = 'svm_mutation_predictions.csv'
pred_df = pd.DataFrame({'True_Label': y_test.values}).assign(Predicted_Label=y_pred)
pred_df.to_csv(predictions_path, index=False)
print('Predictions saved to svm_mutation_predictions.csv')