In [10]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler


In [7]:
# Read both pre-split CSV tables in one pass (test first, then train).
test_weighted_df, train_weighted_df = map(
    pd.read_csv,
    ('src/test_weighted.csv', 'src/train_weighted.csv'),
)

In [13]:
# Per-column count of missing values in the test table.
test_weighted_df.isnull().sum()

transcript_id            0
position                 0
flanking_nucleotide      0
dwelling_time_(-1)       1
mean_signal_(-1)         1
mean_range_(-1)          1
dwelling_time_central    1
mean_signal_central      1
mean_range_central       1
dwelling_time_(+1)       1
mean_signal_(+1)         1
mean_range_(+1)          1
label                    0
dtype: int64

In [14]:
# Per-column count of missing values in the training table.
train_weighted_df.isnull().sum()

transcript_id            0
position                 0
flanking_nucleotide      0
dwelling_time_(-1)       0
mean_signal_(-1)         0
mean_range_(-1)          0
dwelling_time_central    0
mean_signal_central      0
mean_range_central       0
dwelling_time_(+1)       0
mean_signal_(+1)         0
mean_range_(+1)          0
label                    0
dtype: int64

In [15]:
# Discard every test row that has at least one missing value, then repeat the
# per-column null count to confirm nothing is left.
test_weighted_df = test_weighted_df.dropna(axis=0, how='any')
test_weighted_df.isnull().sum()

transcript_id            0
position                 0
flanking_nucleotide      0
dwelling_time_(-1)       0
mean_signal_(-1)         0
mean_range_(-1)          0
dwelling_time_central    0
mean_signal_central      0
mean_range_central       0
dwelling_time_(+1)       0
mean_signal_(+1)         0
mean_range_(+1)          0
label                    0
dtype: int64

In [16]:
# Columns fed to the model: `position` plus dwelling time, mean signal and
# mean range for each of the (-1), central and (+1) column groups.
features = [
    'position',
    'dwelling_time_(-1)',
    'mean_signal_(-1)',
    'mean_range_(-1)',
    'dwelling_time_central',
    'mean_signal_central',
    'mean_range_central',
    'dwelling_time_(+1)',
    'mean_signal_(+1)',
    'mean_range_(+1)',
]

# Split each table into its feature matrix and its `label` target column.
X_train, y_train = train_weighted_df[features], train_weighted_df['label']
X_test, y_test = test_weighted_df[features], test_weighted_df['label']

# Standardize the features: the scaler is fitted on the training matrix only
# and the same fitted scaler is then applied to both matrices.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
# Build an RBF-kernel SVC and fit it on the scaled training features.
svm_model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    random_state=42,
).fit(X_train_scaled, y_train)

# Predicted labels for the scaled test features.
y_pred = svm_model.predict(X_test_scaled)

In [18]:
# Print each test-set metric under its heading: accuracy, then the confusion
# matrix, then the per-class classification report.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

Accuracy: 0.7638347622759158
Confusion Matrix:
 [[955 283]
 [ 20  25]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.77      0.86      1238
           1       0.08      0.56      0.14        45

    accuracy                           0.76      1283
   macro avg       0.53      0.66      0.50      1283
weighted avg       0.95      0.76      0.84      1283

