In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


# Support Vector Classifier

In [3]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the datasets
train_data = pd.read_csv("/content/drive/MyDrive/P2/T1/Dataset/combined/ML-c-train.csv")
valid_data = pd.read_csv("/content/drive/MyDrive/P2/T1/Dataset/combined/ML-c-val.csv")
test_data = pd.read_csv("/content/drive/MyDrive/P2/T1/Dataset/combined/ML-c-test.csv")

# Separate features and target
X_train, y_train = train_data.drop(columns=["disease_label"]), train_data["disease_label"]
X_valid, y_valid = valid_data.drop(columns=["disease_label"]), valid_data["disease_label"]
X_test, y_test = test_data.drop(columns=["disease_label"]), test_data["disease_label"]

# Ensure all feature columns are numeric
X_train = X_train.select_dtypes(include=["number"])
X_valid = X_valid.select_dtypes(include=["number"])
X_test = X_test.select_dtypes(include=["number"])

# Ensure the same columns in all splits
common_columns = X_train.columns.intersection(X_valid.columns).intersection(X_test.columns)
X_train = X_train[common_columns]
X_valid = X_valid[common_columns]
X_test = X_test[common_columns]

# Scale features (SVM requires scaled data)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Train the SVM model
svm_model = SVC(kernel="linear", random_state=42, class_weight="balanced")
svm_model.fit(X_train, y_train)

# Validate the Model
y_valid_pred = svm_model.predict(X_valid)
val_report = classification_report(y_valid, y_valid_pred)
print("Validation Report (SVM):")
print(val_report)

# Test the Model
y_test_pred = svm_model.predict(X_test)
test_report = classification_report(y_test, y_test_pred)
print("Test Report (SVM):")
print(test_report)


Validation Report (SVM):
                                    precision    recall  f1-score   support

                    adenocarcinoma       0.89      0.95      0.92       163
                          adhesion       0.91      0.96      0.93       164
                     affect labile       0.95      0.91      0.93        79
                         arthritis       0.90      1.00      0.95       131
                            asthma       0.71      0.97      0.82        71
      benign prostatic hypertrophy       0.93      1.00      0.96        79
                  biliary calculus       0.96      0.79      0.87       190
                  carcinoma breast       0.98      0.94      0.96       170
chronic obstructive airway disease       0.92      0.87      0.89       143
                         cirrhosis       1.00      0.94      0.97       170
                           colitis       0.98      0.83      0.90       190
         coronary arteriosclerosis       0.75      0.87      0

# Saved Predicted Output in CSV

In [4]:
"""
import pandas as pd

# Save predictions (optional)
predictions = pd.DataFrame({"True Labels": y_test, "Predicted Labels": y_test_pred})
predictions.to_csv("/content/drive/MyDrive/P2/ML training/SVC_predictions.csv", index=False)
print("Predictions saved.")
"""

'\nimport pandas as pd\n\n# Save predictions (optional)\npredictions = pd.DataFrame({"True Labels": y_test, "Predicted Labels": y_test_pred})\npredictions.to_csv("/content/drive/MyDrive/P2/ML training/SVC_predictions.csv", index=False)\nprint("Predictions saved.")\n'

# Generate Classification Report

In [5]:

import pandas as pd
from sklearn.metrics import classification_report

# Save the classification report as CSV (optional)
report_dict = classification_report(y_test, y_test_pred, output_dict=True, zero_division=0)
report_df = pd.DataFrame(report_dict).transpose()
report_df.to_csv("/content/drive/MyDrive/P2/T1/Dataset/combined/ML/SVC-c-class.csv", index=True)
print("Classification report saved.")


Classification report saved.
