In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import time # To time the training

# --- Step 1: Load Your PROCESSED Data ---
# Read the preprocessed train/test splits from ./Data/, using each file's
# 'id' column as the row index.
try:
    train_df = pd.read_csv(
        './Data/salary.train.processed.csv',
        index_col='id',
    )
    test_df = pd.read_csv(
        './Data/salary.test.processed.csv',
        index_col='id',
    )
except FileNotFoundError:
    # Tell the user where the files are expected, then re-raise so the cell
    # still fails instead of continuing without data.
    print(
        "Error: Could not find the processed CSV files.",
        "Please make sure 'salary.train.processed.csv' and 'salary.test.processed.csv' are in the './Data/' folder.",
        sep="\n",
    )
    raise

# --- Step 2: Separate Features (X) and Target (y) ---
# The 'label' column is the target; every other column is kept as a feature.
y_train = train_df['label']
X_train = train_df.drop(columns='label')

y_test = test_df['label']
X_test = test_df.drop(columns='label')

# Quick sanity check on the (rows, columns) of both feature matrices.
print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")

# --- Step 3: Initialize and Train the SVM Model ---
# We'll use the default RBF kernel for now.
# NOTE: per the scikit-learn docs, SVC only consults 'random_state' when
# probability=True (it seeds the data shuffling used for probability
# estimates). With the defaults used here it is ignored, so it does not by
# itself make this fit "reproducible"; it is kept so the seed is already
# pinned if probability estimates are enabled later.
# NOTE(review): RBF-kernel SVMs are sensitive to feature scale — this assumes
# the "processed" CSVs are already scaled; confirm against the preprocessing.
svc = SVC(random_state=42)

print("\nTraining the SVM model... (This might take a few minutes)")
# Use perf_counter() rather than time.time(): it is a monotonic,
# high-resolution clock intended for measuring durations, so the elapsed time
# cannot be distorted by system clock adjustments during the fit.
# start_time/end_time are therefore only meaningful as a difference.
start_time = time.perf_counter()
svc.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Model training complete! Time taken: {end_time - start_time:.2f} seconds")

# --- Step 4: Evaluate the Model ---
# Predict on the test features and compare against the test labels.
print("\nEvaluating the model on the test set...")
y_pred_svc = svc.predict(X_test)

# Overall accuracy, reported as a percentage.
accuracy_svc = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
print(f"\nSVM Model Accuracy on Test Data: {accuracy_svc * 100:.2f}%")

# Per-class precision / recall / F1, printed with four decimal places.
report_svc = classification_report(y_true=y_test, y_pred=y_pred_svc, digits=4)
print("\nSVM Classification Report:")
print(report_svc)

Training features shape: (16720, 89)
Testing features shape: (4180, 89)

Training the SVM model... (This might take a few minutes)
Model training complete! Time taken: 15.47 seconds

Evaluating the model on the test set...

SVM Model Accuracy on Test Data: 82.44%

SVM Classification Report:
              precision    recall  f1-score   support

         0.0     0.8594    0.8324    0.8457      2416
         1.0     0.7799    0.8135    0.7963      1764

    accuracy                         0.8244      4180
   macro avg     0.8196    0.8229    0.8210      4180
weighted avg     0.8258    0.8244    0.8249      4180

