In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# --- Training data ----------------------------------------------------------
# Read the labelled rows from disk.
train_df = pd.read_csv('train.csv')

# 'Target' holds the label; 'id' is removed together with it so that neither
# column is passed to the model as a feature.
y = train_df['Target']
X = train_df.drop(labels=['id', 'Target'], axis='columns')

# Turn the label values into integer codes. The fitted encoder `le` is kept
# so the codes can be mapped back to the original labels later on.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# --- Scaling ----------------------------------------------------------------
# The scaler is fitted on the training split only and then applied to both
# splits, so the validation rows never influence the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# --- Model ------------------------------------------------------------------
# RBF-kernel support vector classifier; other kernels such as 'linear' or
# 'poly' can be tried by changing `kernel`.
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train_scaled, y_train)

# --- Validation -------------------------------------------------------------
# Predict the held-out split, then report overall accuracy followed by the
# per-class metrics labelled with the original (un-encoded) class names.
y_pred = svm_model.predict(X_val_scaled)

val_accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", val_accuracy)

val_report = classification_report(y_val, y_pred, target_names=le.classes_)
print("\nClassification Report:\n", val_report)

# Load the unlabelled test rows and the submission template.
test_df = pd.read_csv('test.csv')
submission_df = pd.read_csv('sample_submission.csv')

# Prepare test features.
# Select them by the training column list rather than only dropping 'id':
# this pins both the column set and the column order to what the scaler and
# the SVM were fitted on, and raises a KeyError naming any training feature
# that is missing from test.csv.
X_test = test_df[X_train.columns]
X_test_scaled = scaler.transform(X_test)

# Predict encoded classes, then map them back to the original label values.
test_preds_encoded = svm_model.predict(X_test_scaled)
test_preds = le.inverse_transform(test_preds_encoded)

# Prepare submission.
# The predictions are in test.csv row order and are written into the template
# by position, so first make sure the template lists the same ids in the same
# order; otherwise labels would silently be attached to the wrong rows.
if 'id' in submission_df.columns:
    if list(submission_df['id']) != list(test_df['id']):
        raise ValueError(
            "sample_submission.csv ids do not match test.csv ids row-for-row; "
            "refusing to write a misaligned submission"
        )

submission_df['Target'] = test_preds
submission_df.to_csv('svm_submission.csv', index=False)
print("Submission saved as 'svm_submission.csv'")


Validation Accuracy: 0.8200470465237847

Classification Report:
               precision    recall  f1-score   support

     Dropout       0.91      0.80      0.85      5028
    Enrolled       0.63      0.57      0.60      3017
    Graduate       0.83      0.94      0.88      7259

    accuracy                           0.82     15304
   macro avg       0.79      0.77      0.78     15304
weighted avg       0.82      0.82      0.82     15304

Submission saved as 'svm_submission.csv'
