In [25]:
#importing important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder,StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
import joblib

In [2]:
#importing warning
import warnings 
warnings.filterwarnings("ignore")
warnings.warn("this will not show")


In [3]:
#Loading the dataset
df = pd.read_excel("sample_data.xlsx")
df.head()

Unnamed: 0,Income,LoanAmount,CreditScore,InterestRate,Target
0,85994,50587,520,15.23,0
1,50432,124440,458,4.81,0
2,84208,129188,451,21.17,1
3,31713,44799,743,7.07,0
4,20437,9139,633,6.51,0


In [18]:
#Seperate features and target variables
X = df.drop(columns=["Target"])
y = df["Target"]

#Seperate numeric and categorical columns
numeric_features = X.select_dtypes(include=['int64','float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

In [5]:
#Create transformers for data preprocessing
numeric_transformer = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='mean')),
('scaler',StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='most_frequent')),
('encoder',OrdinalEncoder(handle_unknown='use_encoded_value',unknown_value = -1))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num',numeric_transformer,numeric_features),
        ('cat',categorical_transformer,categorical_features)])

In [6]:
# Create a pipeline with SVM model 
base_pipeline = Pipeline(steps=[
    ('preprocessor',preprocessor),
    ('classifier',SVC())
])

In [19]:
#Train test split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train, y_test = train_test_split(X,y, random_state = 42, test_size = 0.2, stratify =y)


print("y type:", type(y_train))
print("y shape:", y_train.shape)

# If it's a DataFrame, show columns
if hasattr(y_train, "columns"):
    print("y_train columns:", list(y_train.columns))
    print(y_train.head())


y type: <class 'pandas.core.series.Series'>
y shape: (400,)


In [20]:
# Train adn evaluate base model
base_pipeline.fit(X_train,y_train)

In [21]:
#Predictions and evaluation metrics
print("Evaluation Metrics For Base Model")
y_train_pred = base_pipeline.predict(X_train)
y_test_pred = base_pipeline.predict(X_test)

def evaluate_model(y_true, y_pred, data_type = "Train"):
    print(f"Evaluation Metrics for {data_type} data:")
    print(f"Accuracy: {accuracy_score(y_true,y_pred)}")
    print(f"F1 Score: {f1_score(y_true, y_pred)}")
    print("Classification Report:")
    print(classification_report(y_true,y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("\n")

evaluate_model(y_train, y_train_pred, "Train")
evaluate_model(y_test, y_test_pred,"Test")




Evaluation Metrics For Base Model
Evaluation Metrics for Train data:
Accuracy: 0.8975
F1 Score: 0.0
Classification Report:
              precision    recall  f1-score   support

           0       0.90      1.00      0.95       359
           1       0.00      0.00      0.00        41

    accuracy                           0.90       400
   macro avg       0.45      0.50      0.47       400
weighted avg       0.81      0.90      0.85       400

Confusion Matrix:
[[359   0]
 [ 41   0]]


Evaluation Metrics for Test data:
Accuracy: 0.9
F1 Score: 0.0
Classification Report:
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        90
           1       0.00      0.00      0.00        10

    accuracy                           0.90       100
   macro avg       0.45      0.50      0.47       100
weighted avg       0.81      0.90      0.85       100

Confusion Matrix:
[[90  0]
 [10  0]]




In [29]:
#hyperparameter tuning
param_grid = {
    'classifier__C':[0.1,1,10],
    'classifier__kernel':['linear','rbf','poly'],
    'classifier__gamma':['scale','auto'],
}

grid_search = GridSearchCV(base_pipeline, param_grid, cv=5, n_jobs =-1,scoring='accuracy')
grid_search.fit(X_train,y_train)

print(f"Best Hyperparameters:{grid_search.best_params_}")

Best Hyperparameters:{'classifier__C': 0.1, 'classifier__gamma': 'scale', 'classifier__kernel': 'linear'}


In [31]:
#Build and evaluate the best model 
print("")
print("-----EVAL METRICS FOR BEST MODEL-----")
print("")

best_pipeline = grid_search.best_estimator_

y_train_best_pred = best_pipeline.predict(X_train)
y_test_best_pred = best_pipeline.predict(X_test)

evaluate_model(y_train_best_pred,y_train)
evaluate_model(y_test_best_pred, y_test)


-----EVAL METRICS FOR BEST MODEL-----

Evaluation Metrics for Train data:
Accuracy: 0.8975
F1 Score: 0.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.90      0.95       400
           1       0.00      0.00      0.00         0

    accuracy                           0.90       400
   macro avg       0.50      0.45      0.47       400
weighted avg       1.00      0.90      0.95       400

Confusion Matrix:
[[359  41]
 [  0   0]]


Evaluation Metrics for Train data:
Accuracy: 0.9
F1 Score: 0.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.90      0.95       100
           1       0.00      0.00      0.00         0

    accuracy                           0.90       100
   macro avg       0.50      0.45      0.47       100
weighted avg       1.00      0.90      0.95       100

Confusion Matrix:
[[90 10]
 [ 0  0]]




In [36]:
metrics_comparison = pd.DataFrame({
    'Metric': ['Accuracy', 'F1 Score'],
    'BaseModelTrain': [
        accuracy_score(y_train, y_train_pred),
        f1_score(y_train, y_train_pred)
    ],
    'BaseModelTest': [
        accuracy_score(y_test, y_test_pred),
        f1_score(y_test, y_test_pred)
    ],
    'BestModelTrain': [
        accuracy_score(y_train, y_train_best_pred),
        f1_score(y_train, y_train_best_pred)
    ],
    'BestModelTest': [
        accuracy_score(y_test, y_test_best_pred),
        f1_score(y_test, y_test_best_pred)   # ✅ fixed
    ]
})

print(metrics_comparison)

     Metric  BaseModelTrain  BaseModelTest  BestModelTrain  BestModelTest
0  Accuracy          0.8975            0.9          0.8975            0.9
1  F1 Score          0.0000            0.0          0.0000            0.0


In [41]:
#Train the final model
final_model = best_pipeline.fit(X,y)

In [42]:
#Train the final model
joblib.dump(final_model, 'final_svm_model.pkl')

print("")
print("Final model saved as 'final_svm_model.pkl'")


Final model saved as 'final_svm_model.pkl'
