In [None]:
# # 📌 **Importing necessary libraries**
# This cell imports all required libraries for data handling, preprocessing, SVM modeling, and evaluation.


In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# # 📌 **Loading the dataset**
# The dataset `loan_approved.csv` is loaded into a DataFrame for further analysis.


In [25]:

# Load data
df = pd.read_csv("loan_approved.csv")

In [None]:
# # 📌 **Renaming target column**
# Renames the column 'Loan_Status (Approved)' to 'Loan_Status' for ease of use.


In [27]:
# Rename target column to simplify access
df.rename(columns={'Loan_Status (Approved)': 'Loan_Status'}, inplace=True)

In [None]:
# # 📌 **Dropping non-informative column**
# Drops the 'Loan_ID' column as it doesn't contribute to the prediction.


In [29]:
# Drop Loan_ID (not useful for prediction)
df.drop('Loan_ID', axis=1, inplace=True)

In [None]:
# # 📌 **Encoding categorical variables**
# Uses `LabelEncoder` to convert categorical text features into numerical format for model compatibility.


In [31]:
# Encode categorical variables
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

In [None]:
# # 📌 **Handling missing values**
# Applies `SimpleImputer` with strategy='most_frequent' to fill missing values in both categorical and numerical columns.


In [45]:
from sklearn.impute import SimpleImputer
# Handle missing values using SimpleImputer
imputer = SimpleImputer(strategy='most_frequent')  # For categorical + numerical
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

In [None]:
# # 📌 **Splitting features and target**
# Separates the independent variables (`X`) from the target variable (`y`).


In [47]:
# Split features and target
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']

In [None]:
# # 📌 **Feature scaling**
# Applies standardization using `StandardScaler` to normalize the features.


In [49]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# # 📌 **Train-test split**
# Splits the dataset into training and testing subsets for model evaluation.


In [51]:
# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [None]:
# # 📌 **Hyperparameter tuning setup**
# Defines a parameter grid for `GridSearchCV` to tune `C`, `gamma`, and `kernel` for the SVM model.


In [53]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 0.1, 1],
    'kernel': ['rbf', 'linear']
}

In [55]:

grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=1, cv=5)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [57]:
# Best parameters and evaluation
print("Best Parameters:", grid.best_params_)
y_pred = grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 0.7886178861788617

Classification Report:
               precision    recall  f1-score   support

         0.0       0.95      0.42      0.58        43
         1.0       0.76      0.99      0.86        80

    accuracy                           0.79       123
   macro avg       0.85      0.70      0.72       123
weighted avg       0.83      0.79      0.76       123

