## Machine  Learning Models

Loading dataset

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv(r'ready_data.csv')
df.head()

Unnamed: 0,Trans_Amount,Amount_paid,Speed,Fraud_indicator,Hour,DayOfWeek,Month,Veh_Type_Bus,Veh_Type_Car,Veh_Type_Motorcycle,...,Lane_Type_Express,Lane_Type_Regular,Veh_Dimensions_Large,Veh_Dimensions_Medium,Veh_Dimensions_Small,"Geo_location_12.84197701525119, 77.67547528176169","Geo_location_12.936687032945434, 77.53113977439017","Geo_location_13.042660878688794, 77.47580097259879","Geo_location_13.059816123454882, 77.77068662374292","Geo_location_13.21331620748757, 77.55413526894684"
0,350,120,65,0,11,4,1,1,0,0,...,1,0,1,0,0,0,0,0,1,0
1,120,100,78,0,14,5,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0
2,0,0,53,1,18,6,1,0,0,1,...,0,1,0,0,1,0,0,0,1,0
3,350,120,92,0,2,0,1,0,0,0,...,0,1,1,0,0,0,0,0,1,0
4,140,100,60,0,6,1,1,0,0,0,...,1,0,0,1,0,0,0,0,1,0


## Spliting the data into x and y

In [3]:
x = df.drop(columns='Fraud_indicator', axis=1 )
x.head()

Unnamed: 0,Trans_Amount,Amount_paid,Speed,Hour,DayOfWeek,Month,Veh_Type_Bus,Veh_Type_Car,Veh_Type_Motorcycle,Veh_Type_SUV,...,Lane_Type_Express,Lane_Type_Regular,Veh_Dimensions_Large,Veh_Dimensions_Medium,Veh_Dimensions_Small,"Geo_location_12.84197701525119, 77.67547528176169","Geo_location_12.936687032945434, 77.53113977439017","Geo_location_13.042660878688794, 77.47580097259879","Geo_location_13.059816123454882, 77.77068662374292","Geo_location_13.21331620748757, 77.55413526894684"
0,350,120,65,11,4,1,1,0,0,0,...,1,0,1,0,0,0,0,0,1,0
1,120,100,78,14,5,1,0,1,0,0,...,0,1,0,0,1,0,0,0,1,0
2,0,0,53,18,6,1,0,0,1,0,...,0,1,0,0,1,0,0,0,1,0
3,350,120,92,2,0,1,0,0,0,0,...,0,1,1,0,0,0,0,0,1,0
4,140,100,60,6,1,1,0,0,0,0,...,1,0,0,1,0,0,0,0,1,0


In [4]:
y = df['Fraud_indicator']

y.head()

0    0
1    0
2    1
3    0
4    0
Name: Fraud_indicator, dtype: int64

### Splittinng into  train set and test set

In [5]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y,train_size=0.8)

## Logistic Regression Model

### Cross Validation

In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import warnings

# Filter out future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Creating a pipeline that scales the data and then applies Logistic Regression
logistic_model=make_pipeline(StandardScaler(),LogisticRegression(max_iter=1000,
                                                                 solver='lbfgs'))

# Fit the model on the training data
logistic_model.fit(x_train, y_train)

# Perform cross-validation
scores = cross_val_score(logistic_model, x, y, cv=10)  


# Print the scores for each fold
print("Cross-Validation Scores:", scores)

# Print the average score
print("Average Score:", scores.mean())

Cross-Validation Scores: [0.99  0.976 0.948 0.976 0.974 0.95  0.978 0.964 0.974 0.974]
Average Score: 0.9703999999999999


## Inights
1. There's a slight variation in scores across different folds, but all scores are relatively high. This is a good sign of the model's robustness and its ability to generalize across different data samples.

### Hyperparameter Tuning (Grid Search)

In [36]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Define the parameter grid for grid search
param_grid = {
    'logisticregression__C': [0.001, 0.01, 0.1, 1, 10, 30, 50, 80, 100],
    'logisticregression__penalty': [ 'l2'],
}

# Create GridSearchCV object
grid_search = GridSearchCV(logistic_model, param_grid, cv=10, 
                           scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(x_train, y_train)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Get the best model from grid search
best_logistic_model = grid_search.best_estimator_

# Predict on the test data
y_pred = best_logistic_model.predict(x_test)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test, y_pred, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'logisticregression__C': 100, 'logisticregression__penalty': 'l2'}
Best Cross-Validation Accuracy: 0.98475
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.94      0.97       177
       Fraud       0.99      1.00      0.99       823

    accuracy                           0.99      1000
   macro avg       0.99      0.97      0.98      1000
weighted avg       0.99      0.99      0.99      1000



## Insights
1. The model indicates excellent performance in identifying fraudulent transactions with very few false positives and almost no false negatives.
2. The recall for 'Not Fraud' is 0.94 which suggests that some legitimate transactions are being misclassified as fraudulent.

## Training Metrics

In [13]:
from sklearn.metrics import classification_report

y_train_pred = best_logistic_model.predict(x_train)

train_report = classification_report(y_train, y_train_pred, 
                                     target_names=['Not Fraud', 'Fraud'])

print("Training Metrics Report:\n", train_report)

Training Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.92      0.96       806
       Fraud       0.98      1.00      0.99      3194

    accuracy                           0.98      4000
   macro avg       0.99      0.96      0.98      4000
weighted avg       0.99      0.98      0.98      4000



## Insights
Overall accuracy on the training set is 0.97, which is slightly lower than the test set. This is somewhat unusual as models typically perform better on the training data.

## Random Forest Model

## cross-validation

In [18]:
from sklearn.ensemble import RandomForestClassifier


# Initializing the RandomForestClassifier
randf_model = RandomForestClassifier(n_jobs=-1)

# Fit the model on the training data
randf_model.fit(x_train, y_train)

# Perform cross-validation
scores = cross_val_score(randf_model, x, y, cv=10)  # 'cv' is the number of folds

# Print the scores for each fold
print("Cross-Validation Scores:", scores)

# Print the average score
print("Average Score:", scores.mean())


Cross-Validation Scores: [0.994 0.98  0.982 0.984 0.99  0.98  0.96  0.99  0.99  0.986]
Average Score: 0.9836


## Insights
1. The scores for each fold  shows that the model's performance is quite consistent across different subsets of data.
2. There are no extreme variations in performance across folds, which is a good sign of the model's stability and generalizability.
3. A high average score like this, indicates that the model is likely to perform well on unseen data.

## Hyperparameter Tuning (Grid Search)

In [19]:


# Define the reduced parameter grid for grid search
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}

# Create GridSearchCV object for Random Forest
grid_search_rf = GridSearchCV(RandomForestClassifier(n_jobs=-1), param_grid_rf, 
                              cv=10, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_rf.fit(x_train, y_train)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_rf.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_rf.best_score_)

# Get the best model from grid search
best_rf_model = grid_search_rf.best_estimator_

# Predict on the test data
y_pred = best_rf_model.predict(x_test)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test, y_pred, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Best Cross-Validation Accuracy: 0.9869999999999999
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.94      0.97       177
       Fraud       0.99      1.00      0.99       823

    accuracy                           0.99      1000
   macro avg       0.99      0.97      0.98      1000
weighted avg       0.99      0.99      0.99      1000



### Insights
1. The model shows robustness in classifying both classes, with high accuracy (0.99) on the test set.
2. The macro and weighted averages on the test set is high and indicates balanced performance across classes, despite the potential class imbalance (more 'Fraud' instances than 'Not Fraud').

In [20]:

y_train_pred = best_rf_model.predict(x_train)

train_report = classification_report(y_train, y_train_pred, 
                                     target_names=['Not Fraud', 'Fraud'])

print("Training Metrics Report:\n", train_report)


Training Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      1.00      1.00       806
       Fraud       1.00      1.00      1.00      3194

    accuracy                           1.00      4000
   macro avg       1.00      1.00      1.00      4000
weighted avg       1.00      1.00      1.00      4000



###### Excellent Training Performance:
This model achieves perfect precision, recall, and F1-scores of 1.00 for both classes ('Not Fraud' and 'Fraud') on the training data. This indicates that the model is able to perfectly classify all training instances.

## Gradient Boosting Model

### Cross validation

In [21]:
from sklearn.ensemble import GradientBoostingClassifier


# Initializing the GradientBoostingClassifier
gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=1.5,
                                 max_depth=1, random_state=0)

# Fit the model on the training data
gbc.fit(x_train, y_train)

# Perform cross-validation
scores = cross_val_score(gbc, x, y, cv=5) 

# Print the scores for each fold
print("Cross-Validation Scores:", scores)

# Print the average score
print("Average Score:", scores.mean())

Cross-Validation Scores: [0.987 0.993 0.98  0.987 0.989]
Average Score: 0.9872


## Hyperparameter Tuning (Grid Search)

In [23]:

# Define the reduced parameter grid for grid search
param_grid_gb = {
    'n_estimators': [50, 100],
    'learning_rate': [0.1],
    'max_depth': [3, 5],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}

# Create GridSearchCV object for Gradient Boosting
grid_search_gb = GridSearchCV(GradientBoostingClassifier(), param_grid_gb, 
                              cv=5, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_gb.fit(x_train, y_train)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_gb.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_gb.best_score_)

# Get the best model from grid search
best_gb_model = grid_search_gb.best_estimator_

# Predict on the test data
y_pred_gb = best_gb_model.predict(x_test)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test, y_pred_gb, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Cross-Validation Accuracy: 0.99875
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.99      0.99       177
       Fraud       1.00      1.00      1.00       823

    accuracy                           1.00      1000
   macro avg       1.00      0.99      1.00      1000
weighted avg       1.00      1.00      1.00      1000



## Train Metrics

In [27]:

y_train_pred = best_gb_model.predict(x_train)

train_report = classification_report(y_train, y_train_pred, 
                                     target_names=['Not Fraud', 'Fraud'])

print("Training Metrics Report:\n", train_report)


Training Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.94      0.97       806
       Fraud       0.99      1.00      0.99      3194

    accuracy                           0.99      4000
   macro avg       0.99      0.97      0.98      4000
weighted avg       0.99      0.99      0.99      4000



#### This model is performing very poor since  training accurecy is lower than ttessting.

## LinearSVC

### Cross Validation

In [29]:
from sklearn.svm import LinearSVC

# Scale the features and increase the number of iterations for LinearSVC
svc = make_pipeline(StandardScaler(), LinearSVC(class_weight='balanced', 
                                                max_iter=5000))

# Fit the model on the training data
svc.fit(x_train, y_train)

# Perform cross-validation
scores = cross_val_score(svc, x, y, cv=5) 

# Print the scores for each fold
print("Cross-Validation Scores:", scores)

# Print the average score
print("Average Score:", scores.mean())

Cross-Validation Scores: [0.932 0.966 0.978 0.985 0.99 ]
Average Score: 0.9702


## Hyperparameter Tuning (Grid Search)

In [30]:


# Define the parameter grid for grid search
param_grid_svc = {
    'C': [0.01, 0.1,],
    'penalty': ['l2'],
}

# Create GridSearchCV object for LinearSVC
grid_search_svc = GridSearchCV(LinearSVC(dual=False), param_grid_svc, 
                               cv=5, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_svc.fit(x_train, y_train)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_svc.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_svc.best_score_)

# Get the best model from grid search
best_svc_model = grid_search_svc.best_estimator_

# Predict on the test data
y_pred_svc = best_svc_model.predict(x_test)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test, y_pred_svc, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'C': 0.01, 'penalty': 'l2'}
Best Cross-Validation Accuracy: 0.98375
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.94      0.97       177
       Fraud       0.99      1.00      0.99       823

    accuracy                           0.99      1000
   macro avg       0.99      0.97      0.98      1000
weighted avg       0.99      0.99      0.99      1000



## Train Metrics

In [31]:

y_train_pred = best_svc_model.predict(x_train)

train_report = classification_report(y_train, y_train_pred, 
                                     target_names=['Not Fraud', 'Fraud'])

print("Training Metrics Report:\n", train_report)


Training Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.92      0.96       806
       Fraud       0.98      1.00      0.99      3194

    accuracy                           0.98      4000
   macro avg       0.99      0.96      0.97      4000
weighted avg       0.98      0.98      0.98      4000



# Changing the dataset to be balanced for more analysis

In [37]:
df.head()

Unnamed: 0,Trans_Amount,Amount_paid,Speed,Fraud_indicator,Hour,DayOfWeek,Month,Veh_Type_Bus,Veh_Type_Car,Veh_Type_Motorcycle,...,Lane_Type_Express,Lane_Type_Regular,Veh_Dimensions_Large,Veh_Dimensions_Medium,Veh_Dimensions_Small,"Geo_location_12.84197701525119, 77.67547528176169","Geo_location_12.936687032945434, 77.53113977439017","Geo_location_13.042660878688794, 77.47580097259879","Geo_location_13.059816123454882, 77.77068662374292","Geo_location_13.21331620748757, 77.55413526894684"
0,350,120,65,0,11,4,1,1,0,0,...,1,0,1,0,0,0,0,0,1,0
1,120,100,78,0,14,5,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0
2,0,0,53,1,18,6,1,0,0,1,...,0,1,0,0,1,0,0,0,1,0
3,350,120,92,0,2,0,1,0,0,0,...,0,1,1,0,0,0,0,0,1,0
4,140,100,60,0,6,1,1,0,0,0,...,1,0,0,1,0,0,0,0,1,0


In [38]:
not_frauds = df.query('Fraud_indicator == 0')
frauds = df.query('Fraud_indicator == 1')

not_frauds['Fraud_indicator'].value_counts(),frauds['Fraud_indicator'].value_counts()

(Fraud_indicator
 0    983
 Name: count, dtype: int64,
 Fraud_indicator
 1    4017
 Name: count, dtype: int64)

In [39]:
balanced_df = pd.concat([not_frauds, frauds.sample(len(not_frauds), random_state=1)])

balanced_df['Fraud_indicator'].value_counts()

Fraud_indicator
0    983
1    983
Name: count, dtype: int64

#### Trying to reduce bias and variance, and ensuring that the model is exposed to a variety of data samples during training.

In [40]:
balanced_df = balanced_df.sample(frac=1, random_state=1)

balanced_df.head()

Unnamed: 0,Trans_Amount,Amount_paid,Speed,Fraud_indicator,Hour,DayOfWeek,Month,Veh_Type_Bus,Veh_Type_Car,Veh_Type_Motorcycle,...,Lane_Type_Express,Lane_Type_Regular,Veh_Dimensions_Large,Veh_Dimensions_Medium,Veh_Dimensions_Small,"Geo_location_12.84197701525119, 77.67547528176169","Geo_location_12.936687032945434, 77.53113977439017","Geo_location_13.042660878688794, 77.47580097259879","Geo_location_13.059816123454882, 77.77068662374292","Geo_location_13.21331620748757, 77.55413526894684"
284,140,120,63,0,6,5,3,0,0,0,...,1,0,0,1,0,0,0,0,1,0
2495,330,125,81,0,2,0,3,0,0,0,...,0,1,1,0,0,0,0,0,0,1
1949,300,300,76,1,13,5,6,0,0,0,...,0,1,1,0,0,0,1,0,0,0
3988,130,130,69,1,7,4,3,0,0,0,...,1,0,0,1,0,1,0,0,0,0
4757,330,330,75,1,23,6,11,0,0,0,...,0,1,1,0,0,0,0,0,0,1


### Splitting into x and y

In [41]:
x = balanced_df.drop(columns = 'Fraud_indicator', axis=1)

y = balanced_df['Fraud_indicator']


In [42]:
y.value_counts()

Fraud_indicator
0    983
1    983
Name: count, dtype: int64

## Converting into train and Test

In [44]:

x_train_b, x_test_b, y_train_b, y_test_b = train_test_split(x,y,train_size=0.8)


## Logistic Regression

In [51]:

# Define the parameter grid for grid search
param_grid = {
    'logisticregression__C': [0.001, 0.01, 0.1, 1, 10, 30, 50, 80, 100],
    'logisticregression__penalty': [ 'l2'],
}

# Create GridSearchCV object
grid_search = GridSearchCV(logistic_model, param_grid, cv=10, 
                           scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(x_train_b, y_train_b)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Get the best model from grid search
best_logistic_model = grid_search.best_estimator_

# Predict on the test data
y_pred_b = best_logistic_model.predict(x_test_b)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test_b, y_pred_b, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'logisticregression__C': 80, 'logisticregression__penalty': 'l2'}
Best Cross-Validation Accuracy: 0.9770982826735468
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.97      0.98       200
       Fraud       0.97      1.00      0.98       194

    accuracy                           0.98       394
   macro avg       0.98      0.98      0.98       394
weighted avg       0.99      0.98      0.98       394



In [45]:

# Predict on the test data
y_pred_b = best_logistic_model.predict(x_test_b)

# Print the classification report
print("Test Metrics Report:\n" , classification_report(y_test_b, 
                                y_pred_b, target_names=['Not Fraud', 'Fraud']))


Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.93      0.96       200
       Fraud       0.93      1.00      0.97       194

    accuracy                           0.96       394
   macro avg       0.97      0.97      0.96       394
weighted avg       0.97      0.96      0.96       394



## Random Forest

In [52]:


# Define the reduced parameter grid for grid search
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}

# Create GridSearchCV object for Random Forest
grid_search_rf = GridSearchCV(RandomForestClassifier(n_jobs=-1), param_grid_rf, 
                              cv=10, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_rf.fit(x_train_b, y_train_b)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_rf.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_rf.best_score_)

# Get the best model from grid search
best_rf_model = grid_search_rf.best_estimator_

# Predict on the test data
y_pred_b = best_rf_model.predict(x_test_b)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test_b, y_pred_b, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Cross-Validation Accuracy: 0.9821776989438039
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       0.99      0.94      0.97       200
       Fraud       0.95      0.99      0.97       194

    accuracy                           0.97       394
   macro avg       0.97      0.97      0.97       394
weighted avg       0.97      0.97      0.97       394



## Gradient Boosting

In [53]:

# Define the reduced parameter grid for grid search
param_grid_gb = {
    'n_estimators': [50, 100],
    'learning_rate': [0.1],
    'max_depth': [3, 5],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}

# Create GridSearchCV object for Gradient Boosting
grid_search_gb = GridSearchCV(GradientBoostingClassifier(), param_grid_gb, 
                              cv=5, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_gb.fit(x_train_b, y_train_b)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_gb.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_gb.best_score_)

# Get the best model from grid search
best_gb_model = grid_search_gb.best_estimator_

# Predict on the test data
y_pred_gb_b = best_gb_model.predict(x_test_b)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test_b, y_pred_gb_b, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Cross-Validation Accuracy: 0.9980932160549993
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.99      0.99       200
       Fraud       0.99      1.00      0.99       194

    accuracy                           0.99       394
   macro avg       0.99      0.99      0.99       394
weighted avg       0.99      0.99      0.99       394



## LinearSVC

In [54]:


# Define the parameter grid for grid search
param_grid_svc = {
    'C': [0.01, 0.1,],
    'penalty': ['l2'],
}

# Create GridSearchCV object for LinearSVC
grid_search_svc = GridSearchCV(LinearSVC(dual=False), param_grid_svc, 
                               cv=5, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search_svc.fit(x_train_b, y_train_b)

# Print the best parameters and the corresponding accuracy
print("Best Parameters:", grid_search_svc.best_params_)
print("Best Cross-Validation Accuracy:", grid_search_svc.best_score_)

# Get the best model from grid search
best_svc_model = grid_search_svc.best_estimator_

# Predict on the test data
y_pred_svc_b = best_svc_model.predict(x_test_b)

# Print the classification report for test metrics
print("Test Metrics Report:\n", classification_report(y_test_b, y_pred_svc_b, 
                                        target_names=['Not Fraud', 'Fraud']))


Best Parameters: {'C': 0.1, 'penalty': 'l2'}
Best Cross-Validation Accuracy: 0.9732746941664139
Test Metrics Report:
               precision    recall  f1-score   support

   Not Fraud       1.00      0.96      0.98       200
       Fraud       0.97      1.00      0.98       194

    accuracy                           0.98       394
   macro avg       0.98      0.98      0.98       394
weighted avg       0.98      0.98      0.98       394

