# Importing packages and data

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Load the fraud dataset from a CSV file located in the working directory.
df = pd.read_csv(filepath_or_buffer='fraud_prediction.csv')

# Prepping data for model

In [2]:
# Remove the 'Unnamed: 0' column (presumably a row index that was saved into
# the CSV — it is not used as a feature).
# errors='ignore' keeps this cell idempotent: the result is assigned back to
# `df`, so without it a second run of the cell would raise KeyError on the
# already-removed column.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [3]:
# Everything except the 'isFraud' label is a predictor; both parts are taken
# as plain NumPy arrays.
target = df['isFraud'].values
features = df.drop(columns=['isFraud']).values

# Stratified 70/30 hold-out split with a fixed seed, so both partitions keep
# the class proportions of `target` and the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    stratify=target,
    random_state=42,
)

In [4]:
# Baseline linear SVM: every hyperparameter is left at its default, only the
# random seed is fixed for repeatability.
svm = LinearSVC(random_state=50)

In [5]:
# Train the baseline on the training partition. The call is left as the
# cell's last expression so the fitted estimator's repr is displayed.
svm.fit(X_train, y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=50, tol=0.0001,
     verbose=0)

# Running Model

In [6]:
# Score the baseline on the held-out test partition and print the result as
# a percentage with two decimals.
test_accuracy = svm.score(X_test, y_test)
print("{0:.2%}".format(test_accuracy))

97.13%


# Optimizing Hyperparameters

In [7]:
# Fresh, unfitted linear SVM to serve as the base estimator for the
# hyperparameter search (same seed as the baseline).
svm = LinearSVC(random_state=50)

In [8]:
# Candidate values for C (the inverse regularization strength), spanning
# several orders of magnitude.
# NOTE(review): the grid jumps from 0.1 to 10 and never tries C=1, the value
# the baseline model uses — confirm whether that gap is intentional.
param_grid = {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 10]}

# cv is pinned to 3 folds, which is what the implicit default (cv='warn')
# resolved to when this notebook was run. Stating it explicitly silences the
# FutureWarning and keeps the selected C from changing when a newer
# scikit-learn switches the default to 5 folds.
grid = GridSearchCV(svm, param_grid, cv=3)

# Left as the cell's last expression so the fitted search object's repr is shown.
grid.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=50, tol=0.0001,
     verbose=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [9]:
# Report the parameter setting that the grid search selected.
best_params = grid.best_params_
print("The best value of the inverse regularization strength is:", best_params)

The best value of the inverse regularization strength is: {'C': 0.01}


# Redoing model with standardized attributes

In [17]:
# Two-stage pipeline: standardize the features, then fit a linear SVM.
# C = 0.01 is the value the grid search reported as best.
order = [
    ('scaler', StandardScaler()),
    ('SVM', LinearSVC(C=0.01, random_state=50)),
]

pipeline = Pipeline(steps=order)

In [18]:
# Fit the scaler + SVM pipeline on the training partition. Pipeline.fit
# returns the pipeline itself, so `svm_scaled` is the fitted pipeline.
svm_scaled = pipeline.fit(X_train, y_train)

# Score the scaled model the same way as the baseline so the two numbers are
# comparable. The recorded output of this cell shows a percentage, but no
# statement produced it; without this line a fresh run prints nothing.
# NOTE(review): assumes the recorded figure is test-set accuracy — confirm.
print("{0:.2%}".format(svm_scaled.score(X_test, y_test)))

99.71%
