In [1]:
import pandas as pd
import numpy as np

In [2]:
df_fraud=pd.read_csv('../Data/fraud_preprocessed.csv')
df_credit=pd.read_csv('../Data/credit_clean.csv')

In [3]:
df_fraud.head()

Unnamed: 0,user_id,signup_time,purchase_time,purchase_value,device_id,age,ip_address,class,hour_of_day,day_of_week,...,country_United States,country_Uruguay,country_Uzbekistan,country_Vanuatu,country_Venezuela,country_Viet Nam,country_Virgin Islands (U.S.),country_Yemen,country_Zambia,country_Zimbabwe
0,2,2015-01-11 03:47:13,2015-02-21 10:03:37,0.310345,FGBQNDNBETFJJ,0.12069,880217484,0,10,5,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4,2015-06-02 16:40:57,2015-09-26 21:32:16,0.22069,MKFUIVOHLJBYN,0.344828,2785906106,0,21,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8,2015-05-28 07:53:06,2015-08-13 11:53:07,0.262069,SCQGQALXBUQZJ,0.12069,356056736,0,11,3,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,12,2015-01-10 06:25:12,2015-03-04 20:56:37,0.17931,MSNWCFEHKTIOY,0.017241,2985180352,0,20,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,16,2015-02-03 13:48:23,2015-03-12 12:46:23,0.0,FROZWSSWOHZBE,0.241379,578312545,0,12,3,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
df_credit.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer
import mlflow
import mlflow.sklearn
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, SimpleRNN, LSTM

In [4]:
def preprocess_fraud_data(df):
    df['signup_time'] = pd.to_datetime(df['signup_time'])
    df['purchase_time'] = pd.to_datetime(df['purchase_time'])
    df = df.drop(['signup_time', 'purchase_time', 'user_id', 'device_id'], axis=1)
    return df

df_fraud = preprocess_fraud_data(df_fraud)

In [5]:
# Impute missing values
imputer = SimpleImputer(strategy='mean')
df_fraud = pd.DataFrame(imputer.fit_transform(df_fraud), columns=df_fraud.columns)
df_credit = pd.DataFrame(imputer.fit_transform(df_credit), columns=df_credit.columns)

In [6]:
# Ensure target variables are binary
df_fraud['class'] = df_fraud['class'].astype(int)
df_credit['Class'] = df_credit['Class'].astype(int)

In [7]:
# Separate features and target for Fraud_Data
X_fraud = df_fraud.drop('class', axis=1)
y_fraud = df_fraud['class']

# Separate features and target for creditcard data
X_credit = df_credit.drop('Class', axis=1)
y_credit = df_credit['Class']

# Train-test split for Fraud_Data
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.3, random_state=42)

# Train-test split for creditcard data
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.3, random_state=42)

In [8]:
# Model training and evaluation function
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Performance of {model_name}:")
    print(classification_report(y_test, y_pred))

    # Log model and metrics with MLflow
    with mlflow.start_run():
        mlflow.sklearn.log_model(model, model_name)
        mlflow.log_param("model_type", model_name)
        mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))
        mlflow.log_metric("precision", precision_score(y_test, y_pred))
        mlflow.log_metric("recall", recall_score(y_test, y_pred))
        mlflow.log_metric("f1_score", f1_score(y_test, y_pred))

In [10]:
# Logistic Regression
log_reg_fraud = LogisticRegression()
train_and_evaluate_model(log_reg_fraud, X_fraud_train, y_fraud_train, X_fraud_test, y_fraud_test, "Logistic Regression")



Performance of Logistic Regression:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.90      1.00      0.95     35044
           1       0.00      0.00      0.00      3700

    accuracy                           0.90     38744
   macro avg       0.45      0.50      0.47     38744
weighted avg       0.82      0.90      0.86     38744



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# Logistic Regression
log_reg_credit = LogisticRegression()
train_and_evaluate_model(log_reg_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, "Logistic Regression")



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Performance of Logistic Regression:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     84984
           1       0.69      0.57      0.63       134

    accuracy                           1.00     85118
   macro avg       0.84      0.79      0.81     85118
weighted avg       1.00      1.00      1.00     85118





In [9]:
# Decision Tree
dt_fraud = DecisionTreeClassifier()
train_and_evaluate_model(dt_fraud, X_fraud_train, y_fraud_train, X_fraud_test, y_fraud_test, "Decision Tree")



Performance of Decision Tree:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94     35044
           1       0.47      0.57      0.51      3700

    accuracy                           0.90     38744
   macro avg       0.71      0.75      0.73     38744
weighted avg       0.91      0.90      0.90     38744



In [16]:
# Decision Tree
dt_credit = DecisionTreeClassifier()
train_and_evaluate_model(dt_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, "Decision Tree")



Performance of Decision Tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     84984
           1       0.71      0.78      0.75       134

    accuracy                           1.00     85118
   macro avg       0.86      0.89      0.87     85118
weighted avg       1.00      1.00      1.00     85118



In [17]:
# Random Forest
rf_credit = RandomForestClassifier()
train_and_evaluate_model(rf_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, "Random Forest")



Performance of Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     84984
           1       0.95      0.73      0.83       134

    accuracy                           1.00     85118
   macro avg       0.98      0.87      0.91     85118
weighted avg       1.00      1.00      1.00     85118



In [10]:
# Random Forest
rf_fraud = RandomForestClassifier()
train_and_evaluate_model(rf_fraud, X_fraud_train, y_fraud_train, X_fraud_test, y_fraud_test, "Random Forest")



Performance of Random Forest:
              precision    recall  f1-score   support

           0       0.95      1.00      0.98     35044
           1       0.99      0.53      0.69      3700

    accuracy                           0.95     38744
   macro avg       0.97      0.77      0.83     38744
weighted avg       0.96      0.95      0.95     38744



In [1]:
X_fraud_train

NameError: name 'X_fraud_train' is not defined

In [19]:
# Gradient Boosting
gb_fraud = GradientBoostingClassifier()
train_and_evaluate_model(gb_fraud, X_fraud_train, y_fraud_train, X_fraud_test, y_fraud_test, "Gradient Boosting")



Performance of Gradient Boosting:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     35044
           1       0.88      0.01      0.02      3700

    accuracy                           0.91     38744
   macro avg       0.89      0.50      0.49     38744
weighted avg       0.90      0.91      0.86     38744



In [20]:
# Gradient Boosting
gb_credit = GradientBoostingClassifier()
train_and_evaluate_model(gb_credit,  X_credit_train, y_credit_train, X_credit_test, y_credit_test, "Gradient Boosting")



Performance of Gradient Boosting:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     84984
           1       0.76      0.49      0.59       134

    accuracy                           1.00     85118
   macro avg       0.88      0.74      0.80     85118
weighted avg       1.00      1.00      1.00     85118



In [21]:
# Multi-Layer Perceptron (MLP)
mlp_credit = MLPClassifier()
train_and_evaluate_model(mlp_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, "MLP")



Performance of MLP:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     84984
           1       0.52      0.75      0.62       134

    accuracy                           1.00     85118
   macro avg       0.76      0.88      0.81     85118
weighted avg       1.00      1.00      1.00     85118



In [22]:
# Multi-Layer Perceptron (MLP)
mlp_fraud = MLPClassifier()
train_and_evaluate_model(mlp_fraud, X_fraud_train, y_fraud_train, X_fraud_test, y_fraud_test, "MLP")



Performance of MLP:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     35044
           1       0.10      1.00      0.17      3700

    accuracy                           0.10     38744
   macro avg       0.05      0.50      0.09     38744
weighted avg       0.01      0.10      0.02     38744



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [23]:
# Neural Network model architectures and training
def train_nn_model(model, X_train, y_train, X_test, y_test, model_name):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Log model and metrics with MLflow
    with mlflow.start_run():
        mlflow.keras.log_model(model, model_name)
        loss, accuracy = model.evaluate(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)



In [24]:
# Reshape data for CNN and RNN models
X_fraud_train_cnn_rnn = X_fraud_train.values.reshape((X_fraud_train.shape[0], X_fraud_train.shape[1], 1))
X_fraud_test_cnn_rnn = X_fraud_test.values.reshape((X_fraud_test.shape[0], X_fraud_test.shape[1], 1))
X_credit_train_cnn_rnn = X_credit_train.values.reshape((X_credit_train.shape[0], X_credit_train.shape[1], 1))
X_credit_test_cnn_rnn = X_credit_test.values.reshape((X_credit_test.shape[0], X_credit_test.shape[1], 1))



In [25]:
# CNN
cnn_model_fraud = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_fraud_train_cnn_rnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(1, activation='sigmoid')
])
train_nn_model(cnn_model_fraud, X_fraud_train_cnn_rnn, y_fraud_train, X_fraud_test_cnn_rnn, y_fraud_test, "CNN")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 54ms/step - accuracy: 0.8266 - loss: 1572596.5000 - val_accuracy: 0.9045 - val_loss: 22294.5137
Epoch 2/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 44ms/step - accuracy: 0.8269 - loss: 47768.0078 - val_accuracy: 0.9045 - val_loss: 0.4361
Epoch 3/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 46ms/step - accuracy: 0.9058 - loss: 0.3777 - val_accuracy: 0.9045 - val_loss: 0.3173
Epoch 4/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 45ms/step - accuracy: 0.9051 - loss: 0.3146 - val_accuracy: 0.9045 - val_loss: 0.3151
Epoch 5/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 38ms/step - accuracy: 0.9073 - loss: 0.3088 - val_accuracy: 0.9045 - val_loss: 0.3151
Epoch 6/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 46ms/step - accuracy: 0.9040 - loss: 0.3162 - val_accuracy: 0.9045 - v



[1m1211/1211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - accuracy: 0.9063 - loss: 0.3110


In [26]:
# CNN Model
cnn_model_credit = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_credit_train_cnn_rnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Train and evaluate the CNN model
train_nn_model(cnn_model_credit, X_credit_train_cnn_rnn, y_credit_train, X_credit_test_cnn_rnn, y_credit_test, "CNN")


Epoch 1/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 11ms/step - accuracy: 0.9952 - loss: 5.6698 - val_accuracy: 0.9982 - val_loss: 0.0791
Epoch 2/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 10ms/step - accuracy: 0.9982 - loss: 0.2011 - val_accuracy: 0.9984 - val_loss: 0.0601
Epoch 3/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 11ms/step - accuracy: 0.9991 - loss: 0.0466 - val_accuracy: 0.9994 - val_loss: 0.0052
Epoch 4/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 11ms/step - accuracy: 0.9990 - loss: 0.0138 - val_accuracy: 0.9994 - val_loss: 0.0045
Epoch 5/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 11ms/step - accuracy: 0.9992 - loss: 0.0069 - val_accuracy: 0.9993 - val_loss: 0.0041
Epoch 6/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 12ms/step - accuracy: 0.9981 - loss: 0.0972 - val_accuracy: 0.9993 - val_loss: 0.0112
Epoc



[1m2660/2660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 6ms/step - accuracy: 0.9995 - loss: 0.0035


In [27]:
# RNN
rnn_model_fraud = Sequential([
    SimpleRNN(100, activation='relu', input_shape=(X_fraud_train_cnn_rnn.shape[1], 1)),
    Dense(1, activation='sigmoid')
])
train_nn_model(rnn_model_fraud, X_fraud_train_cnn_rnn, y_fraud_train, X_fraud_test_cnn_rnn, y_fraud_test, "RNN")



  super().__init__(**kwargs)


Epoch 1/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m389s[0m 134ms/step - accuracy: 0.9041 - loss: 0.3427 - val_accuracy: 0.9045 - val_loss: 0.3152
Epoch 2/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 129ms/step - accuracy: 0.9066 - loss: 0.3114 - val_accuracy: 0.9045 - val_loss: 0.3162
Epoch 3/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 130ms/step - accuracy: 0.9051 - loss: 0.3149 - val_accuracy: 0.9045 - val_loss: 0.3170
Epoch 4/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m364s[0m 124ms/step - accuracy: 0.9039 - loss: 0.3173 - val_accuracy: 0.9045 - val_loss: 0.3158
Epoch 5/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 131ms/step - accuracy: 0.9048 - loss: 0.3152 - val_accuracy: 0.9045 - val_loss: 0.3164
Epoch 6/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 132ms/step - accuracy: 0.9052 - loss: 0.3141 - val_accuracy: 0.9045 - val_loss:



[1m1211/1211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 42ms/step - accuracy: 0.9063 - loss: 0.3114


In [28]:
# RNN Model
rnn_model_credit = Sequential([
    SimpleRNN(100, activation='relu', input_shape=(X_fraud_train_cnn_rnn.shape[1], 1)),
    Dense(1, activation='sigmoid')
])

# Train and evaluate the RNN model using reshaped fraud data
train_nn_model(rnn_model_credit, X_fraud_train_cnn_rnn, y_fraud_train, X_fraud_test_cnn_rnn, y_fraud_test, "RNN")


Epoch 1/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m603s[0m 163ms/step - accuracy: 0.9053 - loss: 0.3621 - val_accuracy: 0.9045 - val_loss: 0.3153
Epoch 2/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m590s[0m 194ms/step - accuracy: 0.9047 - loss: 0.3153 - val_accuracy: 0.9045 - val_loss: 0.3154
Epoch 3/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m460s[0m 158ms/step - accuracy: 0.9056 - loss: 0.3135 - val_accuracy: 0.9045 - val_loss: 0.3151
Epoch 4/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m419s[0m 148ms/step - accuracy: 0.9064 - loss: 0.3118 - val_accuracy: 0.9045 - val_loss: 0.3154
Epoch 5/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 154ms/step - accuracy: 0.9051 - loss: 0.3149 - val_accuracy: 0.9045 - val_loss: 0.3152
Epoch 6/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 140ms/step - accuracy: 0.9042 - loss: 0.3164 - val_accuracy: 0.9045 - val_loss:



[1m1211/1211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 46ms/step - accuracy: 0.9063 - loss: 0.3113


In [29]:
# LSTM
lstm_model_credit = Sequential([
    LSTM(100, activation='relu', input_shape=(X_credit_train_cnn_rnn.shape[1], 1)),
    Dense(1, activation='sigmoid')
])
train_nn_model(lstm_model_credit, X_credit_train_cnn_rnn, y_credit_train, X_credit_test_cnn_rnn, y_credit_test, "LSTM")


Epoch 1/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m478s[0m 75ms/step - accuracy: 0.9711 - loss: 47.0527 - val_accuracy: 0.9984 - val_loss: 32.1375
Epoch 2/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m403s[0m 65ms/step - accuracy: 0.9973 - loss: 19.2734 - val_accuracy: 0.9984 - val_loss: 144.8788
Epoch 3/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m523s[0m 84ms/step - accuracy: 0.9955 - loss: 32.9510 - val_accuracy: 0.9984 - val_loss: 6.2530
Epoch 4/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 80ms/step - accuracy: 0.9975 - loss: 3.2385 - val_accuracy: 0.9984 - val_loss: 1337.4928
Epoch 5/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 78ms/step - accuracy: 0.9934 - loss: 400.1309 - val_accuracy: 0.9984 - val_loss: 13.7833
Epoch 6/10
[1m6207/6207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m526s[0m 85ms/step - accuracy: 0.9970 - loss: 13.4971 - val_accuracy: 0.9984 - va



[1m2660/2660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 25ms/step - accuracy: 0.9984 - loss: 154.6166


In [30]:
# LSTM Model
lstm_model_fraud = Sequential([
    LSTM(100, activation='relu', input_shape=(X_fraud_train_cnn_rnn.shape[1], 1)),
    Dense(1, activation='sigmoid')
])

# Train and evaluate the LSTM model using reshaped fraud data
train_nn_model(lstm_model_fraud, X_fraud_train_cnn_rnn, y_fraud_train, X_fraud_test_cnn_rnn, y_fraud_test, "LSTM")


Epoch 1/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1179s[0m 407ms/step - accuracy: 0.8986 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 2/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1225s[0m 434ms/step - accuracy: 0.9070 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 3/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1106s[0m 391ms/step - accuracy: 0.9045 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 4/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1220s[0m 432ms/step - accuracy: 0.9062 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 5/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1002s[0m 354ms/step - accuracy: 0.9052 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 6/10
[1m2826/2826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1034s[0m 366ms/step - accuracy: 0.9055 - loss: nan - val_accuracy: 0.9045 - val_loss: nan
Epoch 7/10
[1m2826/28



[1m1211/1211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 190ms/step - accuracy: 0.9063 - loss: nan


In [12]:
# saved best performing model as fraud_model.pkl
# Save the model to disk
import pickle
model=rf_fraud
filename = '../Models/fraud_model.pkl'
pickle.dump(model, open(filename, 'wb'))

In [1]:
# Save the model to disk
import pickle
import pandas as pd

# Load the model
filename = '../Models/fraud_model.pkl'
loaded_model = pickle.load(open(filename, 'rb'))

# Create random data (replace with your actual features)
random_data = {
    'transaction_count_per_day': 0.8,
    'transaction_velocity_past_hour': 0.6,
    'age': 30,
    'purchase_value': 50,
    'source_Ads': 1,
    'source_Direct': 0,
    'source_SEO': 0,
    'browser_Chrome': 1,
    'browser_FireFox': 0,
    'browser_IE': 0,
    'browser_Opera': 0,
    'browser_Safari': 0,
    'sex_F': 1,
    'sex_M': 0,
    'country_Afghanistan': 0,
    'country_Albania': 0,
    'country_Algeria': 0,
    'country_United States': 1,
    # Add other country categorical features with 0 values...
}

# Ensure all required features are present in the random data
required_features = loaded_model.feature_names_in_
for feature in required_features:
    if feature not in random_data:
        random_data[feature] = 0

# Convert to DataFrame
random_df = pd.DataFrame([random_data])

# Reorder columns to match the training data
random_df = random_df[required_features]

# Make predictions
predictions = loaded_model.predict(random_df)

print("Predicted class (0: Not fraud, 1: Fraud):", predictions[0])


Predicted class (0: Not fraud, 1: Fraud): 0


In [2]:
import requests

url = 'http://localhost:5000/predict'
headers = {'Content-Type': 'application/json'}
data = {
    "transaction_count_per_day": 0.8,
    "transaction_velocity_past_hour": 0.6,
    "age": 30,
    "purchase_value": 50,
    "source_Ads": 1,
    "source_Direct": 0,
    "source_SEO": 0,
    "browser_Chrome": 1,
    "browser_IE": 0,
    "browser_Safari": 0,
    "sex_F": 1,
    "sex_M": 0,
    "country_United States": 1
}

response = requests.post(url, json=data, headers=headers)
print(response.json())


{'prediction': 0}
