**Anti-Fraud Model** by *Sohail, Ahmed,* and *Hiten*

---

We use a **Feedforward Neural Network (FNN)** because our dataset is structured and tabular.

Our initial goal was that....

In [1]:
# IMPORTING NECESSARY LIBRARIES
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import KernelPCA

from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf

import time
import seaborn as sns

In [17]:
# LOADING THE DATASET

import pandas as pd

# Forward slash instead of a backslash: 'Dataset\C...' relied on an invalid
# escape sequence ("\C") and only resolved to a real path on Windows.
# A forward slash is accepted on Windows, Linux and macOS alike.
df = pd.read_csv('Dataset/Cleaned_AntiFraud_Centre_Dataset.csv')

# First rows, then summary statistics for every column (numeric and categorical)
print(df.head())
print(f"\n\n{df.describe(include='all')}")
print("\n")

# Data type of each column
print(df.dtypes)

   Number ID Date Received Complaint Received Type        Country  \
0          1      1/2/2021            CAFC Website         Canada   
1          2      1/2/2021            CAFC Website         Canada   
2          3      1/2/2021            CAFC Website  Not Specified   
3          4      1/2/2021            CAFC Website  United States   
4          5      1/2/2021            CAFC Website         Canada   

     Province/State Fraud and Cybercrime Thematic Categories  \
0       Nova Scotia                                 Phishing   
1  British Columbia                           Identity Fraud   
2     Not Specified                                  Romance   
3        California                      Foreign Money Offer   
4           Ontario                              Merchandise   

  Solicitation Method         Gender Language of Correspondence  \
0        Text message         Female                    English   
1       Other/unknown         Female                    English   

In [None]:
# GRAPH AND ANALYSIS

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Summary statistics for every column. Statistics that are only defined for
# numerical features are reported as NaN for the categorical features.
print(df.describe(include='all'))

# "Complaint Type" is the target: a "Victim" row is treated as a fraud case,
# every other value as a non-fraud case.
sns.countplot(data=df, x='Complaint Type').set_title('Class Distribution')
plt.show()

print("\n\n\n")


In [None]:
# FUNCTION TO SEE CORRELATION OF FEATURES - TARGET
# FUNCTION WILL PRINT 9 GRAPHS

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Reload the dataset from disk for this cell.
# Forward slash instead of a backslash: 'Dataset\C...' relied on an invalid
# escape sequence ("\C") and only resolved to a real path on Windows.
df = pd.read_csv('Dataset/Cleaned_AntiFraud_Centre_Dataset.csv')

def plot_feature_target_correlation(df, columns, target, top_n=10):
    """Draw one count plot per feature column, split by the target value.

    For every column in ``columns`` only the rows whose value is among the
    ``top_n`` most frequent values of that column are kept; the remaining
    rows are counted per value, with one bar group per ``target`` value.

    Args:
        df: DataFrame containing the feature columns and the ``target`` column.
        columns: Iterable of column names; one figure is drawn for each.
        target: Column name used as the ``hue`` of the count plot.
        top_n: How many of the most frequent categories to keep per column.
    """
    for col in columns:
        top_categories = df[col].value_counts().nlargest(top_n).index
        filtered_df = df[df[col].isin(top_categories)]

        # The figure must be created BEFORE drawing. Creating it afterwards
        # left the plot on a default-sized figure and displayed an additional
        # empty 16x6 figure on every iteration.
        plt.figure(figsize=(16, 6))
        sns.countplot(x=col, hue=target, data=filtered_df)
        plt.title(f" Top {top_n} {col} vs {target}")
        plt.xticks(rotation=45)
        plt.show()
        
        
# Feature columns to compare against the target; one figure per column.
columns = [
    'Date Received',
    'Complaint Received Type',
    'Country',
    'Province/State',
    'Fraud and Cybercrime Thematic Categories',
    'Solicitation Method',
    'Gender',
    'Language of Correspondence',
    'Victim Age Range',
]

# Column whose values colour the bars of each count plot.
target = 'Complaint Type'

plot_feature_target_correlation(df=df, columns=columns, target=target)

In [8]:
# VIEWING TARGET VARIABLE STATISTICS

# Number of rows per distinct "Complaint Type" value, most frequent first.
print(df.loc[:, "Complaint Type"].value_counts())

Complaint Type
Victim        202546
Attempt       104274
Other           5829
Unknown          888
Incomplete       439
Name: count, dtype: int64


In [16]:
# HANDLING MISSING DATA AND PROCESSING

import pandas as pd

# Reload the dataset so the cleaning below always starts from the file on disk.
# Forward slash instead of a backslash: 'Dataset\C...' relied on an invalid
# escape sequence ("\C") and only resolved to a real path on Windows.
df = pd.read_csv('Dataset/Cleaned_AntiFraud_Centre_Dataset.csv')

# Convert Not Specified, Not Available, etc. into NaN value
df = df.replace(["Not Specified", "Not Available / non disponible", "Not Available"], pd.NA)
print(f"NaN Value counts in dataset before replacing values:\n--------------------\n{df.isna().sum()}")

# Categorical columns where a missing value becomes its own "Unknown" category
for column in ['Province/State', 'Country', 'Gender', 'Language of Correspondence']:
    df[column] = df[column].fillna('Unknown')

# Change NaN Values of Solicitation Method to mode (most frequent value) to minimize
# missingness and reduce bias
mode_solicitation = df["Solicitation Method"].mode()[0]
df["Solicitation Method"] = df["Solicitation Method"].fillna(mode_solicitation)

# Strip "$" and "," from the amounts and convert them to float. The pattern is
# a raw string because "\$" is not a valid escape in a normal string literal.
df["Dollar Loss"] = df["Dollar Loss"].replace(r'[\$,]', '', regex=True).astype(float)

# Fill any remaining NaN with the column mean. The result is assigned back
# rather than using fillna(inplace=True) on the selected column, which is a
# chained in-place update that newer pandas versions warn about or ignore.
# NOTE(review): zero amounts are NOT replaced here, although the earlier
# comment said they would be - confirm which behaviour is intended.
df["Dollar Loss"] = df["Dollar Loss"].fillna(df["Dollar Loss"].mean())

# Check to make sure there are no more missing values
print(f"\nNaN Value check after handling:\n--------------------\n{df.isnull().sum()}")

NaN Value counts in dataset before replacing values:
--------------------
Number ID                                       0
Date Received                                   0
Complaint Received Type                         0
Country                                     71122
Province/State                              74332
Fraud and Cybercrime Thematic Categories        0
Solicitation Method                         17193
Gender                                      79420
Language of Correspondence                  80692
Victim Age Range                                0
Complaint Type                                  0
Number of Victims                               0
Dollar Loss                                     0
dtype: int64

NaN Value check after handling:
--------------------
Number ID                                   0
Date Received                               0
Complaint Received Type                     0
Country                                     0
Province/State           