#### Import Library

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import tensorflow as tf




#### Some basic Parameters

In [None]:
# Reproducibility: one seed shared by NumPy and TensorFlow
RANDOM_STATE = 24

# Training hyper-parameters
EPOCHS = 200        # number of epochs
ALPHA = 0.001       # learning rate
BATCH_SIZE = 32
REG_LAMBDA = 0.05
TEST_SIZE = 0.2

# Early stopping / learning-rate schedule settings
PATIENCE, LR_PATIENCE, LR_Factor = 20, 10, 0.1

# Seed both random number generators with the shared seed
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

In [None]:
# Filesystem locations used by the notebook.  Each default is the original
# machine-specific path; set the matching environment variable to run the
# notebook on another machine without editing this cell.
inpDir = os.environ.get('FASTAG_INPUT_PATH', r'C:\Users\Administrator.DAI-PC2\Desktop\240340128024\FastagFraudDetection.csv')  # Location where input data is stored
outDir = os.environ.get('FASTAG_OUTPUT_DIR', r'C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\output')  # Location to store outputs
logDir = os.environ.get('FASTAG_LOG_DIR', r'C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\logs')    # Location to store logs
modelDir = os.environ.get('FASTAG_MODEL_DIR', r'C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\models')# Location to store models

In [None]:
# Make sure the output, model and log directories exist, creating (and
# announcing) only the ones that are missing.
for _required_dir in (outDir, modelDir, logDir):
    if os.path.exists(_required_dir):
        continue
    os.makedirs(_required_dir)
    print('Created {} directory'.format(_required_dir))

Created C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\output directory
Created C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\models directory
Created C:\Users\Administrator.DAI-PC2\Desktop\DNN\Exam\logs directory


## Data Preparation

* #### Load the FastagFraudDetection dataset


In [None]:
# Read the CSV (path is relative, so it is resolved against the current working directory) into `df`.
# NOTE(review): `inpDir` defined earlier also points at a FastagFraudDetection.csv but is not used here — confirm which path is intended.
df = pd.read_csv(r'FastagFraudDetection.csv')


In [None]:
# Peek at five randomly chosen rows
df.sample(n=5)

Unnamed: 0,Transaction_ID,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Transaction_Amount,Amount_paid,Geographical_Location,Vehicle_Speed,Vehicle_Plate_Number,Fraud_indicator
1941,1942,6/9/2023 12:55,Motorcycle,FTG-897-BCD-567,D-106,Regular,Small,0,0,"12.936687032945434, 77.53113977439017",95,KA09YZ5878,Not Fraud
4088,4089,11/2/2023 16:30,SUV,FTG-078-NJH-614,B-102,Express,Large,140,140,"12.936687032945434, 77.53113977439017",58,MH12KL4567,Not Fraud
4564,4565,11/23/2023 18:06,SUV,FTG-634-RFV-098,B-102,Express,Large,145,145,"13.21331620748757, 77.55413526894684",56,KA89CT0123,Not Fraud
2075,2076,1/16/2023 0:35,Truck,FTG-432-DEF-345,C-103,Regular,Large,330,115,"13.21331620748757, 77.55413526894684",89,AP16OP9012,Fraud
3,4,1/9/2023 2:05,Truck,FTG-044-LMN-322,C-103,Regular,Large,350,120,"13.059816123454882, 77.77068662374292",92,KA11GH3456,Fraud


In [None]:
# Print column names, non-null counts and dtypes of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Transaction_ID         5000 non-null   int64 
 1   Timestamp              5000 non-null   object
 2   Vehicle_Type           5000 non-null   object
 3   FastagID               4451 non-null   object
 4   TollBoothID            5000 non-null   object
 5   Lane_Type              5000 non-null   object
 6   Vehicle_Dimensions     5000 non-null   object
 7   Transaction_Amount     5000 non-null   int64 
 8   Amount_paid            5000 non-null   int64 
 9   Geographical_Location  5000 non-null   object
 10  Vehicle_Speed          5000 non-null   int64 
 11  Vehicle_Plate_Number   5000 non-null   object
 12  Fraud_indicator        5000 non-null   object
dtypes: int64(4), object(9)
memory usage: 507.9+ KB


In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the frame's columns
df.describe()

Unnamed: 0,Transaction_ID,Transaction_Amount,Amount_paid,Vehicle_Speed
count,5000.0,5000.0,5000.0,5000.0
mean,2500.5,161.062,141.261,67.8512
std,1443.520003,112.44995,106.480996,16.597547
min,1.0,0.0,0.0,10.0
25%,1250.75,100.0,90.0,54.0
50%,2500.5,130.0,120.0,67.0
75%,3750.25,290.0,160.0,82.0
max,5000.0,350.0,350.0,118.0


In [None]:
# (number of rows, number of columns)
df.shape

(5000, 13)

In [None]:
# Parse the textual 'Timestamp' values and store them back as datetimes
timestamp_parsed = pd.to_datetime(df['Timestamp'])
df['Timestamp'] = timestamp_parsed

In [None]:
# Count the missing (NaN) entries in the 'FastagID' column.
# (The former bare `df.isnull()` call was dropped: its result was neither
# displayed nor stored, so it had no effect.)
print("Missing values in 'FastagID':", df['FastagID'].isnull().sum())

Missing values in 'FastagID': 549


In [None]:
# Discard the rows whose 'FastagID' is missing; only that column is checked for NaN
df = df.dropna(subset=['FastagID'])

In [None]:
# Re-inspect row count, non-null counts and dtypes of the current frame
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4451 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Transaction_ID         4451 non-null   int64         
 1   Timestamp              4451 non-null   datetime64[ns]
 2   Vehicle_Type           4451 non-null   object        
 3   FastagID               4451 non-null   object        
 4   TollBoothID            4451 non-null   object        
 5   Lane_Type              4451 non-null   object        
 6   Vehicle_Dimensions     4451 non-null   object        
 7   Transaction_Amount     4451 non-null   int64         
 8   Amount_paid            4451 non-null   int64         
 9   Geographical_Location  4451 non-null   object        
 10  Vehicle_Speed          4451 non-null   int64         
 11  Vehicle_Plate_Number   4451 non-null   object        
 12  Fraud_indicator        4451 non-null   object        
dtypes: datet

* #### Check for class imbalance

In [None]:
import pandas as pd

# Tally how many rows carry each 'Fraud_indicator' label
label_column = df['Fraud_indicator']
target_counts = label_column.value_counts()

print("Class distribution:")
print(target_counts)

# Share of the rarest label among all labelled rows
rarest_count, labelled_rows = target_counts.min(), target_counts.sum()
minority_class_proportion = rarest_count / labelled_rows
print(f"Proportion of minority class: {minority_class_proportion:.2%}")

Class distribution:
Fraud_indicator
Not Fraud    3468
Fraud         983
Name: count, dtype: int64
Proportion of minority class: 22.08%


In [None]:
# Balance the classes by randomly duplicating minority-class rows.  The sampler
# is seeded so the same rows are duplicated every time this cell runs.
# NOTE(review): resampling happens before the train/test split further down, so
# copies of one row can end up in both sets — consider resampling only the
# training portion.
ros = RandomOverSampler(sampling_strategy='minority', random_state=RANDOM_STATE)
df, _ = ros.fit_resample(df, df['Fraud_indicator'])

In [None]:
# Column labels of the current frame
df.columns

Index(['Transaction_ID', 'Timestamp', 'Vehicle_Type', 'FastagID',
       'TollBoothID', 'Lane_Type', 'Vehicle_Dimensions', 'Transaction_Amount',
       'Amount_paid', 'Geographical_Location', 'Vehicle_Speed',
       'Vehicle_Plate_Number', 'Fraud_indicator'],
      dtype='object')

In [None]:
# (rows, columns) of the current frame
df.shape

(6936, 13)

In [None]:
# Recount the 'Fraud_indicator' labels in the current frame
target_counts = df['Fraud_indicator'].value_counts()

print("Class distribution:", target_counts, sep="\n")

# Smallest per-label share of the total
minority_class_proportion = target_counts.div(target_counts.sum()).min()
print(f"Proportion of minority class: {minority_class_proportion:.2%}")

Class distribution:
Fraud_indicator
Fraud        3468
Not Fraud    3468
Name: count, dtype: int64
Proportion of minority class: 50.00%


* #### Preprocess the data: normalize the input features and split the dataset into training and testing sets.

In [None]:
# Column labels available for feature selection
df.columns

Index(['Transaction_ID', 'Timestamp', 'Vehicle_Type', 'FastagID',
       'TollBoothID', 'Lane_Type', 'Vehicle_Dimensions', 'Transaction_Amount',
       'Amount_paid', 'Geographical_Location', 'Vehicle_Speed',
       'Vehicle_Plate_Number', 'Fraud_indicator'],
      dtype='object')

In [None]:
# Inputs are the 'Transaction_Amount' and 'Amount_paid' columns; the target is 'Fraud_indicator'
selected_features = ['Transaction_Amount', 'Amount_paid']
X, y = df[selected_features], df['Fraud_indicator']

# Hold out a TEST_SIZE fraction of the rows for testing, using a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

In [None]:
# Show only the first rows instead of rendering the whole frame
df.head()

Unnamed: 0,Transaction_ID,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Transaction_Amount,Amount_paid,Geographical_Location,Vehicle_Speed,Vehicle_Plate_Number,Fraud_indicator
0,1,2023-01-06 11:20:00,Bus,FTG-001-ABC-121,A-101,Express,Large,350,120,"13.059816123454882, 77.77068662374292",65,KA11AB1234,Fraud
1,2,2023-01-07 14:55:00,Car,FTG-002-XYZ-451,B-102,Regular,Small,120,100,"13.059816123454882, 77.77068662374292",78,KA66CD5678,Fraud
2,4,2023-01-09 02:05:00,Truck,FTG-044-LMN-322,C-103,Regular,Large,350,120,"13.059816123454882, 77.77068662374292",92,KA11GH3456,Fraud
3,5,2023-01-10 06:35:00,Van,FTG-505-DEF-652,B-102,Express,Medium,140,100,"13.059816123454882, 77.77068662374292",60,KA44IJ6789,Fraud
4,6,2023-01-11 10:00:00,Sedan,FTG-066-GHI-987,A-101,Regular,Medium,160,100,"13.059816123454882, 77.77068662374292",105,KA77KL0123,Fraud
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6931,4117,2023-02-07 16:14:00,SUV,FTG-166-GHI-432,B-102,Express,Large,140,70,"12.936687032945434, 77.53113977439017",82,MH45YZ4567,Fraud
6932,3460,2023-12-27 13:37:00,Bus,FTG-812-ZXC-654,C-103,Regular,Large,290,100,"13.042660878688794, 77.47580097259879",87,KA90UV1234,Fraud
6933,3715,2023-08-27 03:35:00,Truck,FTG-060-QAZ-218,C-103,Express,Large,340,90,"12.84197701525119, 77.67547528176169",62,MH45AB6789,Fraud
6934,3639,2023-02-26 05:43:00,Van,FTG-992-BVC-654,B-102,Express,Medium,130,100,"12.84197701525119, 77.67547528176169",44,KA23JK8901,Fraud


In [None]:
# Standardise the features with statistics learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Turn the string labels into integer codes; the mapping is learned from the training labels
label_encoder = LabelEncoder().fit(y_train)
y_train, y_test = label_encoder.transform(y_train), label_encoder.transform(y_test)

## Model Architecture

* #### Design a feedforward neural network architecture
* #### Define the number of layers, neurons in each layer, and activation functions

In [None]:
# Feed-forward binary classifier: 64 -> 32 -> 1 units.
# Every layer comes from `tf.keras` so the layers and the `tf.keras.Sequential`
# container belong to the same Keras implementation.
k_reg = tf.keras.regularizers.L2(REG_LAMBDA)  # L2 penalty applied to the first layer's kernel
dropout_rate = 0.15  # NOTE(review): defined but no Dropout layer uses it
model = tf.keras.Sequential([
    # First hidden layer: 64 ReLU units; also fixes the input width
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=k_reg),
    # Second hidden layer: 32 ReLU units
    tf.keras.layers.Dense(32, activation='relu'),
    # Output layer: a single sigmoid unit
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

## Model Training

* #### Initialize the model parameters (weights and biases)
* #### Define a loss function appropriate for the task
* #### Train the model using gradient descent or any suitable optimization algorithm
* #### Monitor the training process by observing the loss and accuracy metrics


In [None]:
# Loss and optimizer.
# The network ends in a single sigmoid unit, so its output is a probability for
# integer labels 0/1: binary cross-entropy on probabilities (not logits) is the
# matching loss.  Sparse categorical cross-entropy treats the single output as a
# one-class softmax and rejects label 1.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

# Use the learning rate declared in the parameters cell instead of the string default
model.compile(
    loss=loss_fn,
    optimizer=tf.keras.optimizers.Adam(learning_rate=ALPHA),
    metrics=['accuracy'],
)

In [None]:
# File that receives the best weights.  A concrete file name is required: the
# previous value was just the directory itself.  The '.weights.h5' suffix is the
# form Keras expects for weights-only checkpoints.
chechpoint_path = os.path.join(modelDir, 'fastag_fraud.weights.h5')

# Save the weights whenever the validation loss improves
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    chechpoint_path,
    monitor='val_loss',
    verbose=2,
    save_best_only=True,
    save_weights_only=True,
    mode='auto',
)

# Stop after PATIENCE epochs without validation-loss improvement and roll back
# to the best weights seen
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=PATIENCE,
    verbose=2,
    mode='auto',
    restore_best_weights=True,
)

# Multiply the learning rate by LR_Factor after LR_PATIENCE stagnant epochs,
# never going below min_lr
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=LR_Factor,
    patience=LR_PATIENCE,
    verbose=2,
    mode='auto',
    min_lr=0.00001,
)

# NOTE(review): no callback in this cell consumes log_dir — confirm whether a
# TensorBoard callback was intended.
log_dir = os.path.join(logDir, 'fit')

In [None]:

# Train the model.
# Epoch count and batch size come from the parameters cell, and the callbacks
# prepared earlier (checkpointing, early stopping, learning-rate reduction) are
# now actually passed to fit.  The last 20% of the training rows are used for
# validation.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=.2,
    callbacks=[
        model_checkpoint_callback,
        early_stopping_callback,
        learning_rate_callback,
    ],
)

Epoch 1/1000


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 377, in dispatch_queue

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 250, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 748, in __init__

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-26-435e990c2c4e>", line 3, in <cell line: 3>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1807, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 143, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 270, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend.py", line 5775, in sparse_categorical_crossentropy

Received a label value of 1 which is outside the valid range of [0, 1).  Label values: 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 0 0
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_989]

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Draw the per-epoch training and validation loss on the same axes
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.show()

## Model Evaluation

* #### Evaluate the trained model on the testing dataset

* #### Calculate and report the accuracy


In [None]:
# Collect the metrics recorded in `history.history` into a DataFrame and display it
loss = pd.DataFrame(history.history)
loss

In [None]:
# Plot the 'loss' and 'val_loss' columns; the trailing ';' hides the returned Axes repr
loss.plot(y = ['loss','val_loss']);

In [None]:
# Plot the 'accuracy' and 'val_accuracy' columns; the trailing ';' hides the returned Axes repr
loss.plot(y = ['accuracy','val_accuracy']);

In [None]:
# Evaluate the trained model: one sigmoid output per test row, so y_pred has
# shape (n_samples, 1).
y_pred = model.predict(X_test)
print(y_pred.shape)
# With a single output column argmax(axis=1) is always 0; the class is obtained
# by thresholding the predicted value at 0.5 instead.
print((y_pred > 0.5).astype(int).ravel())

In [None]:
### Accuracy Score
# Threshold the single sigmoid output at 0.5 to get 0/1 class predictions
# (argmax over one column would label every row 0).
y_pred_class = (y_pred > 0.5).astype(int).ravel()
accuracy = accuracy_score(y_test, y_pred_class)
# This score is computed on the held-out test rows, so label it accordingly
print(f'Test Accuracy: {accuracy:.3f}')

In [None]:
# summarize history for accuracy
# The model was compiled with metrics=['accuracy'], so the history keys are
# lower-case 'accuracy' / 'val_accuracy'.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve comes from fit's validation split, not from the test set
plt.legend(['train', 'validation'], loc='lower right')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
## Confusion matrix and accuracy on the test set
# Convert the sigmoid outputs to 0/1 classes first: argmax over a single column
# is always 0, and accuracy_score cannot compare raw probabilities with labels.
y_pred_class = (y_pred > 0.5).astype(int).ravel()

cm = confusion_matrix(y_test, y_pred_class)
acc = round(accuracy_score(y_test, y_pred_class) * 100, 2)
print(cm)
print(acc,'%')

In [None]:
# Annotated confusion-matrix heat map for the test set.
# Drawn with scikit-learn's ConfusionMatrixDisplay because `sns` is never
# imported in this notebook; predictions are thresholded at 0.5 because argmax
# over the single sigmoid column is always 0.
y_pred_class = (y_pred > 0.5).astype(int).ravel()
ConfusionMatrixDisplay(
    confusion_matrix(y_test, y_pred_class),
    display_labels=label_encoder.classes_,
).plot(cmap='viridis');

In [None]:
# Per-class precision / recall / F1 on the test set.
# classification_report expects (y_true, y_pred) in that order; y_test is
# already a 1-D array of integer labels, and the predictions are the sigmoid
# outputs thresholded at 0.5.
y_pred_class = (y_pred > 0.5).astype(int).ravel()
print(classification_report(y_test, y_pred_class, target_names=label_encoder.classes_))

## Experimentation


* #### Experiment with different hyperparameters such as learning rate, number of hidden layers, and number of neurons in each layer.
* #### Explore different activation functions (e.g. ReLU, sigmoid, tanh) and observe their impact on the model's performance.


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Load the Titanic dataset
# NOTE(review): this cell switches from the Fastag data to 'titanic.csv' and
# overwrites X, y, scaler and the train/test variables created earlier in the
# notebook — confirm this dataset is the intended one for the experiments.
titanic = pd.read_csv('titanic.csv')

# Preprocess the data: every column except 'Survived' is a feature
X = titanic.drop('Survived', axis=1)
y = titanic['Survived']

# Split the dataset into training and testing sets BEFORE scaling, so the
# scaler's statistics are not computed from the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the input features: fit on the training rows, apply to both sets.
# `X` itself stays unscaled; only its column count is used later.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define a function to create and train the model
def train_model(learning_rate, hidden_layers, neurons_per_layer, activation_function):
    """Build, train and score one fully connected binary classifier.

    Reads the notebook-level X_train, y_train, X_test and y_test.

    Args:
        learning_rate: Step size handed to the Adam optimizer.
        hidden_layers: Number of hidden Dense layers (at least one is always built).
        neurons_per_layer: Units in every hidden layer.
        activation_function: Activation used by every hidden layer.

    Returns:
        Tuple ``(accuracy, history)``: the accuracy on X_test/y_test using a 0.5
        decision threshold, and the History object returned by ``fit``.
    """
    model = Sequential()
    model.add(Dense(neurons_per_layer, activation=activation_function, input_shape=(X_train.shape[1],)))

    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons_per_layer, activation=activation_function))

    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and rejected by current Keras releases.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=0.001)

    # Early stopping watches a slice of the training data rather than the test
    # set, so the test accuracy reported below is not influenced by the
    # stopping decision.
    history = model.fit(
        np.asarray(X_train),
        np.asarray(y_train),
        epochs=100,
        batch_size=128,
        validation_split=0.2,
        callbacks=[early_stopping],
    )

    y_pred = model.predict(X_test)
    # Flatten the (n, 1) predictions into a 1-D vector of 0/1 labels
    y_pred_class = (y_pred > 0.5).astype(int).ravel()
    accuracy = accuracy_score(y_test, y_pred_class)

    return accuracy, history

# Experiment with different hyperparameters
learning_rates = [0.01, 0.001, 0.0001]
hidden_layers_list = [1, 2, 3]
neurons_per_layer_list = [32, 64, 128]
activation_functions = ['relu', 'sigmoid', 'tanh']

best_accuracy = 0
best_config = None

# Exhaustive search over every combination of the four lists
for learning_rate in learning_rates:
    for hidden_layers in hidden_layers_list:
        for neurons_per_layer in neurons_per_layer_list:
            for activation_function in activation_functions:
                accuracy, history = train_model(learning_rate, hidden_layers, neurons_per_layer, activation_function)
                print(f"Learning Rate: {learning_rate}, Hidden Layers: {hidden_layers}, Neurons per Layer: {neurons_per_layer}, Activation Function: {activation_function}, Accuracy: {accuracy:.3f}")

                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_config = {
                        'learning_rate': learning_rate,
                        'hidden_layers': hidden_layers,
                        'neurons_per_layer': neurons_per_layer,
                        'activation_function': activation_function,
                        # Keep the winning run's training curves and fitted
                        # model; the deliverables cell reads these two keys.
                        'history': history,
                        'model': history.model,
                    }

print(f"Best Accuracy: {best_accuracy:.3f}")
# Print only the hyper-parameters, not the stored History/model objects
best_hyperparameters = None if best_config is None else {
    key: value for key, value in best_config.items() if key not in ('history', 'model')
}
print("Best Configuration:", best_hyperparameters)

## Deliverables

* #### Python code (.ipynb) implementing the neural network

* #### Training loss curve plot.

* #### Evaluation results saved in predictions.csv file

In [None]:
# Plot the training loss curve for the best configuration
# Use the History/model stored with the best configuration when present;
# otherwise refit that configuration from its hyper-parameters.
best_history = best_config.get('history')
if best_history is None:
    _, best_history = train_model(
        best_config['learning_rate'],
        best_config['hidden_layers'],
        best_config['neurons_per_layer'],
        best_config['activation_function'],
    )
best_model = best_config.get('model', best_history.model)

plt.plot(best_history.history['loss'], label='Training Loss')
plt.plot(best_history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.show()

# Save the evaluation results
y_pred = best_model.predict(X_test)
# Flatten the (n, 1) predictions: DataFrame columns must be 1-dimensional
y_pred_class = (y_pred > 0.5).astype(int).ravel()
pd.DataFrame({'Survived': np.asarray(y_test).ravel(), 'Predicted': y_pred_class}).to_csv('predictions.csv', index=False)