Dataset Description:
1. Transaction_ID: Unique identifier for each transaction.
2. Timestamp: Date and time of the transaction.
3. Vehicle_Type: Type of vehicle involved in the transaction.
4. FastagID: Unique identifier of the Fastag tag used for the transaction (missing for 549 of the 5,000 rows).
5. TollBoothID: Identifier for the toll booth.
6. Lane_Type: Type of lane used for the transaction.
7. Vehicle_Dimensions: Dimensions of the vehicle.
8. Transaction_Amount: Amount associated with the transaction.
9. Amount_paid: Amount paid for the transaction.
10. Geographical_Location: Location details of the transaction.
11. Vehicle_Speed: Speed of the vehicle during the transaction.
12. Vehicle_Plate_Number: License plate number of the vehicle.
13. Fraud_indicator: Binary indicator of fraudulent activity (target variable).

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# Load the transaction records from the CSV file in the working directory.
df = pd.read_csv('FastagFraudDetection.csv')

In [13]:
# Preview five random rows; the fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" executions.
df.sample(5, random_state=42)

Unnamed: 0,Transaction_ID,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Transaction_Amount,Amount_paid,Geographical_Location,Vehicle_Speed,Vehicle_Plate_Number,Fraud_indicator
1875,1876,1/3/2023 18:30,SUV,FTG-024-GHI-678,B-102,Regular,Large,140,140,"12.936687032945434, 77.53113977439017",60,KA43AB1456,Not Fraud
3773,3774,5/16/2023 0:20,SUV,FTG-112-LPO-206,B-102,Regular,Large,150,150,"12.84197701525119, 77.67547528176169",96,AP12OP3456,Not Fraud
3885,3886,7/1/2023 1:20,SUV,FTG-599-WSX-543,B-102,Regular,Large,150,150,"12.84197701525119, 77.67547528176169",84,TN01PQ6789,Not Fraud
76,77,3/23/2023 21:20,SUV,FTG-067-LMK-983,B-102,Express,Large,180,180,"13.059816123454882, 77.77068662374292",69,KA12IJ8801,Not Fraud
2948,2949,6/11/2023 3:03,Bus,FTG-449-RFD-523,C-103,Express,Large,350,350,"13.059816123454882, 77.77068662374292",66,KA01JK8901,Not Fraud


In [11]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(5000, 13)

In [14]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Transaction_ID         5000 non-null   int64 
 1   Timestamp              5000 non-null   object
 2   Vehicle_Type           5000 non-null   object
 3   FastagID               4451 non-null   object
 4   TollBoothID            5000 non-null   object
 5   Lane_Type              5000 non-null   object
 6   Vehicle_Dimensions     5000 non-null   object
 7   Transaction_Amount     5000 non-null   int64 
 8   Amount_paid            5000 non-null   int64 
 9   Geographical_Location  5000 non-null   object
 10  Vehicle_Speed          5000 non-null   int64 
 11  Vehicle_Plate_Number   5000 non-null   object
 12  Fraud_indicator        5000 non-null   object
dtypes: int64(4), object(9)
memory usage: 507.9+ KB


In [15]:
# Count of missing values in each column.
df.isna().sum()

Transaction_ID             0
Timestamp                  0
Vehicle_Type               0
FastagID                 549
TollBoothID                0
Lane_Type                  0
Vehicle_Dimensions         0
Transaction_Amount         0
Amount_paid                0
Geographical_Location      0
Vehicle_Speed              0
Vehicle_Plate_Number       0
Fraud_indicator            0
dtype: int64

In [18]:
# Summary of the object-dtype columns: count, number of unique values,
# most frequent value (top) and how often it occurs (freq).
df.describe(include='object')

Unnamed: 0,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Geographical_Location,Vehicle_Plate_Number,Fraud_indicator
count,5000,5000,4451,5000,5000,5000,5000,5000,5000
unique,4423,7,4451,6,2,3,5,5000,2
top,1/1/2023 8:15,Bus,FTG-001-ABC-121,B-102,Regular,Large,"13.059816123454882, 77.77068662374292",KA11AB1234,Not Fraud
freq,5,716,1,1432,2858,2144,1000,1,4017


In [29]:
# Column labels of the frame, in file order.
df.columns

Index(['Transaction_ID', 'Timestamp', 'Vehicle_Type', 'FastagID',
       'TollBoothID', 'Lane_Type', 'Vehicle_Dimensions', 'Transaction_Amount',
       'Amount_paid', 'Geographical_Location', 'Vehicle_Speed',
       'Vehicle_Plate_Number', 'Fraud_indicator'],
      dtype='object')

In [33]:
# Low-cardinality text columns whose distinct values are listed below.
# NOTE(review): the recorded output shows 'Bus ' with a trailing space in
# Vehicle_Type -- worth normalising before any grouping or encoding.
column_values = ['Vehicle_Type', 'TollBoothID', 'Lane_Type', 'Vehicle_Dimensions', 'Fraud_indicator']

# Per column: a header line, the array of distinct values, then a blank line.
for column in column_values:
    print(f'Unique values for {column}:', df[column].unique(), '', sep='\n')


Unique values for Vehicle_Type:
['Bus ' 'Car' 'Motorcycle' 'Truck' 'Van' 'Sedan' 'SUV']

Unique values for TollBoothID:
['A-101' 'B-102' 'D-104' 'C-103' 'D-105' 'D-106']

Unique values for Lane_Type:
['Express' 'Regular']

Unique values for Vehicle_Dimensions:
['Large' 'Small' 'Medium']

Unique values for Fraud_indicator:
['Fraud' 'Not Fraud']



In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Transaction_ID,Transaction_Amount,Amount_paid,Vehicle_Speed
count,5000.0,5000.0,5000.0,5000.0
mean,2500.5,161.062,141.261,67.8512
std,1443.520003,112.44995,106.480996,16.597547
min,1.0,0.0,0.0,10.0
25%,1250.75,100.0,90.0,54.0
50%,2500.5,130.0,120.0,67.0
75%,3750.25,290.0,160.0,82.0
max,5000.0,350.0,350.0,118.0


In [25]:
# Distinct toll booth identifiers. The same values are also printed by the
# per-column loop over `column_values`, which includes 'TollBoothID'.
df['TollBoothID'].unique()

array(['A-101', 'B-102', 'D-104', 'C-103', 'D-105', 'D-106'], dtype=object)