# Importing Necessities

In [1]:
import numpy as np
import pandas as pd

# Importing Dataset and Describing Dataset

In [2]:
df = pd.read_csv("FraudTrain.csv", index_col=0)

In [3]:
df.head()

Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,city,...,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud
0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,Moravian Falls,...,36.0788,-81.1781,3495,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0
1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,Orient,...,48.8878,-118.2105,149,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0
2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,Malad City,...,42.1808,-112.262,4154,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0
3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,Boulder,...,46.2306,-112.1138,1939,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0
4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,Doe Hill,...,38.4207,-79.4629,99,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0


### Description of the columns in the Dataset:
+ trans_date_trans_time - Transaction DateTime
+ cc_num - Credit Card Number of Customer
+ merchant - Merchant Name
+ category - Category of Merchant
+ amt - Amount of Transaction
+ first - First Name of Credit Card Holder
+ last - Last Name of Credit Card Holder
+ gender - Gender of Credit Card Holder
+ street - Street Address of Credit Card Holder
+ city - City of Credit Card Holder
+ state - State of Credit Card Holder
+ zip - Zip of Credit Card Holder
+ lat - Latitude Location of Credit Card Holder
+ long - Longitude Location of Credit Card Holder
+ city_pop - Credit Card Holder's City Population
+ job - Job of Credit Card Holder
+ dob - Date of Birth of Credit Card Holder
+ trans_num - Transaction Number
+ unix_time - UNIX Time of transaction
+ merch_lat - Latitude Location of Merchant
+ merch_long - Longitude Location of Merchant
+ is_fraud - Fraud Flag

# Data Preprocessing

## Seperating Features and Target Value

In [4]:
X = df.iloc[:,0:-1]
y = df.iloc[:,-1]

## Generating age from the date of birth column

In [5]:
X['dob'] = pd.to_datetime(X['dob'])

# The reference date is the date the dataset was generated on
reference_date = pd.Timestamp('2020-12-31')

X['age'] = (reference_date - X['dob']).dt.days // 365

In [6]:
print(X[['dob', 'age']].head())

         dob  age
0 1988-03-09   32
1 1978-06-21   42
2 1962-01-19   58
3 1967-01-12   54
4 1986-03-28   34


## Dropping Date of Birth column from features

In [7]:
X = X.drop('dob', axis=1)

In [8]:
"dob" in X.columns

False

## Generating transaction day of the week from the transaction date and time column

In [9]:
X['trans_date_trans_time'] = pd.to_datetime(X['trans_date_trans_time'])
X['trans_day_of_week'] = X['trans_date_trans_time'].dt.dayofweek

In [10]:
X['trans_day_of_week'].unique()

array([1, 2, 3, 4, 5, 6, 0], dtype=int32)

## Generating transaction month from the transaction date and time column

In [11]:
X['trans_month'] = X['trans_date_trans_time'].dt.month

In [12]:
X['trans_month'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32)

## Dropping transaction date and time column from features

In [13]:
X = X.drop('trans_date_trans_time', axis=1)

## Dropping name columns from features

In [14]:
X = X.drop(columns=['first', 'last'])

## Dropping homogeneous columns

In [15]:
X = X.drop(columns=['cc_num', 'merchant','job','street','trans_num'])