# APP

## Table of Contents
* [Preprocessing](#chapter1)
* [Model](#chapter2)
* [Active Learning](#chapter3)
* [App](#chapter4)

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
data= pd.read_csv('New CCTrans.csv')
data=data.drop(columns='Unnamed: 0')
data.columns
#data=data.iloc[:5,:]

Index(['is_fraud', 'amt', 'gender', 'dist_km', 'city_pop', 'trans_day',
       'trans_month', 'trans_year', 'trans_hour', 'trans_minute', 'age',
       'category_entertainment', 'category_food_dining',
       'category_gas_transport', 'category_grocery_net',
       'category_grocery_pos', 'category_health_fitness', 'category_home',
       'category_kids_pets', 'category_misc_net', 'category_misc_pos',
       'category_personal_care', 'category_shopping_net',
       'category_shopping_pos', 'category_travel', 'job_Agriculture',
       'job_Business', 'job_Creative', 'job_Education',
       'job_Engineering_Technical', 'job_Healthcare', 'job_Legal', 'job_Media',
       'job_Social_Services', 'state_AK', 'state_AL', 'state_AR', 'state_AZ',
       'state_CA', 'state_CO', 'state_CT', 'state_DC', 'state_FL', 'state_GA',
       'state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', 'state_KS',
       'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', 'state_MI',
       'state_MN

In [3]:
stat= pd.read_csv("Stat.csv")
stat= stat.drop(columns='Unnamed: 0')
stat

Unnamed: 0,feature,mean,variance
0,trans_day,15.850636,8.876237
1,trans_month,7.151836,3.424888
2,trans_year,2019.500663,0.5
3,trans_hour,12.806742,6.815632
4,trans_minute,29.497128,17.32637
5,age,45.75435,17.398243
6,amt,70.061827,159.256736
7,city_pop,88662.197575,301520.601135
8,dist_km,76.112445,29.116965


In [4]:
categories=['Entertainment', 'Food or Dining','Gas transport', 'grocery_net','grocery_pos', 'health_fitness', 'home',
            'kids_pets', 'misc_net','misc_pos','personal_care', 'shopping_net','shopping_pos', 'travel']

cat_col= ['category_entertainment', 'category_food_dining','category_gas_transport', 'category_grocery_net',
          'category_grocery_pos', 'category_health_fitness', 'category_home', 'category_kids_pets',
          'category_misc_net', 'category_misc_pos','category_personal_care', 'category_shopping_net','category_shopping_pos', 'category_travel']


states= ['AK', 'AL', 'AR', 'AZ','CA', 'CO', 'CT', 'DC', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA',
         'MA', 'MD', 'ME', 'MI','MN', 'MO', 'MS', 'MT', 'state_NC', 'ND','NE', 'NH', 'NJ', 'NM', 'NV', 'NY','OH',
         'OK', 'OR', 'PA', 'SC', 'SD','TN', 'TX', 'UT', 'VA', 'VT', 'WA','WI', 'WV', 'WY', 'other']

states_col= ['state_AK', 'state_AL', 'state_AR', 'state_AZ', 'state_CA', 'state_CO', 'state_CT', 'state_DC',
            'state_FL', 'state_GA','state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', 'state_KS',
            'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', 'state_MI','state_MN', 'state_MO',
            'state_MS', 'state_MT', 'state_NC', 'state_ND', 'state_NE', 'state_NH', 'state_NJ', 'state_NM',
            'state_NV', 'state_NY', 'state_OH', 'state_OK', 'state_OR', 'state_PA', 'state_SC', 'state_SD',
            'state_TN', 'state_TX', 'state_UT', 'state_VA', 'state_VT', 'state_WA', 'state_WI', 'state_WV',
            'state_WY', 'state_other']

jobs= ['engineering or technical', 'healthcare', 'creative or design', 'legal','education', 'business', 'media',
       'agriculture','social services']

jobs_col= ['job_Engineering_Technical', 'job_Healthcare', 'job_Creative', 'job_Legal','job_Education', 'job_Business',
           'job_Media','job_Agriculture','job_Social_Services']


<a id="chapter1"></a>
### Preprocessing

In [5]:
def preprocess(time, amt, age, city, lat, long, merch_lat, merch_long, cat, state, job, gender):
    
    # creating an empty dataframe to store the input
    columns=list(data.columns[1:])
    new_trans= pd.DataFrame(np.zeros(shape=(1, len(columns))), columns= columns)

    #1. CONVERTING THE INPUT
    #Datetime Feature
    usertime= pd.to_datetime(time)
    new_trans['trans_day'] = usertime.day # Day of the month
    new_trans['trans_month'] = usertime.month # Month
    new_trans['trans_year'] = usertime.year # Year
    new_trans['trans_hour'] = usertime.hour # Hour of the day
    new_trans['trans_minute'] = usertime.minute # Minute
    
    #Numeric Features
    new_trans['amt']= int(amt)
    new_trans['age']= int(age)
    new_trans['city_pop']= int(city)
    lat= float(lat)
    long= float(long)
    merch_lat= float(merch_lat)
    merch_long= float(merch_long)
    
    #2. ENCODING CATEGORICAL FEATURES
    #setting the column of the category to 1 and the rest zeros
    index= categories.index(cat)
    new_trans.loc[:,cat_col[index]]=1
    #setting the column of the state to 1 and the rest zeros
    index= states.index(state)
    new_trans.loc[:,states_col[index]]=1
    #setting the column of the job to 1 and the rest zeros
    index= jobs.index(job)
    new_trans.loc[:,jobs_col[index]]=1
    #encoding gender
    if gender =='Female':
        new_trans.gender= 1
    
    #3. NORMALIZING NUMERIC FEATURES
    # compute distance from lat and long
    import haversine as hs
    loc1= (lat,long)
    loc2= (merch_lat,merch_long)
    new_trans['dist_km']= hs.haversine(loc1,loc2)
    
    # Normalization
    new_trans['trans_day'] = (new_trans['trans_day']- stat.iloc[0,1])/ stat.iloc[0,2]
    new_trans['trans_month'] = (new_trans['trans_month']- stat.iloc[1,1])/ stat.iloc[1,2]
    new_trans['trans_year'] = (new_trans['trans_year']- stat.iloc[2,1])/ stat.iloc[2,2]
    new_trans['trans_hour'] = (new_trans['trans_hour']- stat.iloc[3,1])/ stat.iloc[3,2]
    new_trans['trans_minute'] = (new_trans['trans_minute']- stat.iloc[4,1])/ stat.iloc[4,2]
    new_trans['age']= (new_trans['age']- stat.iloc[5,1])/ stat.iloc[5,2]
    new_trans['amt']= (new_trans['amt']- stat.iloc[6,1])/ stat.iloc[6,2]
    new_trans['city_pop']= (new_trans['city_pop'] - stat.iloc[7,1])/ stat.iloc[7,2]
    new_trans['dist_km']= (new_trans['dist_km'] - stat.iloc[8,1])/ stat.iloc[8,2]
    
    print("Preprocesing Complete ...")
    return (new_trans)

<a id="chapter2"></a>
### Model

In [6]:
def train():
    data= pd.read_csv('New CCTrans.csv')
    data=data.drop(columns='Unnamed: 0')

    print("Currently training ...")
    
    from sklearn.ensemble import RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=65,criterion='entropy',
                                random_state=50, max_depth=57, min_samples_split=2,
                                min_samples_leaf=1, max_features=64, class_weight={0:1,1:3})
    rf.fit(data.iloc[:,1:],data.iloc[:,0])
    print("Training complete.")
    
    return rf

In [7]:
rf = train()

Currently training ...
Training complete.


In [8]:
def classify(new_trans, rf):
    y_pred= rf.predict(new_trans)
    
    if y_pred == 0:
        is_fraud = 'Not Fraudulent'
    else:
        is_fraud = 'Fraudulent'
    return (is_fraud)

<a id="chapter3"></a>
### Active Learning

In [11]:
def store(data, new_trans, is_fraud):
    
    if is_fraud == 'Not Fraudulent':
        is_fraud = 0
    if is_fraud =='Fraudulent':
        is_fraud = 1
    
    new_trans.insert(0, 'is_fraud', is_fraud)
    data= pd.concat([data, new_trans]).reset_index(drop=True)
    new_trans.to_csv('New CCTrans.csv', mode='a', index=True, header=False)
    
    print("Data stored ... Thank you!")
    
    return(data)

<a id="chapter4"></a>
### App

In [13]:
import tkinter
from tkinter import *
from tkinter import ttk
from tkinter import messagebox

global is_fraud
global data
is_fraud = ""

def enter_data():
    global is_fraud
    global new_trans
    global rf
    global data
    
    # section 1:
    name= name_entry.get()
    gender = gender_combobox.get()
    age = age_spinbox.get()
    state = state_combobox.get()
    city = city_entry.get()
    job = job_combobox.get()
    
    # section 2:
    amt = amt_spinbox.get()
    cat = cat_combobox.get()
    time= time_entry.get()
            
    # section 3:
    lat= lat_entry.get()
    long= long_entry.get()
    merch_lat= merch_lat_entry.get()
    merch_long= merch_long_entry.get()
            
    print("First name: ", name, "Age: ", age, "Gender: ", gender)
    print("City: ", city, "State: ", state, "Job: ", job)
    print("Transaction Amount : ", amt, "Category: ", cat, "Time: ", time)
    print("Customer Location: ", lat, "\t", long)
    print("Merchant Location: ", merch_lat, "\t", merch_long)
    print("------------------------------------------")
    
    #call preprocessing function
    new_trans = preprocess(time, amt, age, city, lat, long, merch_lat, merch_long, cat, state, job, gender)
    
    #call model to get classification
    is_fraud = classify(new_trans, rf)
    print(is_fraud)
    r2_label.config(text = is_fraud)

def update():
    global is_fraud
    global new_trans
    global rf
    global data
    
    status= check.get()
    
    # Update classification and store transaction
    if status == 'Incorrect':
        print("Incorrect class")
        
        if is_fraud == 'Fraudulent':
            is_fraud = 'Not Fraudulent'
            print("class updated 1")
            r2_label.config(text= is_fraud)
            
        else:
            is_fraud = 'Fraudulent' 
            print("class updated 2")
            r2_label.config(text= is_fraud)
            
        tkinter.messagebox.showinfo(title="Classification Complete", 
                                    message="Thank you! The model is currently retraining based on this update.")
        data= store(data, new_trans,is_fraud)
        rf = train() # Retraining the model
        
            
    if status == 'Correct':
        print("Correct class")
        r2_label.config(text= is_fraud)        
        tkinter.messagebox.showinfo(title="Classification Complete", message="Thank you!")
        #data= store(data, new_trans,is_fraud)
    
    
#-----------------
window = tkinter.Tk()
window.title("Credit Card Fruad Detection System")

frame = tkinter.Frame(window)
frame.pack()

# Section 1: Cardholder Info
user_info_frame =tkinter.LabelFrame(frame, text="Cardholder Information")
user_info_frame.grid(row= 0, column=0, padx=20, pady=10,  sticky='news')

name_label = tkinter.Label(user_info_frame, text="Name")
name_label.grid(row=0, column=0)
name_entry = tkinter.Entry(user_info_frame)
name_entry.grid(row=1, column=0)

age_label = tkinter.Label(user_info_frame, text="Age")
age_spinbox = tkinter.Spinbox(user_info_frame, from_=18, to=110)
age_label.grid(row=0, column=1)
age_spinbox.grid(row=1, column=1)

gender_label = tkinter.Label(user_info_frame, text="Gender")
gender_combobox = ttk.Combobox(user_info_frame, values=["Female", "Male"])
gender_label.grid(row=0, column=2)
gender_combobox.grid(row=1, column=2)

city_label = tkinter.Label(user_info_frame, text="City Population")
city_label.grid(row=2, column=0)
city_entry = tkinter.Entry(user_info_frame)
city_entry.grid(row=3, column=0)

state_label = tkinter.Label(user_info_frame, text="State")
state_combobox = ttk.Combobox(user_info_frame, values=states)
state_label.grid(row=2, column=1)
state_combobox.grid(row=3, column=1)

job_label = tkinter.Label(user_info_frame, text="Job Industry")
job_label.grid(row=2, column=2)
job_combobox = ttk.Combobox(user_info_frame, 
                            values=jobs)
job_combobox.grid(row=3, column=2)


# Section 2: Transaction Details
transaction_frame =tkinter.LabelFrame(frame, text="Transaction Details")
transaction_frame.grid(row= 1, column=0, padx=20, pady=10, sticky='news')

amt_label = tkinter.Label(transaction_frame, text="Transaction Amount")
amt_spinbox = tkinter.Spinbox(transaction_frame, from_=1, to="infinity")
amt_label.grid(row=0, column=0, padx=50, pady=10)
amt_spinbox.grid(row=1, column=0)

cat_label = tkinter.Label(transaction_frame, text="Category")
cat_combobox = ttk.Combobox(transaction_frame, values=categories)
cat_label.grid(row=0, column=1, padx=50, pady=10)
cat_combobox.grid(row=1, column=1)

time_label = tkinter.Label(transaction_frame, text="Date & Time")
time_label.grid(row=0, column=2, padx=20, pady=10)
time_entry = tkinter.Entry(transaction_frame)
time_entry.grid(row=1, column=2, padx=20, pady=10)
t_label = tkinter.Label(transaction_frame, text="Format: year-month-day hour:min:sec")
t_label.grid(row=2, column=2, padx=20)

#date_picker = Calendar(transaction_frame,selectmode = "day",year=2023,month=1,date=1)
#date_picker.grid(row=1, column=2)

#def fetch_date():
 #   date.config(text = "Selected Date is: " + date_picker.get_date())
#date_but = Button(transaction_frame,text="Select Date",command=fetch_date, bg="black", fg='white')
#date_but.grid(row=1, column=2)
#date_label = Label(transaction_frame,text="",bg='black',fg='white')
#date_label.grid(row=2, column=2)

#time_picker = AnalogPicker(root)
#time_picker.pack(expand=True, fill="both")

# Section 3: Locations
locations_frame =tkinter.LabelFrame(frame, text="Location Details")
locations_frame.grid(row= 3, column=0, padx=20, pady=10,  sticky='news')

lat_label = tkinter.Label(locations_frame, text="Your Latitude")
lat_label.grid(row=0, column=0, padx=150, pady=10)
lat_entry = tkinter.Entry(locations_frame)
lat_entry.grid(row=1, column=0)

long_label = tkinter.Label(locations_frame, text="Your Longitude")
long_label.grid(row=0, column=1)
long_entry = tkinter.Entry(locations_frame)
long_entry.grid(row=1, column=1)

merch_lat_label = tkinter.Label(locations_frame, text="Merchant Latitude")
merch_lat_label.grid(row=2, column=0)
merch_lat_entry = tkinter.Entry(locations_frame)
merch_lat_entry.grid(row=3, column=0)

merch_long_label = tkinter.Label(locations_frame, text="Merchant Longitude")
merch_long_label.grid(row=2, column=1)
merch_long_entry = tkinter.Entry(locations_frame)
merch_long_entry.grid(row=3, column=1)

# Enter Data Button
button1 = tkinter.Button(frame, text="Enter data",  command= enter_data)
button1.grid(row=4, column=0, sticky='news', padx=20, pady=10)

# Section 4: Classification result
result_frame = tkinter.LabelFrame(frame, text="Transaction Classification")
result_frame.grid(row= 5, column=0, padx=20, pady=10,  sticky='news')

r1_label = tkinter.Label(result_frame, text="This transaction is classified as:")
r1_label.grid(row=0, column=1)

r2_label = tkinter.Label(result_frame, text= is_fraud)
r2_label.grid(row=0, column=2)

r3_label= tkinter.Label(result_frame, text="Is this classification correct?")
r3_label.grid(row=1, column=0)

check = tkinter.StringVar(value="Correct")
incorrect_check = tkinter.Checkbutton(result_frame, text= "Incorrect Classification",
                                      variable=check, onvalue="Incorrect", offvalue="Correct")
incorrect_check.grid(row=2, column=1)

correct_check = tkinter.Checkbutton(result_frame, text= "Correct Classification",
                                  variable=check, onvalue="Correct", offvalue="Incorrect")
correct_check.grid(row=2, column=0)
 

# Update Classification Button
button2 = tkinter.Button(frame, text="Update Records",  command= update)
button2.grid(row=6, column=0, sticky='news', padx=20, pady=10)

# Classification check Buttons
#button_corr = tkinter.Button(result_frame, text="Correct",  command= correct_class())
#button_corr.grid(row=1, column=1, padx=20, pady=10)

#button_incorr = tkinter.Button(result_frame, text="Incorrect",  command= correct_class())
#button_incorr.grid(row=1, column=2, padx=20, pady=10)

for widget in user_info_frame.winfo_children():
    widget.grid_configure(padx=10, pady=5)

window.mainloop()

First name:  Essam Age:  23 Gender:  Male
City:  7182616 State:  DC Job:  creative or design
Transaction Amount :  799 Category:  shopping_net Time:  2020-02-15 12:44:50
Customer Location:  38.9072 	 77.0369
Merchant Location:  25.2048 	 55.2708
------------------------------------------
Preprocesing Complete ...
Not Fraudulent
Correct class




First name:  Essam Age:  50 Gender:  Male
City:  7182616 State:  DC Job:  creative or design
Transaction Amount :  799 Category:  shopping_net Time:  2020-02-15 12:44:50
Customer Location:  38.9072 	 77.0369
Merchant Location:  25.2048 	 55.2708
------------------------------------------
Preprocesing Complete ...
Fraudulent
Incorrect class
class updated 1




Data stored ... Thank you!
Currently training ...
Training complete.
Correct class




In [14]:
data.tail()

Unnamed: 0,is_fraud,amt,gender,dist_km,city_pop,trans_day,trans_month,trans_year,trans_hour,trans_minute,...,state_TN,state_TX,state_UT,state_VA,state_VT,state_WA,state_WI,state_WV,state_WY,state_other
1851956,0,0.225222,0.0,0.7877,-0.291069,0.580129,-0.336314,0.998675,-0.118366,-1.009855,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1851957,0,0.03038,0.0,0.292452,-0.290316,0.580129,-0.336314,0.998675,-0.118366,-0.95214,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1851958,0,-0.41293,0.0,0.26587,-0.293327,0.580129,-0.336314,0.998675,-0.118366,-0.95214,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1851959,1,4.577126,0.0,84.750727,2.07002,-0.095833,-1.504235,0.998675,-0.118366,0.83704,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1851960,0,4.577126,0.0,84.750727,23.527261,-0.095833,-1.504235,0.998675,-0.118366,0.83704,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#data= pd.read_csv('New CCTrans.csv')
#data=data.drop(columns=['Unnamed: 0','Unnamed: 0.1'])
#data

In [None]:
data= data.iloc[:-1,:]
data.to_csv('New CCTrans.csv')

In [None]:
is_fraud