# Importing libraries

In [81]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Reading Data

In [82]:
df = pd.read_csv("Transactions Data.csv")
df.sample(1)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2470906,204,CASH_OUT,47207.62,C1549990343,0.0,0.0,C896737254,219485.61,266693.23,0,0


# Filling missing values

In [83]:
df = df.fillna(0)

# One Hot Encoding

In [84]:
# droping the id's columns to avoid unnecesary columns
df = df.drop(["nameOrig", "nameDest"], axis=1)
df = pd.get_dummies(df, drop_first = True)

# Converting Boolean to Binary

In [85]:
df[["type_CASH_OUT", "type_DEBIT", "type_PAYMENT", "type_TRANSFER"]] = df[["type_CASH_OUT", "type_DEBIT", "type_PAYMENT", "type_TRANSFER"]].astype(int)

# Standardazing Data

In [91]:
columns_to_exclude = ["type_CASH_OUT", "type_DEBIT", "type_PAYMENT", "type_TRANSFER", "isFraud", "isFlaggedFraud"]
columns_to_scale = [col for col in df.columns if col not in columns_to_exclude]


scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[columns_to_scale])
df_scaled = pd.DataFrame(scaled_data, columns=columns_to_scale)


df_scaled = pd.concat([df[columns_to_exclude], df_scaled], axis=1)

# Describing the data for future use

In [133]:
df[columns_to_scale].describe().round(2)

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
count,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0
mean,243.4,179861.9,833883.1,855113.67,1100702.0,1224996.0
std,142.33,603858.23,2888242.67,2924048.5,3399180.0,3674129.0
min,1.0,0.0,0.0,0.0,0.0,0.0
25%,156.0,13389.57,0.0,0.0,0.0,0.0
50%,239.0,74871.94,14208.0,0.0,132705.7,214661.4
75%,335.0,208721.48,107315.18,144258.41,943036.7,1111909.0
max,743.0,92445516.64,59585040.37,49585040.37,356015900.0,356179300.0


# Test Train split

In [93]:
x_train, x_test, y_train, y_test = train_test_split(df_scaled.drop("isFraud", axis = 1), df_scaled["isFraud"])

# Creating the model

In [94]:
LogReg = LogisticRegression()
LogReg.fit(x_train, y_train)

# Data entry for model prediction

In [151]:
data_to_predict = np.array([[0,0,0,0,0,0,0,0,0,0,0]])

data_to_predict = pd.DataFrame(data_to_predict, columns=x_train.columns)
data_to_predict

Unnamed: 0,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,isFlaggedFraud,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
0,0,0,0,0,0,0,0,0,0,0,0


In [155]:
# type of transaction input
# Dictionary mapping transaction types to DataFrame column values
tm = {
    "a": [1, 0, 0, 0],
    "b": [0, 1, 0, 0],
    "c": [0, 0, 1, 0],
    "d": [0, 0, 0, 1],
    "z": [0, 0, 0, 0] 
}


x = input("""Specify the type of the transaction

Cash out = a
Debit = b
Payment = c
Transfer = d
Other = z

""")

data_to_predict.loc[0, ['type_CASH_OUT', 'type_DEBIT', 'type_PAYMENT', 'type_TRANSFER']] = tm.get(x, [0, 0, 0, 0])







# Is flagged Fraud column
data_to_predict.loc[0, ['isFlaggedFraud']] = int(input("""Is the transaction flagged as fraud

Yes = 1
No = 0

"""))







# defining a function to speed the process
def normalize_and_update(df1, column_name, x):
    # Normalize the input
    x_normalized = (x - df[column_name].mean()) / df[column_name].std()
    
    # Update the DataFrame
    df1.at[0, column_name] = int(x_normalized)




# step input
x = int(input("""what is the step of this transaction (1-1000):


Note
step: Represents a unit of time in the transaction process, 
though the specific time unit is not specified in the dataset. It 
could denote hours, days, or another unit, depending on the context.

"""))
normalize_and_update(data_to_predict, 'step', x)


# amount input
x = int(input("What is the amount of transaction: "))
normalize_and_update(data_to_predict, 'amount', x)

# oldbalanceOrig input
x = int(input("What is the old balance of the origin account: "))
normalize_and_update(data_to_predict, 'oldbalanceOrg', x)

# newbalanceOrig input
x = int(input("What is the new balance of the origin account: "))
normalize_and_update(data_to_predict, 'newbalanceOrig', x)

# oldbalanceDest input
x = int(input("What is the old balance of the destination account: "))
normalize_and_update(data_to_predict, 'oldbalanceDest', x)

# newbalanceDest input
x = int(input("What is the new balance of the destination account: "))
normalize_and_update(data_to_predict, 'newbalanceDest', x)

Specify the type of the transaction

Cash out = a
Debit = b
Payment = c
Transfer = d
Other = z

 1
Is the transaction flagged as fraud

Yes = 1
No = 0

 1
what is the step of this transaction (1-1000):


Note
step: Represents a unit of time in the transaction process, 
though the specific time unit is not specified in the dataset. It 
could denote hours, days, or another unit, depending on the context.

 1
What is the amount of transaction:  1
What is the old balance of the origin account:  1
What is the new balance of the origin account:  1
What is the old balance of the destination account:  1
What is the new balance of the destination account:  1


In [156]:
LogReg.predict(data_to_predict)[0]

0

# Accuracy of model

In [150]:
LogReg.score(x_test, y_test)

0.9992116455170983