In [None]:
import seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the transaction records used to train and evaluate the fraud classifier.
transactions = pd.read_csv('transactions_modified.csv')
# Uncomment for a quick look at the data while exploring:
# print(transactions.head())
# print(transactions.info())
# print(transactions['amount'].describe())

# Feature matrix: the four predictor columns, kept as a DataFrame so the
# column names travel with the data.
features = transactions[['amount', 'isPayment', 'isMovement', 'accountDiff']]
# Target vector: select with single brackets so this is a 1-D Series rather
# than a one-column DataFrame. scikit-learn estimators expect y with shape
# (n_samples,) and emit a DataConversionWarning when given a column vector.
label = transactions['isFraud']

# Split dataset: hold out 30% of the rows for testing. The fixed random_state
# (an arbitrary seed) makes the split, and therefore the scores printed below,
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    features, label, test_size=0.3, random_state=42
)

# Normalize the feature variables. The scaler is fitted on the training rows
# only and then reused on the test rows, so no statistics from the held-out
# data leak into training.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Fit the model to the training data. np.ravel flattens the target to 1-D,
# which is what fit() expects; this works whether y_train arrives as a Series
# or as a one-column DataFrame and avoids the column-vector warning.
logistic = LogisticRegression()
logistic.fit(x_train, np.ravel(y_train))

# Score the model (mean accuracy on the training and test sets) and show the
# fitted coefficients, one per feature column.
print(logistic.score(x_train, y_train))
print(logistic.score(x_test, y_test))
print(logistic.coef_)

### Testing on new data ###
# New transaction data. Values are listed in the same order as the columns of
# `features`.
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])

# Combine the new transactions into one table, one row per transaction.
# Reusing the training column names lets the scaler match columns by name;
# passing a bare ndarray to a scaler fitted on a DataFrame triggers an
# "X does not have valid feature names" warning in recent scikit-learn.
sample_transactions = pd.DataFrame(
    np.stack((transaction1, transaction2, transaction3)),
    columns=features.columns,
)

# Normalize the new transactions with the scaler fitted on the training data,
# so they are on the same scale the model was trained on.
sample_transactions = scaler.transform(sample_transactions)

# Predict fraud on the new transactions (one class label per row).
print(logistic.predict(sample_transactions))

# Show probabilities on the new transactions (one row per transaction, one
# column per class in the order given by logistic.classes_).
print(logistic.predict_proba(sample_transactions))