In [None]:
#import the required libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib


In [None]:
# Read each CSV from Google Drive and immediately keep a reproducible 10%
# random sample of its rows (fixed seed) so the remaining cells run quickly.
fraudtrain = pd.read_csv(
    "/content/drive/MyDrive/archive (12)/fraudTrain.csv"
).sample(frac=0.1, random_state=42)

fraudtest = pd.read_csv(
    "/content/drive/MyDrive/archive (12)/fraudTest.csv"
).sample(frac=0.1, random_state=42)


In [None]:
# Data Preprocessing

# Columns the model uses: seven numeric inputs plus the label.
relevant_columns = [
    'amt', 'lat', 'long', 'city_pop', 'unix_time', 'merch_lat', 'merch_long',
    'is_fraud'  # target column
]

# Order matters here:
#   1. select the relevant columns first, so a row is only discarded when one
#      of the values the model actually needs is missing (not because some
#      unused column happens to be empty);
#   2. dropna() without inplace=True, so the step returns a new frame;
#   3. .copy() so the result is an independent DataFrame rather than a slice
#      of the loaded one -- later cells assign into these frames, and writing
#      into a slice raises pandas' SettingWithCopyWarning.
fraudtrain = fraudtrain[relevant_columns].dropna().copy()
fraudtest = fraudtest[relevant_columns].dropna().copy()


In [None]:
# Scaling numerical features
numerical_features = ['amt', 'lat', 'long', 'city_pop', 'unix_time', 'merch_lat', 'merch_long']

# Cast the feature columns to float64 before writing standardized values into
# them. This does two things:
#   * astype() returns a new DataFrame, so the assignments below never write
#     into a slice of another frame (the cause of SettingWithCopyWarning);
#   * any feature column that was read as an integer type (presumably the
#     count / timestamp columns -- verify with .dtypes) can hold the
#     fractional z-scores without an implicit dtype change.
float_dtypes = {column: "float64" for column in numerical_features}
fraudtrain = fraudtrain.astype(float_dtypes)
fraudtest = fraudtest.astype(float_dtypes)

# Fit on the training sample only; the test sample is transformed with the
# training mean/std so no test-set statistics leak into the model.
scaler = StandardScaler()
fraudtrain.loc[:, numerical_features] = scaler.fit_transform(fraudtrain[numerical_features])
fraudtest.loc[:, numerical_features] = scaler.transform(fraudtest[numerical_features])

# NOTE: this cell overwrites fraudtrain / fraudtest with their scaled values.
# Re-run the preprocessing cell before re-running this one, otherwise the
# scaler is re-fitted on data that has already been standardized.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fraudtrain.loc[:, numerical_features] = scaler.fit_transform(fraudtrain.loc[:, numerical_features])


In [None]:
# Split the data into features (X) and target (y)
X_train = fraudtrain.drop(columns="is_fraud")
y_train = fraudtrain["is_fraud"]
X_test = fraudtest.drop(columns="is_fraud")
y_test = fraudtest["is_fraud"]

# Create and train the Logistic Regression model.
#
# class_weight="balanced" re-weights each class inversely to its frequency.
# Fraud rows are a tiny minority of this data, and in the recorded run of this
# notebook the unweighted model detected none of the fraud rows in the test
# sample (recall 0.00 for class 1) while still reporting ~99.5% accuracy.
# max_iter is raised above the default as a safety margin for the weighted fit.
logistic_model = LogisticRegression(
    class_weight="balanced",
    max_iter=1000,
    random_state=42,
)
logistic_model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = logistic_model.predict(X_test)

# Evaluate the model.
# With classes this imbalanced, accuracy alone says very little: read the
# confusion matrix and the per-class precision/recall for the fraud class.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

In [None]:
# Show the three evaluation artefacts computed in the previous cell, one after
# another: overall accuracy, the confusion matrix, and the per-class report.
print(
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{conf_matrix}",
    f"Classification Report:\n{classification_rep}",
    sep="\n",
)

Accuracy: 0.9952853955229252
Confusion Matrix:
[[55310    46]
 [  216     0]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     55356
           1       0.00      0.00      0.00       216

    accuracy                           1.00     55572
   macro avg       0.50      0.50      0.50     55572
weighted avg       0.99      1.00      0.99     55572



# Testing the model on sample input

In [None]:
# (column name, prompt) for every model input, in the order the scaler and the
# model were fitted. The column names let us hand scikit-learn a DataFrame, so
# it can match features by name instead of warning about missing feature names.
FEATURE_PROMPTS = [
    ("amt", "Enter Transaction Amount: "),
    ("lat", "Enter Latitude: "),
    ("long", "Enter Longitude: "),
    ("city_pop", "Enter City Population: "),
    ("unix_time", "Enter Unix Time: "),
    ("merch_lat", "Enter Merchant Latitude: "),
    ("merch_long", "Enter Merchant Longitude: "),
]


def _read_float(prompt):
    """Ask for a value with `prompt` until the user enters a finite number.

    Returns the parsed float. Invalid entries (non-numeric text, NaN, inf) are
    reported and the same prompt is shown again instead of raising.
    """
    import math  # local import: only this helper needs it

    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a valid number, please try again.")
            continue
        # float() accepts "nan"/"inf", which the scaler would reject later.
        if math.isfinite(value):
            return value
        print(f"'{raw}' is not a finite number, please try again.")


while True:
    # Get user input for each feature (prompts appear in FEATURE_PROMPTS order)
    feature_names = [name for name, _ in FEATURE_PROMPTS]
    raw_values = [_read_float(prompt) for _, prompt in FEATURE_PROMPTS]
    raw_row = pd.DataFrame([raw_values], columns=feature_names)

    # Scale user input to match the model's scaling. transform() returns a
    # plain array, so wrap it back into a DataFrame with the same column names
    # before handing it to the model.
    user_input = pd.DataFrame(scaler.transform(raw_row), columns=feature_names)

    # Predict whether it's fraud or not based on the user inputs
    prediction = logistic_model.predict(user_input)

    if prediction[0] == 1:
        print("The model predicts it's fraud.")
    else:
        print("The model predicts it's not fraud.")

    # Ask if the user wants to continue; surrounding whitespace is ignored so
    # an answer such as "yes " still counts as "yes".
    user_choice = input("Do you want to enter more data? (yes/no): ").strip().lower()

    if user_choice != "yes":
        break

Enter Transaction Amount: 2000.0
Enter Latitude: 40.748817
Enter Longitude: -73.985428
Enter City Population: 15000
Enter Unix Time: 1478589875
Enter Merchant Latitude: 40.748817
Enter Merchant Longitude: -73.985428




The model predicts it's fraud.
Do you want to enter more data? (yes/no): yes
Enter Transaction Amount: 150.0
Enter Latitude: 33.689475
Enter Longitude: -117.543308
Enter City Population: 10000
Enter Unix Time: 1478589786
Enter Merchant Latitude: 33.689475
Enter Merchant Longitude: -117.543308




The model predicts it's not fraud.
Do you want to enter more data? (yes/no): no
