In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import joblib
from datetime import datetime
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the three serialized models from files in the current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
# NOTE(review): none of these three variables is referenced in the visible
# cells; make_prediction() loads its model again from a file path.
# "random_forst" (sic) is spelled the same way in the variable and in the file
# name, so the file name must stay as-is unless the file on disk is renamed.
tree_model = joblib.load("tree_model.pkl")
random_forst_model = joblib.load("random_forst_model.pkl")
xgb_model = joblib.load("xgboost_model.pkl")

In [3]:
def _prompt_number(prompt, cast):
    """Prompt with ``prompt`` until the reply converts with ``cast``; return the converted value."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            # Re-ask for this one field instead of discarding everything typed so far.
            print(f"Invalid value {reply!r}; expected {cast.__name__}. Please try again.")


def get_input():
    """Interactively read one transaction from stdin.

    The fields are prompted for in a fixed order (the order of the keys
    below). Text fields are stored exactly as typed. Numeric fields
    (``amt``, ``lat``, ``long``, ``merch_lat``, ``merch_long`` as ``float``;
    ``city_pop`` as ``int``) are re-prompted until the reply can be converted,
    so one typo no longer aborts the whole entry with a ``ValueError``.

    Returns:
        pandas.DataFrame: a single-row frame with the columns
        ``trans_date_trans_time``, ``category``, ``amt``, ``gender``,
        ``state``, ``lat``, ``long``, ``city_pop``, ``dob``, ``merch_lat``
        and ``merch_long``. Dates are left as the raw strings entered.
    """
    print("Enter transaction details:")
    # Dict values are evaluated top to bottom, which fixes the prompt order.
    transaction_data = {
        "trans_date_trans_time": [input("Enter transaction date and time (e.g., 2024-12-17 14:30:00): ")],
        "category": [input("Enter transaction category: ")],
        "amt": [_prompt_number("Enter amount: ", float)],
        "gender": [input("Enter gender (M/F): ")],
        "state": [input("Enter State: ")],
        "lat": [_prompt_number("Enter latitude: ", float)],
        "long": [_prompt_number("Enter longitude: ", float)],
        "city_pop": [_prompt_number("Enter city population: ", int)],
        "dob": [input("Enter date of birth (e.g., 1990-01-01): ")],
        "merch_lat": [_prompt_number("Enter merchant latitude: ", float)],
        "merch_long": [_prompt_number("Enter merchant longitude: ", float)],
    }

    return pd.DataFrame(transaction_data)

In [4]:
def preprocess_data(transaction_data, encoders=None, reference_date=None):
    """Turn raw transaction rows into the feature columns fed to the model.

    Derived features:
      * ``age``           - whole years between ``dob`` and the reference date.
      * ``lat_distance``  - ``|merch_lat - lat|`` rounded to 3 decimals.
      * ``long_distance`` - ``|merch_long - long|`` rounded to 3 decimals.
      * ``trans_month``   - month number of ``trans_date_trans_time``.
      * ``category``, ``state``, ``gender`` - replaced by integer codes.

    Args:
        transaction_data (pandas.DataFrame): frame with the columns produced
            by ``get_input``. It is NOT modified; all work happens on a copy.
        encoders (dict | None): optional mapping from column name
            (``"category"``, ``"state"``, ``"gender"``) to an already fitted
            encoder exposing ``transform``. Pass the encoders that were fitted
            on the training data so codes match what the model saw. For any
            column without an entry a fresh ``LabelEncoder`` is fitted on the
            input itself (the original behaviour) and a warning is emitted.
        reference_date (datetime-like | None): date against which ``age`` is
            computed. Defaults to ``datetime.today()``, which makes the result
            depend on when the cell is run.

    Returns:
        pandas.DataFrame: the columns ``category``, ``gender``,
        ``trans_month``, ``amt``, ``state``, ``lat_distance``,
        ``long_distance``, ``city_pop``, ``age`` in that order.
    """
    import warnings

    # Work on a copy so re-running the cell on the same raw frame is idempotent.
    features = transaction_data.copy()

    reference = datetime.today() if reference_date is None else pd.Timestamp(reference_date)
    features["dob"] = pd.to_datetime(features["dob"])
    # Subtract one year when the birthday has not yet occurred in the reference year.
    features["age"] = features["dob"].apply(
        lambda dob: reference.year - dob.year - ((reference.month, reference.day) < (dob.month, dob.day))
    )

    features["lat_distance"] = (features["merch_lat"] - features["lat"]).round(3).abs()
    features["long_distance"] = (features["merch_long"] - features["long"]).round(3).abs()

    features["trans_date_trans_time"] = pd.to_datetime(features["trans_date_trans_time"])
    features["trans_month"] = features["trans_date_trans_time"].dt.month

    fitted_encoders = encoders or {}
    categorical_columns = ("category", "state", "gender")
    unfitted = [column for column in categorical_columns if column not in fitted_encoders]
    if unfitted:
        # A LabelEncoder fitted on the inference rows assigns codes from those
        # rows alone; for a single transaction every code is therefore 0.
        warnings.warn(
            "No fitted encoder supplied for " + ", ".join(unfitted)
            + "; fitting LabelEncoder on the input itself, so the codes are "
            "unrelated to any encoding used at training time.",
            stacklevel=2,
        )
    for column in categorical_columns:
        if column in fitted_encoders:
            features[column] = fitted_encoders[column].transform(features[column])
        else:
            features[column] = LabelEncoder().fit_transform(features[column])

    selected_columns = [
        "category", "gender", "trans_month", "amt", "state",
        "lat_distance", "long_distance", "city_pop", "age"
    ]
    return features[selected_columns]

In [5]:
def make_prediction(preprocessed_data, model_name):
    """Predict with a model on the preprocessed feature columns.

    Args:
        preprocessed_data (pandas.DataFrame): frame containing at least the
            nine feature columns listed below; extra columns are ignored.
        model_name: either a path to a joblib-serialized model, or an
            already loaded model object (anything exposing ``predict``).
            Passing a loaded model avoids unpickling the file on every call.

    Returns:
        Whatever ``model.predict`` returns for the selected feature columns.
    """
    features = [
        "category", "gender", "trans_month", "amt", "state",
        "lat_distance", "long_distance", "city_pop", "age",
    ]

    if hasattr(model_name, "predict"):
        model = model_name
    else:
        # NOTE: joblib.load unpickles the file and can run arbitrary code;
        # only pass paths to model files from a trusted source.
        model = joblib.load(model_name)

    # Select columns explicitly so the model always sees them in this order.
    return model.predict(preprocessed_data[features])

In [16]:
if __name__ == "__main__":
    # Interactive end-to-end run: read one transaction, derive its features,
    # score it with the model stored in tree_model.pkl, and keep the scored
    # frame in output_df for display in a later cell.
    model_file = "tree_model.pkl"

    transaction_df = get_input()
    processed_df = preprocess_data(transaction_df)
    prediction = make_prediction(processed_df, model_file)

    # assign() builds a new frame, so the frame returned by preprocess_data
    # is not written to when the prediction column is attached.
    processed_df = processed_df.assign(prediction=prediction)
    output_df = pd.DataFrame(processed_df)

Enter transaction details:


Enter transaction date and time (e.g., 2024-12-17 14:30:00):  2023-12-10 01:25:02
Enter transaction category:  Food
Enter amount:  70.50
Enter gender (M/F):  F
Enter State:  CA
Enter latitude:  23.3653
Enter longitude:  -123.213
Enter city population:  40000000
Enter date of birth (e.g., 1990-01-01):  1995-12-15
Enter merchant latitude:  54.5522
Enter merchant longitude:  123.321



Final Output with Prediction:


In [17]:
# Print a heading, then end the cell with the frame itself so the notebook
# renders it as a table. output_df is created by the driver cell before this one.
print("\nFinal Output with Prediction:")
output_df.head()


Final Output with Prediction:


Unnamed: 0,category,gender,trans_month,amt,state,lat_distance,long_distance,city_pop,age,prediction
0,0,0,12,70.5,0,31.187,246.534,40000000,29,0
