In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [22]:
# Read the training and test splits from CSV files in the working directory.
# NOTE(review): if the CSVs carry a saved index column, it is loaded as an
# ordinary column here and is not removed later -- confirm against the data.
fraud_train, fraud_test = (
    pd.read_csv(csv_path) for csv_path in ('fraudTrain.csv', 'fraudTest.csv')
)

In [23]:
# Drop unnecessary columns
# The same list is removed from both frames so train and test keep an
# identical set of columns.
cols_to_drop = [
    'trans_date_trans_time', 'cc_num', 'first', 'last', 'street', 'city',
    'state', 'zip', 'job', 'dob', 'trans_num', 'unix_time',
]
fraud_train, fraud_test = (
    frame.drop(columns=cols_to_drop) for frame in (fraud_train, fraud_test)
)

In [24]:
# Encode categorical variables
# Each encoder is fitted on the training frame only and stored in
# `label_encoders` so the learned category -> integer mapping stays available.
label_encoders = {}
for col in ['merchant', 'category', 'gender']:
    encoder = LabelEncoder()
    fraud_train[col] = encoder.fit_transform(fraud_train[col])

    # LabelEncoder.transform raises ValueError on any value it did not see
    # during fit. Look the test values up in the fitted classes instead:
    # known values get the same code transform() would assign (their position
    # in the sorted classes_ array), unseen values get -1.
    test_codes = pd.Index(encoder.classes_).get_indexer(fraud_test[col])
    n_unseen = int((test_codes == -1).sum())
    if n_unseen:
        # Make the fallback visible rather than silently encoding unknowns.
        print(f"{col}: {n_unseen} test rows have categories not seen in training; encoded as -1")
    fraud_test[col] = test_codes

    label_encoders[col] = encoder

In [25]:
# Separate features and target variable
# X_* hold every remaining column except the target; y_* hold the target only.
target_col = 'is_fraud'
X_train, y_train = fraud_train.drop(columns=[target_col]), fraud_train[target_col]
X_test, y_test = fraud_test.drop(columns=[target_col]), fraud_test[target_col]

In [28]:
# Define the candidate models (they are fitted in a later cell).
# A fixed seed makes the tree-based models reproducible across
# Restart & Run All; without it every run yields different trees.
RANDOM_STATE = 42
models = {
    # The default max_iter=100 is often exhausted by the lbfgs solver on
    # unscaled features, leaving an unconverged fit and a ConvergenceWarning.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    # n_jobs=-1 builds the trees on all cores; with random_state fixed the
    # fitted forest does not depend on the number of workers.
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1),
}

In [None]:
# Fit every candidate model on the same training features and labels.
for model in models.values():
    model.fit(X_train, y_train)

In [None]:
# Evaluate models
# Predict on the test features with each model and store its confusion
# matrix (true labels vs. predictions) under the model's display name.
results = {}
for model_name in models:
    y_pred = models[model_name].predict(X_test)
    results[model_name] = confusion_matrix(y_test, y_pred)

In [None]:
# Print confusion matrices
# One heading line, the matrix, then a blank separator line per model.
for model_name, matrix in results.items():
    print(f"Confusion Matrix for {model_name}:", matrix, "", sep="\n")