In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer


In [7]:
# Load the raw transaction table into a DataFrame.
# NOTE(review): the path is absolute (looks like a Colab /content mount) —
# adjust it when running the notebook elsewhere.
DATASET_PATH = '/content/transaction_dataset.csv'
data = pd.read_csv(DATASET_PATH)

In [8]:
# Remove columns that are not fed to the model, leaving `data` itself untouched.
# NOTE(review): these are presumably row identifiers — confirm against the
# dataset schema.
columns_to_drop = ['Unnamed: 0', 'Index', 'Address']
data_cleaned = data.drop(columns=columns_to_drop)

In [9]:

# One-hot encode every non-numeric column so the later numeric steps
# (scaling, imputation, PCA) receive a purely numeric table.
#
# `object_columns` and `encoder` are defined in this cell so that it runs on
# a fresh kernel instead of relying on names left over from an earlier session.
# Boolean columns are excluded from encoding so a boolean target would survive.
object_columns = data_cleaned.select_dtypes(exclude=['number', 'bool']).columns.tolist()
encoder = OneHotEncoder()

encoded_columns = encoder.fit_transform(data_cleaned[object_columns])
# OneHotEncoder returns a SciPy sparse matrix by default; densify it so it can
# be wrapped in a regular DataFrame with named columns.
if hasattr(encoded_columns, 'toarray'):
    encoded_columns = encoded_columns.toarray()

# Reuse the source index so the column-wise concat below aligns row for row
# even if `data_cleaned` does not carry a default RangeIndex.
encoded_columns_df = pd.DataFrame(
    encoded_columns,
    columns=encoder.get_feature_names_out(object_columns),
    index=data_cleaned.index,
)
data_encoded = pd.concat([data_cleaned.drop(object_columns, axis=1), encoded_columns_df], axis=1)




In [10]:
scaler = StandardScaler()
features = data_encoded.drop(columns=['FLAG'])
scaled_features = scaler.fit_transform(features)

In [11]:
imputer = SimpleImputer(strategy='mean')
imputed_features = imputer.fit_transform(scaled_features)

In [12]:
pca = PCA(n_components=0.95)
principal_components = pca.fit_transform(imputed_features)
final_features = pd.DataFrame(principal_components)
final_labels = data_encoded['FLAG']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(final_features, final_labels, test_size=0.2, random_state=42)

In [14]:
# Random-forest variants to compare. Every variant shares the same seed and
# differs only in the keyword overrides listed for it.
forest_overrides = {
    'Default Parameters': {},
    'More Trees': {'n_estimators': 200},
    'Deeper Trees': {'max_depth': 10},
}
models = {
    label: RandomForestClassifier(random_state=42, **overrides)
    for label, overrides in forest_overrides.items()
}

In [15]:
# Fit each candidate on the training split and score its predictions on the
# test split. `scores` maps model label -> {metric label -> value}.
metric_functions = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1 Score': f1_score,
}

scores = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores[name] = {
        metric_name: metric_fn(y_test, y_pred)
        for metric_name, metric_fn in metric_functions.items()
    }

In [16]:
# Print one report per model: a header line, then one "metric: value" line per
# metric (four decimal places), followed by blank lines as a separator.
for name, score in scores.items():
    report_lines = [f"Results for {name}:"]
    report_lines.extend(f"{metric}: {value:.4f}" for metric, value in score.items())
    print("\n".join(report_lines))
    print("\n")

Results for Default Parameters:
Accuracy: 0.9873
Precision: 0.9879
Recall: 0.9532
F1 Score: 0.9702


Results for More Trees:
Accuracy: 0.9878
Precision: 0.9903
Recall: 0.9532
F1 Score: 0.9714


Results for Deeper Trees:
Accuracy: 0.9878
Precision: 0.9903
Recall: 0.9532
F1 Score: 0.9714


