In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Location of the synthetic fraud dataset. The default is the original local
# path; set the FRAUD_DATA_PATH environment variable to run the notebook on
# another machine without editing this cell.
import os

DATA_PATH = os.environ.get(
    "FRAUD_DATA_PATH",
    r"C:\Users\Amirhamza\OneDrive\Desktop\Programs\Fraud Detection\synthetic_fraud_dataset.csv",
)
ds = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the loaded frame.
ds.head(n=5)

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
ds.isna().sum()

In [None]:
# Summary statistics of the dataset, shown as the cell output.
ds.describe()

In [None]:
# Remove the transaction identifier column, then print the frame's schema.
# Rebinding with errors="ignore" (instead of inplace=True) keeps this cell
# idempotent: re-running it after the column is already gone no longer raises
# a KeyError.
ds = ds.drop(columns=["transaction_id"], errors="ignore")
ds.info()

In [None]:
# Separate the label column from the remaining columns.
y = ds.loc[:, "is_fraud"]
x = ds.drop(columns="is_fraud")

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns that get label-encoded further down in the notebook.
categorical_cols = [
    "transaction_type",
    "merchant_category",
    "country",
]


In [None]:
# Show the distinct values and their frequencies for each categorical column.
for col in categorical_cols:
    column = ds[col]
    print(f"{col} unique values: {column.unique()}")
    print(f"{col} values: {column.value_counts()}")


In [None]:
# One LabelEncoder per categorical column, stored by column name so the same
# mapping can be applied again to new input later in the notebook.
encoders = {col: LabelEncoder() for col in categorical_cols}

for col, le in encoders.items():
    # Fit on the column and overwrite it with the resulting integer codes.
    ds[col] = le.fit_transform(ds[col])


In [None]:
# Same inspection as before encoding: distinct values and counts per column.
for col in categorical_cols:
    distinct, counts = ds[col].unique(), ds[col].value_counts()
    print(f"{col} unique values: {distinct}")
    print(f"{col} values: {counts}")

    

In [None]:
# First twenty rows of the frame after label encoding.
ds.head(n=20)

In [None]:
# Save to CSV
#ds.to_csv("encoded_transactions.csv", index=False)


In [None]:
# Pairwise relationship plots of the columns, coloured by the fraud label.
sns.pairplot(ds, hue="is_fraud")
plt.show()


In [None]:
# Annotated heatmap of the column-to-column correlation matrix.
corr_matrix = ds.corr()

plt.figure(figsize=(20, 7))
sns.heatmap(corr_matrix, annot=True)
# Optional export (writes into the working directory):
# plt.savefig("heatmap.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Share of each target class, expressed as a percentage.
ds["is_fraud"].value_counts(normalize=True).mul(100)


In [None]:
# Model inputs (X) and label (y).
feature_cols = ["amount", "transaction_type", "merchant_category", "country", "hour"]

X = ds[feature_cols]
y = ds["is_fraud"]


In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the label; the fixed seed makes it repeatable.
split = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training rows only, then apply that
# same transformation to the training split, the test split and the full
# feature table.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)
x = scaler.transform(X)

In [None]:
from imblearn.over_sampling import SMOTE

# SMOTE = Synthetic Minority Oversampling Technique.
# Only the training split is resampled.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(x_train, y_train)
x_train_res, y_train_res = resampled


In [None]:
# Label counts of the training split before and after oversampling.
print("Before SMOTE:", y_train.value_counts(), sep="\n")
print("\nAfter SMOTE:", pd.Series(y_train_res).value_counts(), sep="\n")


In [None]:
from sklearn.ensemble import RandomForestClassifier

# 100-tree random forest trained on the SMOTE-resampled training data.
rf = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
).fit(x_train_res, y_train_res)

# Hard class predictions for the held-out test split.
y_pred_rf = rf.predict(x_test)


In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.base import clone
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validate the same recipe that was used for training
# (scale -> SMOTE -> random forest) rather than the bare forest on raw
# features. The imblearn Pipeline applies SMOTE only while fitting, so each
# fold's validation rows are never oversampled and never influence the
# synthetic samples or the scaler.
cv_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("smote", SMOTE(random_state=42)),
        ("rf", clone(rf)),  # unfitted copy with the same hyper-parameters
    ]
)

# Plain accuracy is dominated by the majority class on an imbalanced target,
# so the folds are scored with ROC-AUC instead of the default scorer.
cross_val_score(cv_model, X, y, cv=skf, scoring="roc_auc")

In [None]:
# Mean accuracy on the held-out split and on the un-resampled training split.
acu_test = rf.score(x_test, y_test)
acu_train = rf.score(x_train, y_train)

print("Accuracy_test", acu_test)
print("Accuracy_train", acu_train)


In [None]:
from sklearn.metrics import confusion_matrix, roc_auc_score

# Confusion matrix of the test-split predictions
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix:", cm, sep="\n")



In [None]:
# ROC-AUC measures how well the model ranks positives above negatives, so it
# must be computed from continuous scores. Feeding it hard 0/1 predictions
# collapses the curve to a single operating point and misreports the AUC.
# Column 1 of predict_proba is the probability of the class with the greater
# label, which is the score roc_auc_score expects for a binary target.
fraud_proba = rf.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, fraud_proba)
print("ROC-AUC:", roc_auc)

In [None]:
# Predict every row of the scaled full feature table and store the result
# next to the true label. Note: this includes the rows the model was trained
# on, so agreement here is not a hold-out estimate.
ds["predicted"] = rf.predict(x)
result = ds.loc[:, ["is_fraud", "predicted"]]
# Optional export: ds.to_csv("full_dataset_with_predictions.csv", index=False)
result.head()


In [None]:
# Overlay the given and the predicted label for every row, by row index.
fig, ax = plt.subplots(figsize=(12, 4))

ax.scatter(ds.index, ds["is_fraud"], label="Given", marker="x", color='red')
ax.scatter(ds.index, result["predicted"], label="Predicted", marker="*")
# Optional export: fig.savefig("Comparison.png", dpi=300, bbox_inches='tight')

ax.set_yticks([0, 1])
ax.set_xlabel("Samples")
ax.set_ylabel("Fraud")
ax.legend()
plt.show()


In [None]:
# Interactive check of a single transaction: read the five model features
# from the user, encode and scale them exactly like the training data, and
# print the random forest's verdict.
amount = float(input("Enter the amount : "))
transaction_type = input("Enter the transaction type : ").strip()
merchant_category = input("Enter the merchant_category : ").strip()
country = input("Enter the country : ").strip()
hour = float(input("Enter the hour : "))

# Column order must match the feature order the scaler and model were fit on.
new_data = pd.DataFrame({
    'amount': [amount],
    'transaction_type': [transaction_type],
    'merchant_category': [merchant_category],
    'country': [country],
    'hour': [hour]
})

categorical_cols = ["transaction_type", "merchant_category", "country"]

# Encode with the encoders fitted on the dataset. A value the encoder has
# never seen cannot be mapped, so fail early with the list of accepted values
# instead of LabelEncoder's generic "unseen labels" error.
for col in categorical_cols:
    known_values = list(encoders[col].classes_)
    entered = new_data.at[0, col]
    if entered not in known_values:
        raise ValueError(
            f"Unknown {col} {entered!r}; expected one of: "
            f"{', '.join(map(str, known_values))}"
        )
    new_data[col] = encoders[col].transform(new_data[col])

# Apply the scaler that was fitted on the training split.
Tran = scaler.transform(new_data)

# Single-row prediction; take the only element of the result.
predict = rf.predict(Tran)[0]

if predict == 0:
    print("✅ It may NOT be Fraud")
else:
    print("⚠️ It may be Fraud")



