In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')


In [4]:
df = pd.read_csv("/content/creditcard.csv.zip")

BadZipFile: File is not a zip file

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
print(df.isnull().sum())

In [None]:
sns.countplot(x='Class', data=df)
plt.title('Distribution of Fraudulent Transactions')
plt.show()

In [None]:
plt.figure(figsize=(12,8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation')
plt.show()

In [None]:
if 'transcation_id' in df.columns:
    df.drop('transcation_id', axis=1, inplace=True)

In [None]:
df = pd.get_dummies(df, drop_first=True)

In [None]:
scaler = StandardScaler()
features = df.drop('Class', axis=1)
X = scaler.fit_transform(features)
y = df['Class']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

In [None]:

print("Logistic Regression")
print(classification_report(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))

In [None]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)


In [None]:
print("Random Forest")
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))

In [None]:
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

In [None]:
print("XGBoost Classifier")
print(classification_report(y_test, y_pred_xgb))
print(confusion_matrix(y_test, y_pred_xgb))

In [None]:
plt.figure(figsize=(10, 6))

In [None]:
from sklearn.metrics import roc_curve, auc


In [None]:
y_probs_lr = lr.predict_proba(X_test)[:, 1]
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_probs_lr)
plt.plot(fpr_lr, tpr_lr, label='Logistic Regression')

In [None]:
y_probs_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_probs_rf)
plt.plot(fpr_rf, tpr_rf, label='Random Forest')

In [None]:
y_probs_xgb = xgb.predict_proba(X_test)[:, 1]
fpr_xgb, tpr_xgb, _ = roc_curve(y_test, y_probs_xgb)
plt.plot(fpr_xgb, tpr_xgb, label='XGBoost')

In [None]:
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve Comparison")
plt.legend()
plt.grid()
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 6, 10],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.8, 1],
}

In [None]:
grid = GridSearchCV(estimator=XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
                    param_grid=param_grid,
                    cv=3,
                    scoring='f1',
                    verbose=1,
                    n_jobs=-1)

grid.fit(X_train, y_train)

In [None]:
best_model = grid.best_estimator_
print("Best Parameters:", grid.best_params_)

In [None]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', best_model)
])

pipeline.fit(X_train, y_train)

In [None]:
sample = np.array([[120.0, 1, 0, 0, 5000, 1, 0, 1]])
sample_pred = pipeline.predict(sample)
print("Prediction (0 = Not Fraud, 1 = Fraud):", sample_pred[0])

In [None]:
!pip install streamlit

In [None]:
%%writefile app.py
import streamlit as st
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline


In [None]:
df = pd.read_csv("/content/creditcard.csv.zip")
df.head()

In [None]:
df = pd.get_dummies(df, drop_first=True)  # Handle categoricals
X = df.drop('Class', axis=1)
y = df['Class']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)


In [None]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss'))
])


In [None]:

pipeline.fit(X_train, y_train)


In [None]:
joblib.dump(pipeline, 'fraud_detection_pipeline.pkl')


In [None]:
model = joblib.load('fraud_detection_pipeline.pkl')


In [None]:
import streamlit as st


In [None]:
model = joblib.load('fraud_detection_pipeline.pkl')

st.title("🔍 Real-Time Fraud Detection Simulator")

In [None]:
amount = st.number_input("Transaction Amount")
feature1 = st.selectbox("Feature 1 (is_international)", [0, 1])
feature2 = st.selectbox("Feature 2 (card_present)", [0, 1])
feature3 = st.selectbox("Feature 3 (online)", [0, 1])
balance = st.number_input("Account Balance")
feature4 = st.selectbox("Feature 4", [0, 1])
feature5 = st.selectbox("Feature 5", [0, 1])
feature6 = st.selectbox("Feature 6", [0, 1])

In [None]:
if st.button("Predict Fraud"):
    user_input = np.array([[amount, V1, V2, V3, balance, V4, V5, V6]])
    prediction = model.predict(user_input)[0]
    result = "Fraudulent ❌" if prediction == 1 else "Legitimate ✅"
    st.success(f"Transaction is {result}")

In [None]:
!pip install pyngrok

In [None]:
!ngrok config add-authtoken YOUR_AUTHTOKEN_HERE


In [None]:
!ngrok config add-authtoken 2wXmWXNnOeaEkEzF6fepuiNIrf6_322P5buwUndXcXPrkJwdc


In [None]:
!cat /root/.ngrok2/ngrok.yml


In [None]:
!mkdir -p /root/.ngrok2
!echo "authtoken: YOUR_AUTHTOKEN_HERE" > /root/.ngrok2/ngrok.yml


In [None]:
public_url = ngrok.connect(addr="8501", proto="http")


In [None]:
from pyngrok import ngrok

get_ipython().system('pkill streamlit')

get_ipython().system_raw('streamlit run app.py &')

public_url = ngrok.connect(addr="8501", proto="http")
print("🚀 Streamlit App is Live at:", public_url)
