<a href="https://colab.research.google.com/github/Janani1222/Brainwave_Matrix_Intern_/blob/main/task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
url = "https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv"
df = pd.read_csv(url)
print("Dataset loaded successfully!")
print(f"Dataset shape: {df.shape}")
print("Class distribution:\n", df["Class"].value_counts())
X = df.drop(columns=["Class"])
y = df["Class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
print("\nPerforming anomaly detection using Isolation Forest...")
iso_forest = IsolationForest(n_estimators=100, contamination=0.002, random_state=42)
y_pred_anomaly = iso_forest.fit_predict(X_test)
y_pred_anomaly = np.where(y_pred_anomaly == -1, 1, 0)
print("\nAnomaly Detection Results:")
print(classification_report(y_test, y_pred_anomaly))
print("ROC AUC Score:", roc_auc_score(y_test, y_pred_anomaly))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_anomaly))
print("\nHandling imbalanced data with SMOTE...")
smote = SMOTE(sampling_strategy=0.1, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
print("Class distribution after SMOTE:\n", pd.Series(y_train_smote).value_counts())
print("\nTraining supervised learning model (Logistic Regression)...")
lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_model.fit(X_train_smote, y_train_smote)
y_pred_supervised = lr_model.predict(X_test)
y_proba_supervised = lr_model.predict_proba(X_test)[:, 1]
print("\nSupervised Learning Results:")
print(classification_report(y_test, y_pred_supervised))
print("ROC AUC Score:", roc_auc_score(y_test, y_proba_supervised))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_supervised))


Dataset loaded successfully!
Dataset shape: (284807, 31)
Class distribution:
 Class
0    284315
1       492
Name: count, dtype: int64

Performing anomaly detection using Isolation Forest...

Anomaly Detection Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.21      0.24      0.23       148

    accuracy                           1.00     85443
   macro avg       0.60      0.62      0.61     85443
weighted avg       1.00      1.00      1.00     85443

ROC AUC Score: 0.6208302504978745
Confusion Matrix:
 [[85160   135]
 [  112    36]]

Handling imbalanced data with SMOTE...
Class distribution after SMOTE:
 Class
0    199020
1     19902
Name: count, dtype: int64

Training supervised learning model (Logistic Regression)...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Supervised Learning Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.47      0.82      0.60       148

    accuracy                           1.00     85443
   macro avg       0.73      0.91      0.80     85443
weighted avg       1.00      1.00      1.00     85443

ROC AUC Score: 0.9592087793872774
Confusion Matrix:
 [[85155   140]
 [   26   122]]
