<a href="https://colab.research.google.com/github/ArmanZak/CODSOFT_DS_TASKS/blob/main/Task_05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Step 1: Upload the ZIP File
# Colab-specific import; presumably only resolvable inside a Google Colab
# runtime -- confirm before running this notebook elsewhere.
from google.colab import files
# `uploaded` is consumed below as a mapping of uploaded filename -> file
# contents (it is iterated with .keys() and its values are wrapped in BytesIO).
uploaded = files.upload()  # Upload ZIP manually

import zipfile
import io
import os
import glob
import pandas as pd
import numpy as np

# Step 2: Extract the ZIP File
# Unpack every uploaded archive into one extraction directory. Uploads that
# are not ZIP archives are reported rather than skipped silently, so a wrong
# upload is visible before the "no CSV" error below fires.
for filename, payload in uploaded.items():
    if not filename.lower().endswith(".zip"):
        print("Skipping non-ZIP upload:", filename)
        continue
    with zipfile.ZipFile(io.BytesIO(payload), 'r') as zip_ref:
        zip_ref.extractall("/content/dataset")

# Step 3: Find and Load the CSV File
# With recursive=True, "**" matches zero or more directories, so this single
# pattern already covers CSVs sitting directly in /content/dataset as well as
# nested ones. glob returns paths in arbitrary order; sorting makes the choice
# of csv_files[0] deterministic when the archive contains several CSVs.
csv_files = sorted(glob.glob("/content/dataset/**/*.csv", recursive=True))

if not csv_files:
    # FileNotFoundError is a subclass of Exception, so existing handlers still work.
    raise FileNotFoundError("No CSV file found in the uploaded ZIP!")

data_path = csv_files[0]
print("Loading:", data_path)
df = pd.read_csv(data_path)

# Step 4: Data Exploration
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
print("Class distribution:\n", df['Class'].value_counts())

# Step 5: Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

X = df.drop("Class", axis=1)
y = df["Class"]

# Split BEFORE scaling. Fitting the scaler on the full dataset lets the
# held-out rows influence the mean/variance used for training (data leakage),
# which makes the test metrics optimistic. The split arguments are unchanged,
# so the same rows land in train and test as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Learn scaling statistics from the training rows only, then apply that same
# transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later code that references X_scaled still finds it; it is now
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Handle class imbalance
# Oversample the training split only, so the test set keeps the original
# class distribution.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 6: Train Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Both models are fit on the SMOTE-resampled training data and scored on the
# untouched test split.
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_resampled, y_resampled)
log_preds = log_model.predict(X_test)

# random_state pins the forest's bootstrap/feature sampling so reruns give the
# same metrics, matching the seed used for the split and SMOTE. n_jobs=-1
# builds the trees in parallel on all cores; it does not change the result.
rf_model = RandomForestClassifier(random_state=42, n_jobs=-1)
rf_model.fit(X_resampled, y_resampled)
rf_preds = rf_model.predict(X_test)

# Step 7: Evaluation
from sklearn.metrics import classification_report, confusion_matrix

# Report the confusion matrix and per-class metrics for each model's
# predictions on the test split, in the same order as they were trained.
for title, preds in (
    ("=== Logistic Regression ===", log_preds),
    ("=== Random Forest ===", rf_preds),
):
    print(title)
    print(confusion_matrix(y_test, preds))
    print(classification_report(y_test, preds))



Saving creditcard.csv.zip to creditcard.csv.zip
Loading: /content/dataset/creditcard.csv
   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  .