In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

# Load the dataset (assuming the dataset is in CSV format)
data = pd.read_csv('/content/drive/MyDrive/creditcard.csv')

# Separate features and target variable
X = data.drop('Class', axis=1)
y = data['Class']

# Step 1: Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=y)  # Stratified split to maintain class distribution

# Step 3: Train the logistic regression model with class weights
model = LogisticRegression(max_iter=1000, solver='lbfgs', class_weight='balanced')
model.fit(X_train, y_train)

# Step 4: Make probability predictions on the test set
y_prob = model.predict_proba(X_test)[:, 1]

# Step 5: Adjust the threshold for prediction
threshold = 0.2  # Example threshold to control sensitivity
y_pred_adjusted = (y_prob >= threshold).astype(int)

# Step 6: Evaluate model performance
precision = precision_score(y_test, y_pred_adjusted)
recall = recall_score(y_test, y_pred_adjusted)
f1 = f1_score(y_test, y_pred_adjusted)

print("Adjusted Precision:", precision)
print("Adjusted Recall:", recall)
print("Adjusted F1-Score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred_adjusted))

# Step 7: Check the number of predicted fraud cases
predicted_positives = sum(y_pred_adjusted)
print(f"Number of predicted positives (fraud cases): {predicted_positives}")
from imblearn.over_sampling import SMOTE

# Step 1: Apply SMOTE to the training set
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Step 2: Train the model again on the resampled data
model = LogisticRegression(max_iter=1000, solver='lbfgs')
model.fit(X_train_resampled, y_train_resampled)

# Step 3: Make predictions and evaluate
y_prob = model.predict_proba(X_test)[:, 1]
y_pred_adjusted = (y_prob >= threshold).astype(int)

# Evaluate model performance again
precision = precision_score(y_test, y_pred_adjusted)
recall = recall_score(y_test, y_pred_adjusted)
f1 = f1_score(y_test, y_pred_adjusted)

print("Adjusted Precision after SMOTE:", precision)
print("Adjusted Recall after SMOTE:", recall)
print("Adjusted F1-Score after SMOTE:", f1)
print("\nClassification Report after SMOTE:\n", classification_report(y_test, y_pred_adjusted))

predicted_positives = sum(y_pred_adjusted)
print(f"Number of predicted positives (fraud cases) after SMOTE: {predicted_positives}")



from sklearn.ensemble import RandomForestClassifier

# Step 1: Train a Random Forest model with class weights
rf_model = RandomForestClassifier(n_estimators=10, max_depth=5, class_weight='balanced', random_state=42)

rf_model.fit(X_train, y_train)

# Step 2: Make predictions
y_pred_rf = rf_model.predict(X_test)

# Step 3: Evaluate model performance
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)

print("Random Forest Adjusted Precision:", precision_rf)
print("Random Forest Adjusted Recall:", recall_rf)
print("Random Forest Adjusted F1-Score:", f1_rf)
print("\nRandom Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

# Step 4: Check the number of predicted fraud cases
predicted_positives_rf = sum(y_pred_rf)
print(f"Number of predicted positives (fraud cases) with Random Forest: {predicted_positives_rf}")



Adjusted Precision: 0.01637302011033992
Adjusted Recall: 0.9387755102040817
Adjusted F1-Score: 0.032184712261675706

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.90      0.95     56864
           1       0.02      0.94      0.03        98

    accuracy                           0.90     56962
   macro avg       0.51      0.92      0.49     56962
weighted avg       1.00      0.90      0.95     56962

Number of predicted positives (fraud cases): 5619
Adjusted Precision after SMOTE: 0.016242555495397944
Adjusted Recall after SMOTE: 0.9183673469387755
Adjusted F1-Score after SMOTE: 0.03192055328959035

Classification Report after SMOTE:
               precision    recall  f1-score   support

           0       1.00      0.90      0.95     56864
           1       0.02      0.92      0.03        98

    accuracy                           0.90     56962
   macro avg       0.51      0.91      0.49     56962
weighted avg       1.