<a href="https://colab.research.google.com/github/Sans7349/CODSOFT/blob/main/TASK_2_CREDIT_CARD_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the required libraries and setup
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Load the dataset
df = pd.read_csv('/content/fraudTest.csv')

# Sample a subset of the data to reduce memory usage
df = df.sample(frac=0.1, random_state=42)  # Adjust frac to get the desired subset size

# DATA PREPROCESS
# Order matters here: drop unused columns -> drop incomplete rows -> encode.

# Drop irrelevant or redundant columns first, so they are neither encoded for
# nothing nor allowed to decide which rows dropna() removes below.
# NOTE(review): 'Unnamed: 0' is the name pandas gives to a header-less index
# column in a CSV; it is assumed to be a row counter with no predictive value.
# The membership filter makes this a no-op when the column is absent.
columns_to_drop = ['Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'first', 'last', 'street', 'city', 'state', 'zip', 'lat', 'long', 'city_pop', 'dob', 'trans_num', 'unix_time', 'merch_lat', 'merch_long']
columns_to_drop = [col for col in columns_to_drop if col in df.columns]
df = df.drop(columns=columns_to_drop)

# Drop rows with missing values BEFORE encoding. After encoding they can no
# longer be detected: get_dummies() turns a missing category into an all-zero
# row and .cat.codes turns a missing value into the code -1.
df = df.dropna()

# Convert categorical variables to numeric using one-hot encoding
categorical_columns = ['category', 'job', 'gender']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Ensure all remaining columns are numeric: any other text column is replaced
# by integer category codes.
non_numeric_columns = df.select_dtypes(include=['object']).columns
for col in non_numeric_columns:
    df[col] = df[col].astype('category').cat.codes

# FEATURE / TARGET SPLIT
# `is_fraud` is the label; every other remaining column is a model input.
y = df['is_fraud']
X = df.drop('is_fraud', axis=1)

# Hold out 20% of the rows for testing. The split is stratified on the label
# and uses a fixed seed so it can be repeated.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# MODEL SELECTION & TRAINING
# Each model is fitted on the training split and immediately used to predict
# the held-out test split; the predictions are kept at module level.

# Logistic Regression
# Fraud rows are rare (19 of 4501 test rows in the recorded run), and the
# unweighted model scored 0 precision/recall on them. class_weight='balanced'
# weights each class inversely to its frequency so the minority class is not
# ignored during fitting.
lr_model = LogisticRegression(max_iter=1000, class_weight='balanced')
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# Decision Tree
# Seeded like the sampling and the train/test split above, so repeated runs
# produce the same tree and the same metrics.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)

# Random Forest
# Seeded for the same reason: bootstrap samples and feature subsets are random.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

# MODEL EVALUATION
# Logistic Regression Evaluation
def evaluate_logistic_regression():
    print("Logistic Regression:")
    print(classification_report(y_test, lr_predictions))
    print("Confusion Matrix:\n")
    print(confusion_matrix(y_test, lr_predictions))
    print(f"Accuracy: {accuracy_score(y_test, lr_predictions)}")
    print(f"Precision: {precision_score(y_test, lr_predictions)}")
    print(f"Recall: {recall_score(y_test, lr_predictions)}")
    print(f"F1 Score: {f1_score(y_test, lr_predictions)}")

evaluate_logistic_regression()

# Decision Tree Evaluation
def evaluate_decision_tree():
    """Print the test-set evaluation of the decision tree model.

    Predicts ``X_test`` with the module-level ``dt_model`` and compares the
    result with ``y_test``. Prints, in order: the classification report, the
    confusion matrix, then accuracy, precision, recall and F1 score.
    Returns nothing.
    """
    predicted = dt_model.predict(X_test)
    scalar_metrics = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )

    print("Decision Tree:\n")
    print(classification_report(y_test, predicted))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, predicted))
    for label, metric in scalar_metrics:
        print(f"{label}: {metric(y_test, predicted)}")

evaluate_decision_tree()

# Random Forest Evaluation
def evaluate_random_forest():
    """Print the test-set evaluation of the random forest model.

    Predicts ``X_test`` with the module-level ``rf_model`` and compares the
    result with ``y_test``. Prints, in order: the classification report, the
    confusion matrix, then accuracy, precision, recall and F1 score.
    Returns nothing.
    """
    predicted = rf_model.predict(X_test)
    scalar_metrics = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )

    print("Random Forest:\n")
    print(classification_report(y_test, predicted))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, predicted))
    for label, metric in scalar_metrics:
        print(f"{label}: {metric(y_test, predicted)}")

evaluate_random_forest()

def classify_transactions(model, new_data, feature_columns=None, categorical=None):
    """Label each row of ``new_data`` as 'Legitimate' or 'Fraudulent'.

    The new rows are one-hot encoded and aligned to the training feature
    columns before being passed to ``model.predict``.

    The encoding deliberately does NOT use ``drop_first``: on a new batch,
    ``drop_first`` removes the first category present in *that batch*, which
    is generally not the category that was dropped at training time, so rows
    would be silently re-labelled as the training baseline category. All
    dummies are generated instead, and the ``reindex`` keeps exactly the
    training columns (discarding the baseline and any unseen category).

    Args:
        model: Fitted estimator exposing ``predict``.
        new_data: DataFrame of raw transactions. It is not modified.
        feature_columns: Ordered feature names the model was trained on.
            Defaults to the module-level ``X.columns``.
        categorical: Names of the columns to one-hot encode. Defaults to the
            module-level ``categorical_columns``. Columns in this list that
            are absent from ``new_data`` are skipped.

    Returns:
        A new DataFrame holding the aligned feature columns plus a
        ``'Prediction'`` column. Training features that are missing from
        ``new_data`` are filled with 0.
    """
    if feature_columns is None:
        feature_columns = X.columns
    if categorical is None:
        categorical = categorical_columns

    # Only encode the categorical columns actually supplied; get_dummies
    # raises KeyError when asked to encode a column that does not exist.
    present = [col for col in categorical if col in new_data.columns]
    encoded = pd.get_dummies(new_data, columns=present) if present else new_data

    # Align to the training layout: same columns, same order, zeros for gaps.
    encoded = encoded.reindex(columns=feature_columns, fill_value=0)

    predictions = model.predict(encoded)
    encoded['Prediction'] = predictions
    encoded['Prediction'] = encoded['Prediction'].map({0: 'Legitimate', 1: 'Fraudulent'})

    return encoded

# Example usage with new data
# Only the three categorical fields are supplied; classify_transactions fills
# every other training feature with 0 through its reindex(fill_value=0).
new_transactions = pd.DataFrame(
    [
        ('food_dining', 'Doctor', 'M'),
        ('gas_transport', 'Engineer', 'F'),
        ('kids_pets', 'Police', 'M'),
        ('health_fitness', 'Scientist', 'F'),
        ('home', 'Buisness Analyst', 'M'),
    ],
    columns=['category', 'job', 'gender'],
)
classified_data = classify_transactions(lr_model, new_transactions)
print(classified_data)

Logistic Regression:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4482
         1.0       0.00      0.00      0.00        19

    accuracy                           0.99      4501
   macro avg       0.50      0.50      0.50      4501
weighted avg       0.99      0.99      0.99      4501

Confusion Matrix:

[[4478    4]
 [  19    0]]
Accuracy: 0.9948900244390135
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
Decision Tree:

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4482
         1.0       0.50      0.32      0.39        19

    accuracy                           1.00      4501
   macro avg       0.75      0.66      0.69      4501
weighted avg       1.00      1.00      1.00      4501

Confusion Matrix:
[[4476    6]
 [  13    6]]
Accuracy: 0.9957787158409243
Precision: 0.5
Recall: 0.3157894736842105
F1 Score: 0.3870967741935484
Random Forest:

              precision    recal