<a href="https://colab.research.google.com/github/Manojkumar063/fraud-transcation/blob/main/fraud_transactions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Set a random seed for reproducibility
np.random.seed(42)

# Number of samples
num_samples = 10000

# Generate random 'Amount' and 'Time' features
amounts = np.random.uniform(low=1, high=1000, size=num_samples)
times = np.random.uniform(low=0, high=24, size=num_samples)

# Create a balanced distribution for the 'Class' variable
fraud_indices = np.random.choice(num_samples, size=num_samples // 20, replace=False)
class_labels = np.zeros(num_samples)
class_labels[fraud_indices] = 1

# Create a DataFrame
data = pd.DataFrame({
    'Amount': amounts,
    'Time': times,
    'Class': class_labels
})

# Display the first few rows of the dataset
print(data.head())

# Save the dataset to a CSV file
data.to_csv('fraud_detection_dataset.csv', index=False)


       Amount       Time  Class
0  375.165579   8.967380    0.0
1  950.763592   7.989890    0.0
2  732.261948   4.227694    0.0
3  599.059826  14.574400    0.0
4  156.862622  11.438980    1.0


In [7]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

# Set a random seed for reproducibility
np.random.seed(42)

# Create a synthetic fraud detection dataset
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_clusters_per_class=2,
    weights=[0.95],
    flip_y=0,
    random_state=42
)

# Add columns names
feature_names = [f'Feature_{i}' for i in range(X.shape[1])]
data = pd.DataFrame(X, columns=feature_names)
data['Class'] = y

# Display the first few rows of the dataset
print(data.head())

# Save the dataset to a CSV file
data.to_csv('/content/fraud_detection_dataset.csv', index=False)


   Feature_0  Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  \
0   1.709192   1.179545  -0.850920   1.750773  -5.184980  -1.856230   
1   1.486885  -0.400909   1.852473   3.074283  -4.476158  -0.227699   
2  -1.968035   1.383723  -3.953401  -1.631949   3.940087   0.464082   
3  -2.311419   0.391472  -2.093389  -2.800227   2.672854   2.690298   
4  -0.813161   1.240635  -0.780868  -0.506012   0.960602   0.110177   

   Feature_6  Feature_7  Feature_8  Feature_9  Class  
0   1.724183  -3.129711   0.503606   2.318989      0  
1  -1.015016   2.697117  -0.065749   4.120415      0  
2  -0.880014   0.255723  -2.070350  -2.091797      0  
3  -0.295322  -0.064424   0.227488  -0.696111      0  
4   0.323789  -0.673712  -0.043679   0.074507      0  


In [6]:
# Import necessary libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import cross_val_score

# Load the dataset (replace 'your_dataset.csv' with your actual dataset)
df = pd.read_csv('/content/fraud_detection_dataset.csv')

# Data preprocessing
# (Assuming the dataset has columns like 'Amount', 'Time', and 'Class')
# Add any additional preprocessing steps based on your specific dataset

# Feature engineering
# (Assuming 'Amount' and 'Time' are relevant features; add more as needed)
features = ['Amount', 'Time']
X = df[features]
y = df['Class']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model development using XGBoost
model = GradientBoostingClassifier()
model.fit(X_train_scaled, y_train)

# Model evaluation on the test set
y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

# Cross-validation to assess model performance
cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='roc_auc')
print(f'Cross-Validation ROC AUC: {cv_scores.mean():.4f}')

# Data preprocessing
# (Assuming the dataset has columns like 'Amount', 'Time', and 'Class')
# Add any additional preprocessing steps based on your specific dataset

# Feature engineering
# (Assuming 'Amount' and 'Time' are relevant features; add more as needed)
features = ['Amount', 'Time']
X = df[features]
y = df['Class']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model development using XGBoost
model = GradientBoostingClassifier()
model.fit(X_train_scaled, y_train)

# Model evaluation on the test set
y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

# Cross-validation to assess model performance
cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='roc_auc')
print(f'Cross-Validation ROC AUC: {cv_scores.mean():.4f}')

# Data preprocessing
# (Assuming the dataset has columns like 'Amount', 'Time', and 'Class')
# Add any additional preprocessing steps based on your specific dataset

# Feature engineering
# (Assuming 'Amount' and 'Time' are relevant features; add more as needed)
features = ['Amount', 'Time']
X = df[features]
y = df['Class']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model development using XGBoost
model = GradientBoostingClassifier()
model.fit(X_train_scaled, y_train)

# Model evaluation on the test set
y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

# Cross-validation to assess model performance
cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='roc_auc')
print(f'Cross-Validation ROC AUC: {cv_scores.mean():.4f}')


Accuracy: 0.9435
Precision: 0.0000
Recall: 0.0000
ROC AUC: 0.4984
Cross-Validation ROC AUC: 0.5152
Accuracy: 0.9425
Precision: 0.0000
Recall: 0.0000
ROC AUC: 0.4979
Cross-Validation ROC AUC: 0.5153
Accuracy: 0.9425
Precision: 0.0000
Recall: 0.0000
ROC AUC: 0.4979
Cross-Validation ROC AUC: 0.5152
