In [None]:
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
# Seed NumPy's legacy global RNG so every np.random.* draw below is repeatable
np.random.seed(seed=42)


In [None]:

# Generate synthetic data laid out as: Time, V1..V28, Amount, Class
n_samples = 10000
n_features = 30  # Total feature columns: 28 anonymized features + Amount and Time
# Time and Amount are simulated separately below, so only the remaining
# columns are drawn as anonymized features. Drawing n_features columns here
# would make the stacked array wider than the column-name list.
n_anonymized = n_features - 2

# Simulating feature values
X = np.random.normal(0, 1, size=(n_samples, n_anonymized))
amount = np.random.exponential(scale=100, size=n_samples).reshape(-1, 1)
time = np.random.uniform(0, 172800, size=n_samples).reshape(-1, 1)  # 2 days in seconds

# Fraud labels: 1 for fraud, 0 for legitimate transactions
fraud_ratio = 0.02  # 2% fraud transactions
y = np.random.choice([0, 1], size=n_samples, p=[1-fraud_ratio, fraud_ratio])

# Combine into a DataFrame; one name per stacked column, in stacking order
feature_columns = ['Time'] + [f'V{i}' for i in range(1, n_anonymized + 1)] + ['Amount']
data = pd.DataFrame(np.hstack((time, X, amount)), columns=feature_columns)
data['Class'] = y

# Save the dataset
data.to_csv('synthetic_creditcard.csv', index=False)
print("Synthetic dataset created and saved as 'synthetic_creditcard.csv'")


In [None]:


# Load the dataset
data = pd.read_csv('synthetic_creditcard.csv')

# Exploratory Data Analysis
print(data.describe())
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
data.info()
ax = sns.countplot(x='Class', data=data)
ax.set_title('Transactions per class (0 = legitimate, 1 = fraud)')
plt.show()

# Model Building
X = data.drop('Class', axis=1)
y = data['Class']
# Stratify on the label so the rare positive class keeps the same proportion
# in both the training and the test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Data Preprocessing
# Fit the scaler on the training rows only and reuse those statistics for the
# test rows; fitting on the full dataset would leak test-set information.
# .assign returns new frames, so `data` itself is left unmodified.
scaler = StandardScaler()
X_train = X_train.assign(
    Amount=scaler.fit_transform(X_train['Amount'].values.reshape(-1, 1)).ravel()
)
X_test = X_test.assign(
    Amount=scaler.transform(X_test['Amount'].values.reshape(-1, 1)).ravel()
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
# NOTE: with a heavily imbalanced label, accuracy alone can look high even if
# no positive is ever predicted; read it together with the confusion matrix.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# labels=[0, 1] keeps the matrix 2x2 even if one class is absent from
# y_test / y_pred.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=[0, 1]))
