In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the data
# Read the credit-card CSV into `df`, the frame every later cell works on.
# A missing file is re-raised with a hint instead of calling exit():
# in a notebook exit() requests a kernel shutdown, whereas an exception
# simply stops "Run All" at this cell with a readable traceback.
try:
    df = pd.read_csv('data/test_creditcard_2023.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: 'test_creditcard_2023.csv' not found.  Make sure the file is in the 'data/' directory."
    ) from err

In [None]:
# Preview the top of the frame (heading line, then the first five rows)
# as a quick check that the CSV parsed into the expected columns.
print("First 5 rows of the data:", df.head(n=5), sep="\n")

In [None]:
# Data Cleaning and Preprocessing
# Show column names, dtypes and non-null counts before any cleaning.
print("\nData Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
df.info()

In [None]:
# Discard every row that has at least one missing value.
# `axis` and `how` are spelled out for the reader; both equal the pandas defaults.
df = df.dropna(axis=0, how="any")


In [None]:
# Data Visualization - Histograms
# One figure per feature: a histogram with a KDE curve overlaid.
# Each pair is (column name in `df`, label shown on the figure).
for column, label in (('v1', 'V1'), ('v2', 'V2')):
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(df[column], kde=True, ax=ax)
    ax.set_title(f'Distribution of {label}')
    ax.set_xlabel(label)
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Data Visualization - Scatter Plots
# Plot the two features against each other to eyeball their joint distribution.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='v1', y='v2', ax=ax)
ax.set(title='Scatter Plot of V1 vs V2', xlabel='V1', ylabel='V2')
plt.show()

In [None]:
# Data Visualization - Box Plots
# Compare the spread of `v1` across the values of the `class` column.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='class', y='v1', ax=ax)
ax.set_title('Box Plot of V1 by Class')
ax.set_xlabel('Class')
ax.set_ylabel('V1')
plt.show()

In [None]:
# Correlation Analysis
# Pairwise correlation of the columns, rendered as an annotated heatmap.
# Only numeric/boolean columns are kept: since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises on them instead.
# select_dtypes() keeps this working across pandas versions.
correlation_matrix = df.select_dtypes(include=['number', 'bool']).corr()
plt.figure(figsize=(8, 6))
# NOTE(review): annot=True on an 8x6 figure is only legible for a handful of
# columns — confirm the column count of this dataset.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Further Analysis (example): isolate the rows whose `class` equals 1.
# Placeholder step — presumably 1 marks a fraudulent transaction; confirm
# against the dataset documentation and adapt to the real problem.
fraud_mask = df['class'] == 1
fraud_transactions = df.loc[fraud_mask]
print("\nNumber of Fraud Transactions:", fraud_transactions.shape[0])

In [None]:
# Summary Statistics
# Heading line followed by the describe() table for the cleaned frame.
print("\nSummary Statistics:", df.describe(), sep="\n")