# Customer Transaction Behavior Analysis

This notebook analyzes transaction patterns to identify customer behavior and detect potential fraud.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given, so this applies to all libraries).
# NOTE(review): presumably done to keep notebook output tidy; it also hides
# deprecation and convergence warnings from pandas / scikit-learn.
warnings.filterwarnings('ignore')


## Load Dataset

In [None]:

# Read the raw transaction records; point the path at your own CSV file
df = pd.read_csv(filepath_or_buffer="transactions.csv")
df.head()


## Data Cleaning & Preprocessing

In [None]:

# Discard every row that has a missing value in any column
df.dropna(axis=0, how='any', inplace=True)

# Parse the timestamp column into datetime values
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Derive hour-of-day and day-of-week (Monday=0 ... Sunday=6) features
timestamp_parts = df['Timestamp'].dt
df['Hour'] = timestamp_parts.hour
df['DayOfWeek'] = timestamp_parts.weekday

# Standardize the transaction amount to zero mean and unit variance
scaler = StandardScaler()
scaler.fit(df[['Transaction_Amount']])
df['Amount_Scaled'] = scaler.transform(df[['Transaction_Amount']])
df.head()


## K-Means Clustering

In [None]:

# Assemble the clustering features and put them all on a common scale.
# K-Means relies on Euclidean distance, so leaving Hour (0-23) and
# DayOfWeek (0-6) unscaled next to the standardized amount would let Hour
# dominate the cluster assignment.
feature_columns = ['Amount_Scaled', 'Hour', 'DayOfWeek']
# Use a dedicated scaler so `scaler` (fitted on the amount) stays usable.
feature_scaler = StandardScaler()
X = pd.DataFrame(
    feature_scaler.fit_transform(df[feature_columns]),
    columns=feature_columns,
    index=df.index,
)

# Candidate cluster counts: 1..9, but never more clusters than samples,
# because KMeans raises when n_clusters exceeds the number of rows.
k_values = range(1, min(10, len(X) + 1))

# Record the within-cluster sum of squares (inertia) for each k.
inertia = []
for k in k_values:
    # n_init is pinned because its default differs between scikit-learn
    # releases; an explicit value keeps the curve reproducible.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

# Plot inertia against k; the "elbow" suggests a reasonable cluster count.
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.show()


In [None]:

# Fit the final model with k=4 (presumably read off the elbow plot) and
# store each row's cluster label on the DataFrame.
# n_init is set explicitly: its default differs between scikit-learn
# releases, so pinning it keeps the labels reproducible across versions.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(X)

# Show how many transactions fall into each cluster.
df['Cluster'].value_counts()


## Visualize Clusters with PCA

In [None]:

# Project the feature matrix onto its first two principal components
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X)

# Transposing yields one row per component, which unpacks into two columns
df['PC1'], df['PC2'] = principal_components.T

# Scatter the projected points, coloured by their assigned cluster
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set2')
ax.set_title('Customer Clusters Visualization')
plt.show()


In [None]:

# Write the DataFrame (including the columns added above) to CSV, without the index
df.to_csv(path_or_buf="clustered_transactions.csv", index=False)
