<a href="https://colab.research.google.com/github/Kalyanchittaluri/MachineLearningProjects/blob/main/CreditCardClustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
#import all required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import plotly.graph_objects as go


In [5]:
# Read the credit card dataset from the CSV file in the working directory
# and print the resulting DataFrame
data = pd.read_csv(filepath_or_buffer='CC GENERAL.csv')
print(data)


     CUST_ID      BALANCE  BALANCE_FREQUENCY  PURCHASES  ONEOFF_PURCHASES  \
0     C10001    40.900749           0.818182      95.40              0.00   
1     C10002  3202.467416           0.909091       0.00              0.00   
2     C10003  2495.148862           1.000000     773.17            773.17   
3     C10004  1666.670542           0.636364    1499.00           1499.00   
4     C10005   817.714335           1.000000      16.00             16.00   
...      ...          ...                ...        ...               ...   
8945  C19186    28.493517           1.000000     291.12              0.00   
8946  C19187    19.183215           1.000000     300.00              0.00   
8947  C19188    23.398673           0.833333     144.40              0.00   
8948  C19189    13.457564           0.833333       0.00              0.00   
8949  C19190   372.708075           0.666667    1093.25           1093.25   

      INSTALLMENTS_PURCHASES  CASH_ADVANCE  PURCHASES_FREQUENCY  \
0       

In [6]:
# Discard every row that has a missing value in any column, then print
# what is left (the surviving rows keep their original index labels)
data = data.dropna(axis=0, how='any')
print(data)

     CUST_ID      BALANCE  BALANCE_FREQUENCY  PURCHASES  ONEOFF_PURCHASES  \
0     C10001    40.900749           0.818182      95.40              0.00   
1     C10002  3202.467416           0.909091       0.00              0.00   
2     C10003  2495.148862           1.000000     773.17            773.17   
4     C10005   817.714335           1.000000      16.00             16.00   
5     C10006  1809.828751           1.000000    1333.28              0.00   
...      ...          ...                ...        ...               ...   
8943  C19184     5.871712           0.500000      20.90             20.90   
8945  C19186    28.493517           1.000000     291.12              0.00   
8947  C19188    23.398673           0.833333     144.40              0.00   
8948  C19189    13.457564           0.833333       0.00              0.00   
8949  C19190   372.708075           0.666667    1093.25           1093.25   

      INSTALLMENTS_PURCHASES  CASH_ADVANCE  PURCHASES_FREQUENCY  \
0       

In [7]:
# Keep only the three columns used as clustering features (all rows)
t_data = data.loc[:, ["BALANCE", "PURCHASES", "CREDIT_LIMIT"]]


In [8]:
# Min-max scale each selected feature to the [0, 1] range; the scaler is
# fitted on, and applied to, the same feature frame
scaler = MinMaxScaler(feature_range=(0, 1))
t_data_scaled = scaler.fit_transform(t_data)

In [9]:
# Perform KMeans clustering on the scaled features.
# random_state is fixed so that Restart & Run All reproduces the same cluster
# assignments; without it the centroid initialisation (and therefore the
# numbering and membership of the clusters) changes from run to run.
# n_init is set explicitly because its default differs between scikit-learn
# releases, which would otherwise change results across environments.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters = kmeans.fit_predict(t_data_scaled)


In [10]:
# Work on a deep copy so the cleaned frame `data` is left untouched, and
# store the integer cluster id of every row in a new column
copy_data = data.copy(deep=True)
copy_data["CREDIT_CARD_CLUSTERS"] = clusters


In [11]:
# Replace the integer cluster ids 0..4 with the display names
# "Cluster 1".."Cluster 5".
# Gotcha: the column is overwritten in place, so running this cell a second
# time (without re-running the previous cell) maps the already-renamed
# strings, which are not keys of the mapping, to NaN.
copy_data["CREDIT_CARD_CLUSTERS"] = copy_data["CREDIT_CARD_CLUSTERS"].map(
    {label: f"Cluster {label + 1}" for label in range(5)}
)

print(copy_data)

     CUST_ID      BALANCE  BALANCE_FREQUENCY  PURCHASES  ONEOFF_PURCHASES  \
0     C10001    40.900749           0.818182      95.40              0.00   
1     C10002  3202.467416           0.909091       0.00              0.00   
2     C10003  2495.148862           1.000000     773.17            773.17   
4     C10005   817.714335           1.000000      16.00             16.00   
5     C10006  1809.828751           1.000000    1333.28              0.00   
...      ...          ...                ...        ...               ...   
8943  C19184     5.871712           0.500000      20.90             20.90   
8945  C19186    28.493517           1.000000     291.12              0.00   
8947  C19188    23.398673           0.833333     144.40              0.00   
8948  C19189    13.457564           0.833333       0.00              0.00   
8949  C19190   372.708075           0.666667    1093.25           1093.25   

      INSTALLMENTS_PURCHASES  CASH_ADVANCE  PURCHASES_FREQUENCY  \
0       

In [12]:
# Marker colors for the plot, one entry per cluster (five in total)
colors = [
    "red",
    "blue",
    "green",
    "purple",
    "orange",
]

In [13]:
# Create the empty Plotly figure that will hold the 3D scatter plot; the
# per-cluster traces and the layout are added to it in the next cell
PLOT = go.Figure()

In [14]:
# Add one 3D scatter trace per cluster (BALANCE x PURCHASES x CREDIT_LIMIT).
# The cluster names are iterated in sorted order so that a given name always
# receives the same entry of `colors`, instead of depending on which cluster
# happens to appear first in the rows. Rows without a cluster name are skipped.
# Note: traces are appended to PLOT, so re-running this cell without
# re-creating the figure adds every trace a second time.
cluster_names = sorted(copy_data["CREDIT_CARD_CLUSTERS"].dropna().unique())
for idx, cluster_name in enumerate(cluster_names):
    # Filter the rows of this cluster once and reuse them for all three axes
    cluster_rows = copy_data[copy_data["CREDIT_CARD_CLUSTERS"] == cluster_name]
    PLOT.add_trace(go.Scatter3d(
        x=cluster_rows['BALANCE'],
        y=cluster_rows['PURCHASES'],
        z=cluster_rows['CREDIT_LIMIT'],
        mode='markers',
        # Wrap around the palette if there are more clusters than colors
        marker=dict(size=6, color=colors[idx % len(colors)], line=dict(width=1)),
        name=str(cluster_name)
    ))

# Show the three feature values in the hover box of every trace
PLOT.update_traces(hovertemplate="BALANCE: %{x} <br>PURCHASES: %{y} <br>CREDIT LIMIT: %{z}")

# Axis titles use the nested title=dict(text=..., font=...) form; the flat
# `titlefont` axis attribute is deprecated and rejected by newer Plotly releases.
PLOT.update_layout(
    width=800, height=800, autosize=True, showlegend=True,
    scene=dict(
        xaxis=dict(title=dict(text='BALANCE', font=dict(color='black'))),
        yaxis=dict(title=dict(text='PURCHASES', font=dict(color='black'))),
        zaxis=dict(title=dict(text='CREDIT_LIMIT', font=dict(color='black')))
    ),
    font=dict(family="Gilroy", color='black', size=12)
)

PLOT.show()