In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation, DBSCAN, MeanShift
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Seed both generators. scikit-learn estimators draw from NumPy's global RNG
# (not Python's `random` module) when no random_state is passed, so seeding
# `random` alone does not make the clustering results reproducible.
random.seed(42)
np.random.seed(42)

In [None]:
# Location of the customer CSV. Set the CUSTOMER_DATA_CSV environment variable to run
# the notebook on another machine; the original local path remains the default.
DATA_PATH = os.environ.get("CUSTOMER_DATA_CSV", r"C:\Users\gabri\Downloads\archive\Customer_Data.csv")
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Summary statistics for every column; include='all' also covers non-numeric columns.
df.describe(include='all')

In [None]:
# Column dtypes and non-null counts before any cleaning.
df.info()

In [None]:
# Remove fully duplicated rows, mutating df in place.
df.drop_duplicates(inplace=True)

In [None]:
# Re-check row and non-null counts after de-duplication.
df.info()

There are values missing from CREDIT_LIMIT and MINIMUM_PAYMENTS; let's check the percentage of values missing.

In [None]:
# Percentage of rows with a missing CREDIT_LIMIT: 100 minus the share of non-null entries.
credit_limit_col = df['CREDIT_LIMIT']
credit_limit_present_pct = (credit_limit_col.count() / credit_limit_col.size) * 100
credit_limit_missing = 100 - credit_limit_present_pct
credit_limit_missing

In [None]:
# Percentage of rows with a missing MINIMUM_PAYMENTS: 100 minus the share of non-null entries.
minimum_payments_col = df['MINIMUM_PAYMENTS']
minimum_payments_present_pct = (minimum_payments_col.count() / minimum_payments_col.size) * 100
minimum_payment_missing = 100 - minimum_payments_present_pct
minimum_payment_missing

Since both percentages are under 10%, the rows with missing values will be removed.

In [None]:
# Show the rows that have no CREDIT_LIMIT value.
df.loc[df['CREDIT_LIMIT'].isna()]

In [None]:
# Remove the rows that have no CREDIT_LIMIT value (in place).
missing_credit_limit_rows = df.loc[df['CREDIT_LIMIT'].isna()].index
df.drop(index=missing_credit_limit_rows, inplace=True)

In [None]:
# Show the rows that have no MINIMUM_PAYMENTS value.
df.loc[df['MINIMUM_PAYMENTS'].isna()]

In [None]:
# Remove the rows that have no MINIMUM_PAYMENTS value (in place).
missing_minimum_payments_rows = df.loc[df['MINIMUM_PAYMENTS'].isna()].index
df.drop(index=missing_minimum_payments_rows, inplace=True)

In [None]:
# Confirm the non-null counts after dropping the rows with missing values.
df.info()

In [None]:
# Summary statistics of the remaining rows, for every column.
df.describe(include='all')

In [None]:
# Drop the CUST_ID column. errors='ignore' keeps this cell safe to re-run:
# without it a second execution raises KeyError because the column is already gone.
df.drop(columns=['CUST_ID'], errors='ignore', inplace=True)

In [None]:
# Preview the frame after removing CUST_ID.
df.head()

# Univariate Analysis

In [None]:
# List the column names available for plotting.
df.columns

In [None]:
# Histogram of every feature, one panel per column on a 9x2 grid.
hist_columns = [
    'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES', 'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY', 'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY', 'CASH_ADVANCE_TRX', 'PURCHASES_TRX',
    'CREDIT_LIMIT', 'PAYMENTS', 'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
]

fig, axes = plt.subplots(9, 2, figsize=(40, 120))

# Looping over the flattened grid replaces 17 copy-pasted calls and avoids binding
# `_`, which IPython uses for the last cell output.
for ax, column in zip(axes.flat, hist_columns):
    sns.histplot(df[column], ax=ax)

# Hide the panel(s) that have no column assigned (the 18th slot of the grid).
for ax in axes.flat[len(hist_columns):]:
    ax.set_visible(False)

# Render the figure explicitly so the Axes repr is not echoed as cell output.
plt.show()

In [None]:
# Box plot of every feature, one panel per column on a 9x2 grid, to spot outliers.
box_columns = [
    'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES', 'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY', 'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY', 'CASH_ADVANCE_TRX', 'PURCHASES_TRX',
    'CREDIT_LIMIT', 'PAYMENTS', 'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
]

fig, axes = plt.subplots(9, 2, figsize=(40, 120))

# Looping over the flattened grid replaces 17 copy-pasted calls and avoids binding
# `_`, which IPython uses for the last cell output.
for ax, column in zip(axes.flat, box_columns):
    sns.boxplot(df[column], ax=ax)

# Hide the panel(s) that have no column assigned (the 18th slot of the grid).
for ax in axes.flat[len(box_columns):]:
    ax.set_visible(False)

plt.show()

There appear to be clear outliers that will be removed. The outliers will impact the results of the clusters, with the exception of density-based clustering. However, outliers would need their own unique handling strategies, different from those used for the clusters. This, combined with the sheer number of data points, is why they are being removed. Hopefully, with outliers removed, the clustering can be cleaner. First we will check whether there is existing data that doesn't make sense and remove it, then remove outliers.

In [None]:
# List the column names to go through in the sanity checks below.
df.columns

In [None]:
# Count the negative entries in every numeric column.
# A notebook cell only displays its last expression, so seventeen separate
# `df[df[col] < 0]` lines would show the TENURE result alone and silently discard
# the other sixteen checks. One per-column count surfaces all of them at once.
negative_counts = df.select_dtypes(include='number').lt(0).sum()
negative_counts

In [None]:
# Count the entries above 1 in each frequency column.
# NOTE(review): this assumes the *_FREQUENCY columns are fractions in [0, 1] - confirm
# against the data dictionary.
# ONEOFF_PURCHASES is left out on purpose: it is an amount (it is later filtered at
# 30000), so comparing it with 1 is not a meaningful range check.
frequency_columns = [
    'BALANCE_FREQUENCY',
    'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY',
    'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY',
]

# A single per-column count is displayed, instead of several bare expressions of
# which only the last one would be shown by the notebook.
above_one_counts = df[frequency_columns].gt(1).sum()
above_one_counts

In [None]:
# Remove the rows whose CASH_ADVANCE_FREQUENCY is greater than 1 (in place).
invalid_frequency_rows = df.loc[df['CASH_ADVANCE_FREQUENCY'].gt(1)].index
df.drop(index=invalid_frequency_rows, inplace=True)

In [None]:
# Box plot of every feature after the sanity-check removals, one panel per column.
box_columns = [
    'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES', 'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY', 'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY', 'CASH_ADVANCE_TRX', 'PURCHASES_TRX',
    'CREDIT_LIMIT', 'PAYMENTS', 'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
]

fig, axes = plt.subplots(9, 2, figsize=(40, 120))

# Looping over the flattened grid replaces 17 copy-pasted calls and avoids binding
# `_`, which IPython uses for the last cell output.
for ax, column in zip(axes.flat, box_columns):
    sns.boxplot(df[column], ax=ax)

# Hide the panel(s) that have no column assigned (the 18th slot of the grid).
for ax in axes.flat[len(box_columns):]:
    ax.set_visible(False)

# Render the figure explicitly so the Axes repr is not echoed as cell output.
plt.show()

In [None]:
# List the column names to go through while filtering outliers.
df.columns

In [None]:
# Show the rows whose BALANCE exceeds 17500.
df.loc[df['BALANCE'].gt(17500)]

In [None]:
# Remove the rows whose BALANCE exceeds 17500 (in place).
balance_outlier_rows = df.loc[df['BALANCE'].gt(17500)].index
df.drop(index=balance_outlier_rows, inplace=True)

In [None]:
# Show the rows whose PURCHASES exceeds 45000.
df.loc[df['PURCHASES'].gt(45000)]

In [None]:
# Remove the rows whose PURCHASES exceeds 45000 (in place).
purchases_outlier_rows = df.loc[df['PURCHASES'].gt(45000)].index
df.drop(index=purchases_outlier_rows, inplace=True)

In [None]:
# Show the rows whose ONEOFF_PURCHASES exceeds 30000.
df.loc[df['ONEOFF_PURCHASES'].gt(30000)]

In [None]:
# Remove the rows whose ONEOFF_PURCHASES exceeds 30000 (in place).
oneoff_outlier_rows = df.loc[df['ONEOFF_PURCHASES'].gt(30000)].index
df.drop(index=oneoff_outlier_rows, inplace=True)

In [None]:
# Show the rows whose INSTALLMENTS_PURCHASES exceeds 20000.
df.loc[df['INSTALLMENTS_PURCHASES'].gt(20000)]

In [None]:
# Remove the rows whose INSTALLMENTS_PURCHASES exceeds 20000 (in place).
installments_outlier_rows = df.loc[df['INSTALLMENTS_PURCHASES'].gt(20000)].index
df.drop(index=installments_outlier_rows, inplace=True)

In [None]:
# Show the rows whose CASH_ADVANCE exceeds 40000.
df.loc[df['CASH_ADVANCE'].gt(40000)]

In [None]:
# Remove the rows whose CASH_ADVANCE exceeds 40000 (in place).
cash_advance_outlier_rows = df.loc[df['CASH_ADVANCE'].gt(40000)].index
df.drop(index=cash_advance_outlier_rows, inplace=True)

In [None]:
# Show the rows whose MINIMUM_PAYMENTS exceeds 70000.
df.loc[df['MINIMUM_PAYMENTS'].gt(70000)]

In [None]:
# Remove the rows whose MINIMUM_PAYMENTS exceeds 70000 (in place).
minimum_payments_outlier_rows = df.loc[df['MINIMUM_PAYMENTS'].gt(70000)].index
df.drop(index=minimum_payments_outlier_rows, inplace=True)

In [None]:
# Box plot of every feature after the first round of outlier removal.
box_columns = [
    'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES', 'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY', 'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY', 'CASH_ADVANCE_TRX', 'PURCHASES_TRX',
    'CREDIT_LIMIT', 'PAYMENTS', 'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
]

fig, axes = plt.subplots(9, 2, figsize=(40, 120))

# Looping over the flattened grid replaces 17 copy-pasted calls and avoids binding
# `_`, which IPython uses for the last cell output.
for ax, column in zip(axes.flat, box_columns):
    sns.boxplot(df[column], ax=ax)

# Hide the panel(s) that have no column assigned (the 18th slot of the grid).
for ax in axes.flat[len(box_columns):]:
    ax.set_visible(False)

# Render the figure explicitly so the Axes repr is not echoed as cell output.
plt.show()

In [None]:
# Show the rows whose PURCHASES exceeds 40000.
df.loc[df['PURCHASES'].gt(40000)]

In [None]:
# Remove the rows whose PURCHASES exceeds 40000 (in place).
purchases_outlier_rows = df.loc[df['PURCHASES'].gt(40000)].index
df.drop(index=purchases_outlier_rows, inplace=True)

In [None]:
# Box plot of every feature after the second PURCHASES filter.
box_columns = [
    'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES', 'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY', 'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY', 'CASH_ADVANCE_TRX', 'PURCHASES_TRX',
    'CREDIT_LIMIT', 'PAYMENTS', 'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
]

fig, axes = plt.subplots(9, 2, figsize=(40, 120))

# Looping over the flattened grid replaces 17 copy-pasted calls and avoids binding
# `_`, which IPython uses for the last cell output.
for ax, column in zip(axes.flat, box_columns):
    sns.boxplot(df[column], ax=ax)

# Hide the panel(s) that have no column assigned (the 18th slot of the grid).
for ax in axes.flat[len(box_columns):]:
    ax.set_visible(False)

# Render the figure explicitly so the Axes repr is not echoed as cell output.
plt.show()

In [None]:
# Row count, dtypes and non-null counts after all the removals above.
df.info()

I believe the data is now clean. Before performing any transformation and dimensionality reduction techniques, let's perform EDA to understand the relationships between the various data points.

# EDA

Note: Variables will be compared to one another using the regplot function. Once a comparison has been made between two variables, there will not be another comment made when the reverse comparison happens.

### Checking BALANCE

In [None]:
# Scatter plot with a fitted regression line of BALANCE against every other column.
x_col = 'BALANCE'
y_axis_values = df.columns.drop([x_col])

# Two panels per row. The row count is derived from the number of columns rather than
# hard-coded to 8, so the grid still fits (no IndexError) if the set of columns changes.
# With 16 y-columns this gives the same 8x2 grid and (40, 120) figure size as before.
n_rows = max(1, (len(y_axis_values) + 1) // 2)
fig, axes = plt.subplots(n_rows, 2, figsize=(40, 15 * n_rows), squeeze=False)

for ax, column in zip(axes.flat, y_axis_values):
    sns.regplot(x=x_col, y=column, data=df, line_kws={'color': 'red'}, ax=ax)
    ax.grid(True)
    ax.set_xlabel(x_col, fontsize=15)
    ax.set_ylabel(column, fontsize=15)

# Hide the spare panel left over when the number of y-columns is odd.
for ax in axes.flat[len(y_axis_values):]:
    ax.set_visible(False)

plt.show()


When using BALANCE as the consistent variable, the resulting trends and implications were as follows:

BALANCE_FREQUENCY: Looking at the plot and the trendline, there is a relatively positive correlation between the BALANCE increasing and the BALANCE_FREQUENCY increasing. This means that the clusters will probably consist of low BALANCE with low BALANCE_FREQUENCY, medium BALANCE with medium BALANCE_FREQUENCY, and high BALANCE with high BALANCE_FREQUENCY.

PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the PURCHASES increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters.

ONEOFF_PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the ONEOFF_PURCHASES increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters.

INSTALLMENTS_PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the INSTALLMENTS_PURCHASES increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters.

CASH_ADVANCE: Looking at the plot and trendline, there is a relatively positive correlation between the BALANCE increasing and the CASH_ADVANCE increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). This means that the clusters will probably consist of low BALANCE with low CASH_ADVANCE, medium BALANCE with medium CASH_ADVANCE, and high BALANCE with high CASH_ADVANCE.

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation between the BALANCE and PURCHASES_FREQUENCY. This means that the BALANCE increasing results in the PURCHASES_FREQUENCY decreasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). This means that the clusters will probably consist of low BALANCE with higher PURCHASES_FREQUENCY, medium BALANCE with medium PURCHASES_FREQUENCY, and high BALANCE with lower PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a moderately (less than relative) positive correlation between the BALANCE increasing and the ONEOFF_PURCHASES_FREQUENCY increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). I think this is enough to say common characteristics between different clusters for BALANCE and ONEOFF_PURCHASES_FREQUENCY, with low BALANCE and low ONEOFF_PURCHASES_FREQUENCY being together, medium BALANCE and medium ONEOFF_PURCHASES_FREQUENCY being together, and high BALANCE and high ONEOFF_PURCHASES_FREQUENCY being together.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation: as the BALANCE increases, the PURCHASES_INSTALLMENTS_FREQUENCY decreases (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). Common characteristics between different clusters would be low BALANCE and higher PURCHASES_INSTALLMENTS_FREQUENCY being together, medium BALANCE and medium PURCHASES_INSTALLMENTS_FREQUENCY being together, and high BALANCE and lower PURCHASES_INSTALLMENTS_FREQUENCY being together.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the BALANCE increasing and the CASH_ADVANCE_FREQUENCY increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). This means clusters will consist of people with low BALANCE and low CASH_ADVANCE_FREQUENCY being together, medium BALANCE and medium CASH_ADVANCE_FREQUENCY being together, and high BALANCE and high CASH_ADVANCE_FREQUENCY being together.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a relatively positive correlation between the BALANCE increasing and the CASH_ADVANCE_TRX increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). This means that the clusters will probably consist of low BALANCE with low CASH_ADVANCE_TRX, medium BALANCE with medium CASH_ADVANCE_TRX, and high BALANCE with high CASH_ADVANCE_TRX.

PURCHASES_TRX: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the PURCHASES_TRX increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

CREDIT_LIMIT: Looking at the plot and trendline, there is a positive correlation between the BALANCE increasing and the CREDIT_LIMIT increasing. This means clusters will consist of people with low BALANCE and low CREDIT_LIMIT being together, medium BALANCE and medium CREDIT_LIMIT being together, and high BALANCE and high CREDIT_LIMIT being together.

PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the PAYMENTS increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE increasing and the MINIMUM_PAYMENTS increasing (it is hard to see based solely off the data points, so this answer is more based on the line of best fit). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a negative correlation. When the BALANCE is increasing the PRC_FULL_PAYMENT is decreasing. Common characteristics between different clusters would be low BALANCE and higher PRC_FULL_PAYMENT being together, medium BALANCE and medium PRC_FULL_PAYMENT being together, and high BALANCE and lower PRC_FULL_PAYMENT being together.

TENURE: Looking at the plot and trendline, there is a relatively positive correlation between the BALANCE increasing and the TENURE increasing. This means that the clusters will probably consist of low BALANCE with low TENURE, medium BALANCE with medium TENURE, and high BALANCE with high TENURE.

### Checking BALANCE_FREQUENCY

In [None]:
# Scatter plot with a fitted regression line of BALANCE_FREQUENCY against every other column.
x_col = 'BALANCE_FREQUENCY'
y_axis_values = df.columns.drop([x_col])

# Two panels per row. The row count is derived from the number of columns rather than
# hard-coded to 8, so the grid still fits (no IndexError) if the set of columns changes.
# With 16 y-columns this gives the same 8x2 grid and (40, 120) figure size as before.
n_rows = max(1, (len(y_axis_values) + 1) // 2)
fig, axes = plt.subplots(n_rows, 2, figsize=(40, 15 * n_rows), squeeze=False)

for ax, column in zip(axes.flat, y_axis_values):
    sns.regplot(x=x_col, y=column, data=df, line_kws={'color': 'red'}, ax=ax)
    ax.grid(True)
    ax.set_xlabel(x_col, fontsize=15)
    ax.set_ylabel(column, fontsize=15)

# Hide the spare panel left over when the number of y-columns is odd.
for ax in axes.flat[len(y_axis_values):]:
    ax.set_visible(False)

plt.show()

When using BALANCE_FREQUENCY as the consistent variable, the resulting trends and implications were as follows:

PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the PURCHASES increasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

ONEOFF_PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the ONEOFF_PURCHASES increasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

INSTALLMENTS_PURCHASES: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the INSTALLMENTS_PURCHASES increasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

CASH_ADVANCE: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the CASH_ADVANCE increasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). However, this is not enough of a correlation to confidently say common characteristics between different clusters.

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the BALANCE_FREQUENCY increasing and the PURCHASES_FREQUENCY increasing. This means that clusters will contain people with both low BALANCE_FREQUENCY and low PURCHASES_FREQUENCY, medium BALANCE_FREQUENCY and medium PURCHASES_FREQUENCY, and high BALANCE_FREQUENCY and high PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a relatively positive correlation between the BALANCE_FREQUENCY increasing and the ONEOFF_PURCHASES_FREQUENCY increasing. This means that clusters will contain people with both low BALANCE_FREQUENCY and low ONEOFF_PURCHASES_FREQUENCY, medium BALANCE_FREQUENCY and medium ONEOFF_PURCHASES_FREQUENCY, and high BALANCE_FREQUENCY and high ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a relatively positive correlation between the BALANCE_FREQUENCY increasing and the PURCHASES_INSTALLMENTS_FREQUENCY increasing. This means that clusters will contain people with both low BALANCE_FREQUENCY and low PURCHASES_INSTALLMENTS_FREQUENCY, medium BALANCE_FREQUENCY and medium PURCHASES_INSTALLMENTS_FREQUENCY, and high BALANCE_FREQUENCY and high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a moderate (less than relatively) positive correlation between the BALANCE_FREQUENCY increasing and the CASH_ADVANCE_FREQUENCY increasing. There is enough to suggest that clusters will contain people with both low BALANCE_FREQUENCY and low CASH_ADVANCE_FREQUENCY, medium BALANCE_FREQUENCY and medium CASH_ADVANCE_FREQUENCY, and high BALANCE_FREQUENCY and high CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the CASH_ADVANCE_TRX increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters (could say together would be low BALANCE_FREQUENCY and low CASH_ADVANCE_TRX, medium BALANCE_FREQUENCY and medium CASH_ADVANCE_TRX, and high BALANCE_FREQUENCY and high CASH_ADVANCE_TRX).

PURCHASES_TRX: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the PURCHASES_TRX increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters (could say together would be low BALANCE_FREQUENCY and low PURCHASES_TRX, medium BALANCE_FREQUENCY and medium PURCHASES_TRX, and high BALANCE_FREQUENCY and high PURCHASES_TRX).

CREDIT_LIMIT: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the CREDIT_LIMIT increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters (could say together would be low BALANCE_FREQUENCY and low CREDIT_LIMIT, medium BALANCE_FREQUENCY and medium CREDIT_LIMIT, and high BALANCE_FREQUENCY and high CREDIT_LIMIT).

PAYMENTS: Looking at the plot and trendline, there is virtually no correlation between the BALANCE_FREQUENCY and the PAYMENTS, so nothing can be confidently said about common characteristics between different clusters.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation between the BALANCE_FREQUENCY increasing and the MINIMUM_PAYMENTS increasing. However, this is not enough of a correlation to confidently say common characteristics between different clusters (could say together would be low BALANCE_FREQUENCY and low MINIMUM_PAYMENTS, medium BALANCE_FREQUENCY and medium MINIMUM_PAYMENTS, and high BALANCE_FREQUENCY and high MINIMUM_PAYMENTS).

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a relatively negative correlation. The BALANCE_FREQUENCY increasing results in the PRC_FULL_PAYMENT decreasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). This means together in clusters would be low BALANCE_FREQUENCY and higher PRC_FULL_PAYMENT, medium BALANCE_FREQUENCY and medium PRC_FULL_PAYMENT, and high BALANCE_FREQUENCY and lower PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight positive correlation with the BALANCE_FREQUENCY increasing resulting in the TENURE increasing (looking strictly at the data points might tell a different story, but the line of best fit gives the better idea). However, this is not enough of a correlation to confidently say common characteristics between different clusters (could say together would be low BALANCE_FREQUENCY and low TENURE, medium BALANCE_FREQUENCY and medium TENURE, and high BALANCE_FREQUENCY and high TENURE).

### Checking PURCHASES

In [None]:
# Scatter plot with a fitted regression line of PURCHASES against every other column.
x_col = 'PURCHASES'
y_axis_values = df.columns.drop([x_col])

# Two panels per row. The row count is derived from the number of columns rather than
# hard-coded to 8, so the grid still fits (no IndexError) if the set of columns changes.
# With 16 y-columns this gives the same 8x2 grid and (40, 120) figure size as before.
n_rows = max(1, (len(y_axis_values) + 1) // 2)
fig, axes = plt.subplots(n_rows, 2, figsize=(40, 15 * n_rows), squeeze=False)

for ax, column in zip(axes.flat, y_axis_values):
    sns.regplot(x=x_col, y=column, data=df, line_kws={'color': 'red'}, ax=ax)
    ax.grid(True)
    ax.set_xlabel(x_col, fontsize=15)
    ax.set_ylabel(column, fontsize=15)

# Hide the spare panel left over when the number of y-columns is odd.
for ax in axes.flat[len(y_axis_values):]:
    ax.set_visible(False)

plt.show()

When using PURCHASES as the consistent variable, the resulting trends and implications were as follows:

ONEOFF_PURCHASES: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the ONEOFF_PURCHASES increasing. This indicates for clusters, low PURCHASES with low ONEOFF_PURCHASES, medium PURCHASES with medium ONEOFF_PURCHASES, and high PURCHASES with high ONEOFF_PURCHASES.

INSTALLMENTS_PURCHASES: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the INSTALLMENTS_PURCHASES increasing. This indicates for clusters, low PURCHASES with low INSTALLMENTS_PURCHASES, medium PURCHASES with medium INSTALLMENTS_PURCHASES, and high PURCHASES with high INSTALLMENTS_PURCHASES.

CASH_ADVANCE: Looking at the plot and trendline, there is a slight negative correlation. The PURCHASES increase and the CASH_ADVANCE decrease (The answer is focused on the trendline). However, there is not enough of a correlation to suggest characteristics for clusters.

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the PURCHASES_FREQUENCY increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low PURCHASES_FREQUENCY, medium PURCHASES with medium PURCHASES_FREQUENCY, and high PURCHASES with high PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the ONEOFF_PURCHASES_FREQUENCY increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low ONEOFF_PURCHASES_FREQUENCY, medium PURCHASES with medium ONEOFF_PURCHASES_FREQUENCY, and high PURCHASES with high ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the PURCHASES_INSTALLMENTS_FREQUENCY increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low PURCHASES_INSTALLMENTS_FREQUENCY, medium PURCHASES with medium PURCHASES_INSTALLMENTS_FREQUENCY, and high PURCHASES with high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation. The PURCHASES increasing results in the CASH_ADVANCE_FREQUENCY decreasing. This indicates for clusters, low PURCHASES with higher CASH_ADVANCE_FREQUENCY, medium PURCHASES with medium CASH_ADVANCE_FREQUENCY, and high PURCHASES with lower CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a slight negative correlation or virtually no correlation. The PURCHASES increasing results in the CASH_ADVANCE_TRX decreasing. However, there is not enough of a correlation to suggest characteristics for clusters.

PURCHASES_TRX: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the PURCHASES_TRX increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low PURCHASES_TRX, medium PURCHASES with medium PURCHASES_TRX, and high PURCHASES with high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the CREDIT_LIMIT increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low CREDIT_LIMIT, medium PURCHASES with medium CREDIT_LIMIT, and high PURCHASES with high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a relatively positive correlation between the PURCHASES increasing and the PAYMENTS increasing. This indicates for clusters, low PURCHASES with low PAYMENTS, medium PURCHASES with medium PAYMENTS, and high PURCHASES with high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The PURCHASES increasing results in the MINIMUM_PAYMENTS increasing. However, there is not enough of a correlation to suggest characteristics for clusters.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a positive correlation between the PURCHASES increasing and the PRC_FULL_PAYMENT increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low PRC_FULL_PAYMENT, medium PURCHASES with medium PRC_FULL_PAYMENT, and high PURCHASES with high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a relatively positive correlation between the PURCHASES increasing and the TENURE increasing (hard to see based on the data, answer is based on the trendline). This indicates for clusters, low PURCHASES with low TENURE, medium PURCHASES with medium TENURE, and high PURCHASES with high TENURE.

### Checking ONEOFF_PURCHASES

In [None]:
# Scatter plot with a fitted regression line of ONEOFF_PURCHASES against every other column.
x_col = 'ONEOFF_PURCHASES'
y_axis_values = df.columns.drop([x_col])

# Two panels per row. The row count is derived from the number of columns rather than
# hard-coded to 8, so the grid still fits (no IndexError) if the set of columns changes.
# With 16 y-columns this gives the same 8x2 grid and (40, 120) figure size as before.
n_rows = max(1, (len(y_axis_values) + 1) // 2)
fig, axes = plt.subplots(n_rows, 2, figsize=(40, 15 * n_rows), squeeze=False)

for ax, column in zip(axes.flat, y_axis_values):
    sns.regplot(x=x_col, y=column, data=df, line_kws={'color': 'red'}, ax=ax)
    ax.grid(True)
    ax.set_xlabel(x_col, fontsize=15)
    ax.set_ylabel(column, fontsize=15)

# Hide the spare panel left over when the number of y-columns is odd.
for ax in axes.flat[len(y_axis_values):]:
    ax.set_visible(False)

plt.show()

When using ONEOFF_PURCHASES as the consistent variable, the resulting trends and implications were as follows:

INSTALLMENTS_PURCHASES: Looking at the plot and trendline, there is a relatively positive correlation between the ONEOFF_PURCHASES increasing and the INSTALLMENTS_PURCHASES increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low INSTALLMENTS_PURCHASES, medium ONEOFF_PURCHASES with medium INSTALLMENTS_PURCHASES, and high ONEOFF_PURCHASES with high INSTALLMENTS_PURCHASES.

CASH_ADVANCE: Looking at the plot and trendline, there is a slight negative correlation. The ONEOFF_PURCHASES increasing results in CASH_ADVANCE decreasing (again, this response is focused more on trendline). However, this is not a strong enough relationship to indicate characteristics in clusters.

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the PURCHASES_FREQUENCY increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low PURCHASES_FREQUENCY, medium ONEOFF_PURCHASES with medium PURCHASES_FREQUENCY, and high ONEOFF_PURCHASES with high PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the ONEOFF_PURCHASES_FREQUENCY increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low ONEOFF_PURCHASES_FREQUENCY, medium ONEOFF_PURCHASES with medium ONEOFF_PURCHASES_FREQUENCY, and high ONEOFF_PURCHASES with high ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the PURCHASES_INSTALLMENTS_FREQUENCY increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low PURCHASES_INSTALLMENTS_FREQUENCY, medium ONEOFF_PURCHASES with medium PURCHASES_INSTALLMENTS_FREQUENCY, and high ONEOFF_PURCHASES with high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation. The ONEOFF_PURCHASES increasing results in the CASH_ADVANCE_FREQUENCY decreasing. This indicates for clusters, low ONEOFF_PURCHASES with higher CASH_ADVANCE_FREQUENCY, medium ONEOFF_PURCHASES with medium CASH_ADVANCE_FREQUENCY, and high ONEOFF_PURCHASES with lower CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a slight negative correlation. The ONEOFF_PURCHASES increasing results in the CASH_ADVANCE_TRX decreasing. However, this is not a strong enough relationship to indicate characteristics in clusters.

PURCHASES_TRX: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the PURCHASES_TRX increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low PURCHASES_TRX, medium ONEOFF_PURCHASES with medium PURCHASES_TRX, and high ONEOFF_PURCHASES with high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the CREDIT_LIMIT increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low CREDIT_LIMIT, medium ONEOFF_PURCHASES with medium CREDIT_LIMIT, and high ONEOFF_PURCHASES with high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the PAYMENTS increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low PAYMENTS, medium ONEOFF_PURCHASES with medium PAYMENTS, and high ONEOFF_PURCHASES with high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation between the ONEOFF_PURCHASES increasing and the MINIMUM_PAYMENTS increasing (again, this response is focused more on trendline). This could also be considered virtually no correlation. However, this is not a strong enough relationship to indicate characteristics in clusters.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a positive correlation between the ONEOFF_PURCHASES increasing and the PRC_FULL_PAYMENT increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low PRC_FULL_PAYMENT, medium ONEOFF_PURCHASES with medium PRC_FULL_PAYMENT, and high ONEOFF_PURCHASES with high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight positive correlation between the ONEOFF_PURCHASES increasing and the TENURE increasing (again, this response is focused more on trendline). This indicates for clusters, low ONEOFF_PURCHASES with low TENURE, medium ONEOFF_PURCHASES with medium TENURE, and high ONEOFF_PURCHASES with high TENURE.

### Checking INSTALLMENTS_PURCHASES

In [None]:
# Plot every other column against INSTALLMENTS_PURCHASES with a red regression line.
y_axis_values = df.columns.drop(['INSTALLMENTS_PURCHASES'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='INSTALLMENTS_PURCHASES',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('INSTALLMENTS_PURCHASES', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using INSTALLMENTS_PURCHASES as the consistent variable, the resulting trends and implications were as follows:

CASH_ADVANCE: Looking at the plot and trendline, there is a slight negative correlation. The INSTALLMENTS_PURCHASES increasing results in CASH_ADVANCE decreasing (again, this response is focused more on trendline). However, this is not a strong enough relationship to indicate characteristics in clusters.

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in PURCHASES_FREQUENCY increasing. This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low PURCHASES_FREQUENCY, medium INSTALLMENTS_PURCHASES and medium PURCHASES_FREQUENCY, and high INSTALLMENTS_PURCHASES and high PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in ONEOFF_PURCHASES_FREQUENCY increasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low ONEOFF_PURCHASES_FREQUENCY, medium INSTALLMENTS_PURCHASES and medium ONEOFF_PURCHASES_FREQUENCY, and high INSTALLMENTS_PURCHASES and high ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in PURCHASES_INSTALLMENTS_FREQUENCY increasing. This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low PURCHASES_INSTALLMENTS_FREQUENCY, medium INSTALLMENTS_PURCHASES and medium PURCHASES_INSTALLMENTS_FREQUENCY, and high INSTALLMENTS_PURCHASES and high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation. The INSTALLMENTS_PURCHASES increasing results in CASH_ADVANCE_FREQUENCY decreasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and higher CASH_ADVANCE_FREQUENCY, medium INSTALLMENTS_PURCHASES and medium CASH_ADVANCE_FREQUENCY, and high INSTALLMENTS_PURCHASES and lower CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a slight negative correlation. The INSTALLMENTS_PURCHASES increasing results in CASH_ADVANCE_TRX decreasing (again, this response is focused more on trendline). It's debatable whether there is enough correlation, but if there is, the clusters would consist of people with low INSTALLMENTS_PURCHASES and higher CASH_ADVANCE_TRX, medium INSTALLMENTS_PURCHASES and medium CASH_ADVANCE_TRX, and high INSTALLMENTS_PURCHASES and lower CASH_ADVANCE_TRX.

PURCHASES_TRX: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in PURCHASES_TRX increasing (based on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low PURCHASES_TRX, medium INSTALLMENTS_PURCHASES and medium PURCHASES_TRX, and high INSTALLMENTS_PURCHASES and high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in CREDIT_LIMIT increasing (based on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low CREDIT_LIMIT, medium INSTALLMENTS_PURCHASES and medium CREDIT_LIMIT, and high INSTALLMENTS_PURCHASES and high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a relatively positive correlation with the INSTALLMENTS_PURCHASES increasing results in PAYMENTS increasing (based on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low PAYMENTS, medium INSTALLMENTS_PURCHASES and medium PAYMENTS, and high INSTALLMENTS_PURCHASES and high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation: as INSTALLMENTS_PURCHASES increases, MINIMUM_PAYMENTS increases (based on trendline). However, this is not strong enough to indicate characteristics of clusters.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a positive correlation with the INSTALLMENTS_PURCHASES increasing results in PRC_FULL_PAYMENT increasing (based on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low PRC_FULL_PAYMENT, medium INSTALLMENTS_PURCHASES and medium PRC_FULL_PAYMENT, and high INSTALLMENTS_PURCHASES and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a relatively positive correlation with the INSTALLMENTS_PURCHASES increasing results in TENURE increasing (based on trendline). This means for the clusters that they will consist of people with low INSTALLMENTS_PURCHASES and low TENURE, medium INSTALLMENTS_PURCHASES and medium TENURE, and high INSTALLMENTS_PURCHASES and high TENURE.

### Checking CASH_ADVANCE

In [None]:
# Plot every other column against CASH_ADVANCE with a red regression line.
y_axis_values = df.columns.drop(['CASH_ADVANCE'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='CASH_ADVANCE',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('CASH_ADVANCE', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using CASH_ADVANCE as the consistent variable, the resulting trends and implications were as follows:

PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a negative correlation. The CASH_ADVANCE increasing results in PURCHASES_FREQUENCY decreasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and higher PURCHASES_FREQUENCY, medium CASH_ADVANCE and medium PURCHASES_FREQUENCY, and high CASH_ADVANCE and lower PURCHASES_FREQUENCY.

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a relatively negative correlation. The CASH_ADVANCE increasing results in ONEOFF_PURCHASES_FREQUENCY decreasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and higher ONEOFF_PURCHASES_FREQUENCY, medium CASH_ADVANCE and medium ONEOFF_PURCHASES_FREQUENCY, and high CASH_ADVANCE and lower ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a negative correlation. The CASH_ADVANCE increasing results in PURCHASES_INSTALLMENTS_FREQUENCY decreasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and higher PURCHASES_INSTALLMENTS_FREQUENCY, medium CASH_ADVANCE and medium PURCHASES_INSTALLMENTS_FREQUENCY, and high CASH_ADVANCE and lower PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a positive correlation. The CASH_ADVANCE increasing results in CASH_ADVANCE_FREQUENCY increasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and low CASH_ADVANCE_FREQUENCY, medium CASH_ADVANCE and medium CASH_ADVANCE_FREQUENCY, and high CASH_ADVANCE and high CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a positive correlation. The CASH_ADVANCE increasing results in CASH_ADVANCE_TRX increasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and low CASH_ADVANCE_TRX, medium CASH_ADVANCE and medium CASH_ADVANCE_TRX, and high CASH_ADVANCE and high CASH_ADVANCE_TRX.

PURCHASES_TRX: Looking at the plot and trendline, there is a slight negative correlation. The CASH_ADVANCE increasing results in PURCHASES_TRX decreasing (again, this response is focused more on trendline). However, this is not enough to determine characteristics of clusters.

CREDIT_LIMIT: Looking at the plot and trendline, there is a positive correlation. The CASH_ADVANCE increasing results in CREDIT_LIMIT increasing. This means for the clusters that they will consist of people with low CASH_ADVANCE and low CREDIT_LIMIT, medium CASH_ADVANCE and medium CREDIT_LIMIT, and high CASH_ADVANCE and high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a relatively positive correlation. The CASH_ADVANCE increasing results in PAYMENTS increasing. This means for the clusters that they will consist of people with low CASH_ADVANCE and low PAYMENTS, medium CASH_ADVANCE and medium PAYMENTS, and high CASH_ADVANCE and high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The CASH_ADVANCE increasing results in MINIMUM_PAYMENTS increasing. I would say this is enough to suggest low CASH_ADVANCE and low MINIMUM_PAYMENTS, medium CASH_ADVANCE and medium MINIMUM_PAYMENTS, and high CASH_ADVANCE and high MINIMUM_PAYMENTS.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a negative correlation. The CASH_ADVANCE increasing results in PRC_FULL_PAYMENT decreasing. This means for the clusters that they will consist of people with low CASH_ADVANCE and higher PRC_FULL_PAYMENT, medium CASH_ADVANCE and medium PRC_FULL_PAYMENT, and high CASH_ADVANCE and lower PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a relatively negative correlation. The CASH_ADVANCE increasing results in TENURE decreasing (again, this response is focused more on trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE and higher TENURE, medium CASH_ADVANCE and medium TENURE, and high CASH_ADVANCE and lower TENURE.

### Checking PURCHASES_FREQUENCY

In [None]:
# Plot every other column against PURCHASES_FREQUENCY with a red regression line.
y_axis_values = df.columns.drop(['PURCHASES_FREQUENCY'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='PURCHASES_FREQUENCY',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('PURCHASES_FREQUENCY', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using PURCHASES_FREQUENCY as the consistent variable, the resulting trends and implications were as follows:

ONEOFF_PURCHASES_FREQUENCY: Looking at the plot and trendline, there is a relatively positive correlation. The PURCHASES_FREQUENCY increasing results in ONEOFF_PURCHASES_FREQUENCY increasing. This means for the clusters that they will consist of people with low PURCHASES_FREQUENCY and low ONEOFF_PURCHASES_FREQUENCY, medium PURCHASES_FREQUENCY and medium ONEOFF_PURCHASES_FREQUENCY, and high PURCHASES_FREQUENCY and high ONEOFF_PURCHASES_FREQUENCY.

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a positive correlation. The PURCHASES_FREQUENCY increasing results in PURCHASES_INSTALLMENTS_FREQUENCY increasing. This means for the clusters that they will consist of people with low PURCHASES_FREQUENCY and low PURCHASES_INSTALLMENTS_FREQUENCY, medium PURCHASES_FREQUENCY and medium PURCHASES_INSTALLMENTS_FREQUENCY, and high PURCHASES_FREQUENCY and high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a slight negative correlation. The PURCHASES_FREQUENCY increasing results in CASH_ADVANCE_FREQUENCY decreasing (based on the trendline). There is enough correlation to suggest for clusters that people with low PURCHASES_FREQUENCY and higher CASH_ADVANCE_FREQUENCY, medium PURCHASES_FREQUENCY and medium CASH_ADVANCE_FREQUENCY, and high PURCHASES_FREQUENCY and lower CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a slight negative correlation to virtually no correlation. The PURCHASES_FREQUENCY increasing results in CASH_ADVANCE_TRX decreasing, if at all (based on the trendline). However, there is not enough to suggest characteristics between clusters.

PURCHASES_TRX: Looking at the plot and trendline, there is a slight positive correlation. The PURCHASES_FREQUENCY increasing results in PURCHASES_TRX increasing. There is enough to suggest clusters will consist of people with low PURCHASES_FREQUENCY and low PURCHASES_TRX, medium PURCHASES_FREQUENCY and medium PURCHASES_TRX, and high PURCHASES_FREQUENCY and high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a slight positive correlation to virtually none. The PURCHASES_FREQUENCY increasing results in CREDIT_LIMIT increasing, if at all (based on the trendline). There is not enough to suggest characteristics for clusters.

PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation to virtually none. The PURCHASES_FREQUENCY increasing results in PAYMENTS increasing, if at all (based on the trendline). There is not enough to suggest characteristics for clusters.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is zero correlation present.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a relatively positive correlation. The PURCHASES_FREQUENCY increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). This means for the clusters that they will consist of people with low PURCHASES_FREQUENCY and low PRC_FULL_PAYMENT, medium PURCHASES_FREQUENCY and medium PRC_FULL_PAYMENT, and high PURCHASES_FREQUENCY and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight positive correlation. The PURCHASES_FREQUENCY increasing results in TENURE increasing (based on the trendline). There is not enough to suggest characteristics for clusters.

### Checking ONEOFF_PURCHASES_FREQUENCY

In [None]:
# Plot every other column against ONEOFF_PURCHASES_FREQUENCY with a red regression line.
y_axis_values = df.columns.drop(['ONEOFF_PURCHASES_FREQUENCY'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='ONEOFF_PURCHASES_FREQUENCY',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('ONEOFF_PURCHASES_FREQUENCY', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using ONEOFF_PURCHASES_FREQUENCY as the consistent variable, the resulting trends and implications were as follows:

PURCHASES_INSTALLMENTS_FREQUENCY: Looking at the plot and trendline, there is a moderately (but not as much as relatively) positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in PURCHASES_INSTALLMENTS_FREQUENCY increasing (based on the trendline). This means for the clusters that they will consist of people with low ONEOFF_PURCHASES_FREQUENCY and low PURCHASES_INSTALLMENTS_FREQUENCY, medium ONEOFF_PURCHASES_FREQUENCY and medium PURCHASES_INSTALLMENTS_FREQUENCY, and high ONEOFF_PURCHASES_FREQUENCY and high PURCHASES_INSTALLMENTS_FREQUENCY.

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a slight negative correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in CASH_ADVANCE_FREQUENCY decreasing (based on the trendline). There is not enough of a correlation to suggest characteristics.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is virtually no correlation.

PURCHASES_TRX: Looking at the plot and trendline, there is a moderately (but not as much as relatively) positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in PURCHASES_TRX increasing (based on the trendline). This means for the clusters that they will consist of people with low ONEOFF_PURCHASES_FREQUENCY and low PURCHASES_TRX, medium ONEOFF_PURCHASES_FREQUENCY and medium PURCHASES_TRX, and high ONEOFF_PURCHASES_FREQUENCY and high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a moderately (but not as much as relatively) positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in CREDIT_LIMIT increasing (based on the trendline). This means for the clusters that they will consist of people with low ONEOFF_PURCHASES_FREQUENCY and low CREDIT_LIMIT, medium ONEOFF_PURCHASES_FREQUENCY and medium CREDIT_LIMIT, and high ONEOFF_PURCHASES_FREQUENCY and high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in PAYMENTS increasing (based on the trendline). However, there is not enough to suggest characteristics for clusters.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is zero correlation present.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a slight positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). This means for the clusters that they will consist of people with low ONEOFF_PURCHASES_FREQUENCY and low PRC_FULL_PAYMENT, medium ONEOFF_PURCHASES_FREQUENCY and medium PRC_FULL_PAYMENT, and high ONEOFF_PURCHASES_FREQUENCY and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight positive correlation. The ONEOFF_PURCHASES_FREQUENCY increasing results in TENURE increasing (based on the trendline). However, there is not enough to suggest characteristics for clusters (based on the data).

### Checking PURCHASES_INSTALLMENTS_FREQUENCY

In [None]:
# Plot every other column against PURCHASES_INSTALLMENTS_FREQUENCY with a red regression line.
y_axis_values = df.columns.drop(['PURCHASES_INSTALLMENTS_FREQUENCY'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='PURCHASES_INSTALLMENTS_FREQUENCY',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('PURCHASES_INSTALLMENTS_FREQUENCY', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using PURCHASES_INSTALLMENTS_FREQUENCY as the consistent variable, the resulting trends and implications were as follows:

CASH_ADVANCE_FREQUENCY: Looking at the plot and trendline, there is a moderately (but not as much as relatively) negative correlation. The PURCHASES_INSTALLMENTS_FREQUENCY increasing results in CASH_ADVANCE_FREQUENCY decreasing (based on the trendline). This means for the clusters that they will consist of people with low PURCHASES_INSTALLMENTS_FREQUENCY and higher CASH_ADVANCE_FREQUENCY, medium PURCHASES_INSTALLMENTS_FREQUENCY and medium CASH_ADVANCE_FREQUENCY, and high PURCHASES_INSTALLMENTS_FREQUENCY and lower CASH_ADVANCE_FREQUENCY.

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is virtually no correlation.

PURCHASES_TRX: Looking at the plot and trendline, there is a moderately (but not as much as relatively) positive correlation. The PURCHASES_INSTALLMENTS_FREQUENCY increasing results in PURCHASES_TRX increasing (based on the trendline). This means for the clusters that they will consist of people with low PURCHASES_INSTALLMENTS_FREQUENCY and low PURCHASES_TRX, medium PURCHASES_INSTALLMENTS_FREQUENCY and medium PURCHASES_TRX, and high PURCHASES_INSTALLMENTS_FREQUENCY and high PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is virtually no correlation.

PAYMENTS: Looking at the plot and trendline, there is virtually no correlation.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is virtually no correlation.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a moderately (but not as much as relatively) positive correlation. The PURCHASES_INSTALLMENTS_FREQUENCY increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). This means for the clusters that they will consist of people with low PURCHASES_INSTALLMENTS_FREQUENCY and low PRC_FULL_PAYMENT, medium PURCHASES_INSTALLMENTS_FREQUENCY and medium PRC_FULL_PAYMENT, and high PURCHASES_INSTALLMENTS_FREQUENCY and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight positive correlation. The PURCHASES_INSTALLMENTS_FREQUENCY increasing results in TENURE increasing (based on the trendline). However, there is not enough to suggest characteristics for clusters (based on the data).

### Checking CASH_ADVANCE_FREQUENCY

In [None]:
# Plot every other column against CASH_ADVANCE_FREQUENCY with a red regression line.
y_axis_values = df.columns.drop(['CASH_ADVANCE_FREQUENCY'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='CASH_ADVANCE_FREQUENCY',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('CASH_ADVANCE_FREQUENCY', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using CASH_ADVANCE_FREQUENCY as the consistent variable, the resulting trends and implications were as follows:

CASH_ADVANCE_TRX: Looking at the plot and trendline, there is a relatively positive correlation. The CASH_ADVANCE_FREQUENCY increasing results in CASH_ADVANCE_TRX increasing. This means for the clusters that they will consist of people with low CASH_ADVANCE_FREQUENCY and low CASH_ADVANCE_TRX, medium CASH_ADVANCE_FREQUENCY and medium CASH_ADVANCE_TRX, and high CASH_ADVANCE_FREQUENCY and high CASH_ADVANCE_TRX.

PURCHASES_TRX: Looking at the plot and trendline, there is a slight negative correlation. The CASH_ADVANCE_FREQUENCY increasing results in PURCHASES_TRX decreasing (based on the trendline). However, this is not enough to suggest correlation in terms of characteristics for clusters.

CREDIT_LIMIT: Looking at the plot and trendline, there is a slight positive correlation. The CASH_ADVANCE_FREQUENCY increasing results in CREDIT_LIMIT increasing (based on the trendline). However, this is not enough to suggest correlation in terms of characteristics for clusters.

PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The CASH_ADVANCE_FREQUENCY increasing results in PAYMENTS increasing (based on the trendline). However, this is not enough to suggest correlation in terms of characteristics for clusters.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is virtually no correlation.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a moderately (but not quite relatively) negative correlation. The CASH_ADVANCE_FREQUENCY increasing results in PRC_FULL_PAYMENT decreasing (based on the trendline). This means for the clusters that they will consist of people with low CASH_ADVANCE_FREQUENCY and higher PRC_FULL_PAYMENT, medium CASH_ADVANCE_FREQUENCY and medium PRC_FULL_PAYMENT, and high CASH_ADVANCE_FREQUENCY and lower PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a slight negative correlation. The CASH_ADVANCE_FREQUENCY increasing results in TENURE decreasing (based on the trendline). However, this is not enough to suggest correlation in terms of characteristics for clusters.

### Checking CASH_ADVANCE_TRX

In [None]:
# Plot every other column against CASH_ADVANCE_TRX with a red regression line.
y_axis_values = df.columns.drop(['CASH_ADVANCE_TRX'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='CASH_ADVANCE_TRX',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('CASH_ADVANCE_TRX', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using CASH_ADVANCE_TRX as the consistent variable, the resulting trends and implications were as follows:

PURCHASES_TRX: Looking at the plot and trendline, there is a slight negative correlation. The CASH_ADVANCE_TRX increasing results in PURCHASES_TRX decreasing (based on the trendline). There might be characteristics in clusters for low CASH_ADVANCE_TRX and higher PURCHASES_TRX, medium CASH_ADVANCE_TRX and medium PURCHASES_TRX, and high CASH_ADVANCE_TRX and lower PURCHASES_TRX.

CREDIT_LIMIT: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The CASH_ADVANCE_TRX increasing results in CREDIT_LIMIT increasing (based on the trendline). Characteristics in clusters for low CASH_ADVANCE_TRX and low CREDIT_LIMIT, medium CASH_ADVANCE_TRX and medium CREDIT_LIMIT, and high CASH_ADVANCE_TRX and high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The CASH_ADVANCE_TRX increasing results in PAYMENTS increasing (based on the trendline). Characteristics in clusters for low CASH_ADVANCE_TRX and low PAYMENTS, medium CASH_ADVANCE_TRX and medium PAYMENTS, and high CASH_ADVANCE_TRX and high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The CASH_ADVANCE_TRX increasing results in MINIMUM_PAYMENTS increasing (based on the trendline). There might be characteristics in clusters for low CASH_ADVANCE_TRX and low MINIMUM_PAYMENTS, medium CASH_ADVANCE_TRX and medium MINIMUM_PAYMENTS, and high CASH_ADVANCE_TRX and high MINIMUM_PAYMENTS.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a relatively negative correlation. The CASH_ADVANCE_TRX increasing results in PRC_FULL_PAYMENT decreasing (based on the trendline). Characteristics in clusters for low CASH_ADVANCE_TRX and higher PRC_FULL_PAYMENT, medium CASH_ADVANCE_TRX and medium PRC_FULL_PAYMENT, and high CASH_ADVANCE_TRX and lower PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a moderately (but not quite relatively) negative correlation. The CASH_ADVANCE_TRX increasing results in TENURE decreasing (based on the trendline). Characteristics in clusters for low CASH_ADVANCE_TRX and higher TENURE, medium CASH_ADVANCE_TRX and medium TENURE, and high CASH_ADVANCE_TRX and lower TENURE.

### Checking PURCHASES_TRX

In [None]:
# Plot every other column against PURCHASES_TRX with a red regression line.
y_axis_values = df.columns.drop(['PURCHASES_TRX'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='PURCHASES_TRX',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('PURCHASES_TRX', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using PURCHASES_TRX as the consistent variable, the resulting trends and implications were as follows:

CREDIT_LIMIT: Looking at the plot and trendline, there is a relatively positive correlation. The PURCHASES_TRX increasing results in CREDIT_LIMIT increasing (based on the trendline). Characteristics in clusters would be low PURCHASES_TRX and low CREDIT_LIMIT, medium PURCHASES_TRX and medium CREDIT_LIMIT, and high PURCHASES_TRX and high CREDIT_LIMIT.

PAYMENTS: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The PURCHASES_TRX increasing results in PAYMENTS increasing (based on the trendline). Characteristics in clusters would be low PURCHASES_TRX and low PAYMENTS, medium PURCHASES_TRX and medium PAYMENTS, and high PURCHASES_TRX and high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The PURCHASES_TRX increasing results in MINIMUM_PAYMENTS increasing (based on the trendline). There might be cluster characteristics of low PURCHASES_TRX and low MINIMUM_PAYMENTS, medium PURCHASES_TRX and medium MINIMUM_PAYMENTS, and high PURCHASES_TRX and high MINIMUM_PAYMENTS.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a positive correlation. The PURCHASES_TRX increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). Cluster characteristics of low PURCHASES_TRX with low PRC_FULL_PAYMENT, medium PURCHASES_TRX with medium PRC_FULL_PAYMENT, and high PURCHASES_TRX with high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The PURCHASES_TRX increasing results in TENURE increasing (based on the trendline). Cluster characteristics of low PURCHASES_TRX with low TENURE, medium PURCHASES_TRX with medium TENURE, and high PURCHASES_TRX with high TENURE.

### Checking CREDIT_LIMIT

In [None]:
# Plot every other column against CREDIT_LIMIT with a red regression line.
y_axis_values = df.columns.drop(['CREDIT_LIMIT'])

# 8 rows x 2 columns of panels; they are filled left-to-right, top-to-bottom.
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

for panel_idx, y_col in enumerate(y_axis_values):
    grid_row, grid_col = divmod(panel_idx, 2)
    panel = axes[grid_row, grid_col]
    sns.regplot(
        x='CREDIT_LIMIT',
        y=y_col,
        data=df,
        line_kws={'color': 'red'},
        ax=panel,
    )
    # Re-apply the axis labels with a larger font than regplot's defaults.
    panel.set_xlabel('CREDIT_LIMIT', fontsize=15)
    panel.set_ylabel(y_col, fontsize=15)
    panel.grid(True)

plt.show()

When using CREDIT_LIMIT as the consistent variable, the resulting trends and implications were as follows:

PAYMENTS: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The CREDIT_LIMIT increasing results in PAYMENTS increasing (based on the trendline). Characteristics in clusters would be low CREDIT_LIMIT and low PAYMENTS, medium CREDIT_LIMIT and medium PAYMENTS, and high CREDIT_LIMIT and high PAYMENTS.

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The CREDIT_LIMIT increasing results in MINIMUM_PAYMENTS increasing (based on the trendline). There might be enough to suggest characteristics in clusters would be low CREDIT_LIMIT and low MINIMUM_PAYMENTS, medium CREDIT_LIMIT and medium MINIMUM_PAYMENTS, and high CREDIT_LIMIT and high MINIMUM_PAYMENTS.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a slight positive correlation. The CREDIT_LIMIT increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). There might be enough to suggest characteristics in clusters would be low CREDIT_LIMIT and low PRC_FULL_PAYMENT, medium CREDIT_LIMIT and medium PRC_FULL_PAYMENT, and high CREDIT_LIMIT and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The CREDIT_LIMIT increasing results in TENURE increasing (based on the trendline). Characteristics in clusters would be low CREDIT_LIMIT and low TENURE, medium CREDIT_LIMIT and medium TENURE, and high CREDIT_LIMIT and high TENURE.

### Checking Payments

In [None]:
# Regress every remaining feature on PAYMENTS, one panel per feature,
# laid out on an 8 x 2 grid.
y_axis_values = df.columns.drop(['PAYMENTS'])
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

# axes.flat walks the grid row by row, i.e. panel i sits at (i // 2, i % 2).
for panel, feature in zip(axes.flat, y_axis_values):
    sns.regplot(data=df, x='PAYMENTS', y=feature, ax=panel, line_kws={'color': 'red'})
    panel.set_xlabel('PAYMENTS', fontsize=15)
    panel.set_ylabel(feature, fontsize=15)
    panel.grid(True)

plt.show()

When using PAYMENTS as the consistent variable, the resulting trends and implications were as follows:

MINIMUM_PAYMENTS: Looking at the plot and trendline, there is a slight positive correlation. The PAYMENTS increasing results in MINIMUM_PAYMENTS increasing (based on the trendline). There might be enough to suggest characteristics in clusters would be low PAYMENTS and low MINIMUM_PAYMENTS, medium PAYMENTS and medium MINIMUM_PAYMENTS, and high PAYMENTS and high MINIMUM_PAYMENTS.

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The PAYMENTS increasing results in PRC_FULL_PAYMENT increasing (based on the trendline). Characteristics in clusters would be low PAYMENTS and low PRC_FULL_PAYMENT, medium PAYMENTS and medium PRC_FULL_PAYMENT, and high PAYMENTS and high PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The PAYMENTS increasing results in TENURE increasing (based on the trendline). Characteristics in clusters would be low PAYMENTS and low TENURE, medium PAYMENTS and medium TENURE, and high PAYMENTS and high TENURE.

### Checking MINIMUM_PAYMENTS

In [None]:
# Regress every remaining feature on MINIMUM_PAYMENTS, one panel per feature,
# laid out on an 8 x 2 grid.
y_axis_values = df.columns.drop(['MINIMUM_PAYMENTS'])
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

# axes.flat walks the grid row by row, i.e. panel i sits at (i // 2, i % 2).
for panel, feature in zip(axes.flat, y_axis_values):
    sns.regplot(data=df, x='MINIMUM_PAYMENTS', y=feature, ax=panel, line_kws={'color': 'red'})
    panel.set_xlabel('MINIMUM_PAYMENTS', fontsize=15)
    panel.set_ylabel(feature, fontsize=15)
    panel.grid(True)

plt.show()

When using MINIMUM_PAYMENTS as the consistent variable, the resulting trends and implications were as follows:

PRC_FULL_PAYMENT: Looking at the plot and trendline, there is a relatively negative correlation. The MINIMUM_PAYMENTS increasing results in PRC_FULL_PAYMENT decreasing (based on the trendline). Characteristics in clusters would be low MINIMUM_PAYMENTS and high PRC_FULL_PAYMENT, medium MINIMUM_PAYMENTS and medium PRC_FULL_PAYMENT, and high MINIMUM_PAYMENTS and low PRC_FULL_PAYMENT.

TENURE: Looking at the plot and trendline, there is a moderately (but not quite relatively) positive correlation. The MINIMUM_PAYMENTS increasing results in TENURE increasing (based on the trendline). Characteristics in clusters would be low MINIMUM_PAYMENTS and low TENURE, medium MINIMUM_PAYMENTS and medium TENURE, and high MINIMUM_PAYMENTS and high TENURE.

### Checking PRC_FULL_PAYMENT

In [None]:
# Regress every remaining feature on PRC_FULL_PAYMENT, one panel per feature,
# laid out on an 8 x 2 grid.
y_axis_values = df.columns.drop(['PRC_FULL_PAYMENT'])
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

# axes.flat walks the grid row by row, i.e. panel i sits at (i // 2, i % 2).
for panel, feature in zip(axes.flat, y_axis_values):
    sns.regplot(data=df, x='PRC_FULL_PAYMENT', y=feature, ax=panel, line_kws={'color': 'red'})
    panel.set_xlabel('PRC_FULL_PAYMENT', fontsize=15)
    panel.set_ylabel(feature, fontsize=15)
    panel.grid(True)

plt.show()

When using PRC_FULL_PAYMENT as the consistent variable, the resulting trends and implications were as follows:

TENURE: Looking at the plot and trendline, there is virtually no correlation.

### Checking TENURE

In [None]:
# Regress every remaining feature on TENURE, one panel per feature,
# laid out on an 8 x 2 grid.
y_axis_values = df.columns.drop(['TENURE'])
fig, axes = plt.subplots(8, 2, figsize=(40, 120))

# axes.flat walks the grid row by row, i.e. panel i sits at (i // 2, i % 2).
for panel, feature in zip(axes.flat, y_axis_values):
    sns.regplot(data=df, x='TENURE', y=feature, ax=panel, line_kws={'color': 'red'})
    panel.set_xlabel('TENURE', fontsize=15)
    panel.set_ylabel(feature, fontsize=15)
    panel.grid(True)

plt.show()

# Clustering

In [None]:
# Show the cleaned, still-unscaled frame before the clustering inputs are built from it.
df

In [None]:
# Work on an independent (deep) copy so that scaling below leaves `df` untouched.
df2 = df.copy(deep=True)

In [None]:
# Columns that are standardised and fed to the clustering models.
numerical_features = [
    'BALANCE',
    'BALANCE_FREQUENCY',
    'PURCHASES',
    'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES',
    'CASH_ADVANCE',
    'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY',
    'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY',
    'CASH_ADVANCE_TRX',
    'PURCHASES_TRX',
    'CREDIT_LIMIT',
    'PAYMENTS',
    'MINIMUM_PAYMENTS',
    'PRC_FULL_PAYMENT',
    'TENURE',
]

In [None]:
# A single transformer step: standardise all of the numerical features.
numeric_scaling_step = ('num', StandardScaler(), numerical_features)
preprocessor = ColumnTransformer(transformers=[numeric_scaling_step])

In [None]:
# Fit the scaler on the numerical columns and overwrite them with the scaled values.
scaled_values = preprocessor.fit_transform(df2[numerical_features])
df2[numerical_features] = scaled_values

In [None]:
# Inspect the scaled feature columns.
df2.loc[:, numerical_features]

All of the clustering techniques discussed in class are applied below, except expectation-maximization (distribution-based) clustering; the remaining techniques are the ones implemented here with sklearn.

In [None]:
# Sweep k = 1..14 on the scaled data, recording the inertia for every k and the
# mean silhouette for every k >= 2 (the k = 1 fit is skipped for the silhouette).
inertia = []
silhouette_avg = []

for k in range(1, 15):
    kmeans = KMeans(n_clusters=k, random_state=0).fit(df2)
    labels, centers = kmeans.labels_, kmeans.cluster_centers_
    inertia.append(kmeans.inertia_)

    if k >= 2:
        silhouette_avg.append(silhouette_score(df2, labels))

# Elbow curve: inertia against the number of clusters.
plt.plot(range(1, 15), inertia)
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()

# Mean silhouette against the number of clusters (starts at k = 2).
plt.plot(range(2, 15), silhouette_avg)
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Score')
plt.show()

In [None]:
# Fit the K-means model used for the per-cluster plots below.
K = 3
kmeans = KMeans(n_clusters=K, random_state=0)
kmeans.fit(df2)

In [None]:
# Per-cluster profile plot: one faint line per customer across the scaled
# features, with the cluster centre drawn on top in orange.
#
# The member rows are plotted transposed so that the x-axis is the feature
# position. Plotting the rows directly puts the DataFrame index on the x-axis,
# while the centre has only one value per feature (x = 0..n_features-1), so the
# centre line would not line up with the data it is supposed to summarise.
feature_positions = np.arange(df2.shape[1])
for c in range(K):
    members = df2[kmeans.labels_ == c]
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(feature_positions, members.to_numpy().T, color='steelblue', alpha=0.05, linewidth=0.5)
    ax.plot(feature_positions, kmeans.cluster_centers_[c], linewidth=8, color='orange')
    ax.set_xticks(feature_positions)
    ax.set_xticklabels(df2.columns, rotation=90)
    ax.set_ylabel('Standardised value')
    ax.set_title("cluster "+str(c))
    plt.show()

In [None]:
# Refit the 3-cluster K-means on the scaled data and report its silhouette score.
# random_state is fixed (as in the sweep above) so the reported score and the
# label assignment are the same on every run.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(df2)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Silhouette Score
sil_score = silhouette_score(df2, labels)
print(f"Silhouette Score: {sil_score}")

In [None]:
# Build the Ward-linkage hierarchy over the scaled features.
Z = linkage(df2, 'ward')

# Draw the full hierarchy as a dendrogram.
plt.figure(figsize=(10, 5))
dendrogram(Z)
plt.title('Hierarchical Clustering Dendrogram')
plt.show()

In [None]:
# Affinity Propagation on the scaled data (number of clusters is chosen by the algorithm).
af = AffinityPropagation(random_state=0)
af.fit(df2)
af_labels = af.labels_

# Score the resulting partition with the mean silhouette.
af_sil_score = silhouette_score(df2, af_labels)
print(f"Affinity Propagation Silhouette Score: {af_sil_score}")

In [None]:
# Applying DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(df2)

# Silhouette Score
# DBSCAN marks noise points with the label -1. Noise is not a cluster, so it is
# masked out before scoring; otherwise all noise points would be scored as if
# they formed one cluster, and a result of "noise + one cluster" would wrongly
# pass the "more than one cluster" check.
clustered = dbscan_labels != -1
n_clusters_found = len(set(dbscan_labels[clustered]))
if n_clusters_found > 1:
    dbscan_sil_score = silhouette_score(df2[clustered], dbscan_labels[clustered])
    print(f"DBSCAN Silhouette Score: {dbscan_sil_score}")
else:
    # The silhouette needs at least two clusters; say so instead of printing nothing.
    print(f"DBSCAN found {n_clusters_found} cluster(s) besides noise; silhouette score not computed")

In [None]:
# Mean-Shift on the scaled data with a fixed kernel bandwidth of 3.
mean_shift_clustering = MeanShift(bandwidth=3)
mean_shift_clustering.fit(df2)
clustering_labels = mean_shift_clustering.labels_

# Score the resulting partition with the mean silhouette.
ms_sil_score = silhouette_score(df2, clustering_labels)
print(f"Mean-Shift Silhouette Score: {ms_sil_score}")

In [None]:
# Agglomerative clustering on the scaled data, using the estimator's default settings.
agglomerative_clustering = AgglomerativeClustering()
agglomerative_clustering.fit(df2)
a_clustering_labels = agglomerative_clustering.labels_

# Score the resulting partition with the mean silhouette.
a_c_sil_score = silhouette_score(df2, a_clustering_labels)
print(f"Agglomerative Clustering Silhouette Score: {a_c_sil_score}")

The chosen clustering technique is K-means; the reasons are explained below (see Part 3).

Let's do some visualizations. Since there are so many combinations of variables, visualizations will be shown with respect to the three most important variables (when looking at the goal of the dataset): BALANCE, PURCHASES, and PAYMENTS. A rough summary of how clusters are being made will be shown at the end. Strategies for each cluster will be based on these visualizations.

In [None]:
# List the column names of the scaled frame that the clustering models are fitted on.
df2.columns

In [None]:
# Fit the 3-cluster K-means with a fixed seed so that the cluster labels, and
# therefore the point colours referred to in the write-up, are the same on
# every run.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(df2)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Plotting the clusters: BALANCE against every other feature, coloured by cluster.
# The centres are drawn at their coordinates for the two features actually on
# the axes (looked up by column position), rather than always at the
# BALANCE / BALANCE_FREQUENCY coordinates.
x_feature = 'BALANCE'
x_idx = df2.columns.get_loc(x_feature)
for y_feature in df2.columns.drop(x_feature):
    y_idx = df2.columns.get_loc(y_feature)
    plt.scatter(df2[x_feature], df2[y_feature], c=labels)
    plt.scatter(centers[:, x_idx], centers[:, y_idx], c='red')
    plt.title('K-means Clustering')
    plt.xlabel(x_feature)
    plt.ylabel(y_feature)
    plt.show()

In [None]:
# Fit the 3-cluster K-means with a fixed seed so that the cluster labels, and
# therefore the point colours referred to in the write-up, are the same on
# every run.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(df2)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Plotting the clusters: PURCHASES against every other feature, coloured by cluster.
# The centres are drawn at their coordinates for the two features actually on
# the axes (looked up by column position), rather than always at the
# BALANCE / BALANCE_FREQUENCY coordinates.
x_feature = 'PURCHASES'
x_idx = df2.columns.get_loc(x_feature)
for y_feature in df2.columns.drop(x_feature):
    y_idx = df2.columns.get_loc(y_feature)
    plt.scatter(df2[x_feature], df2[y_feature], c=labels)
    plt.scatter(centers[:, x_idx], centers[:, y_idx], c='red')
    plt.title('K-means Clustering')
    plt.xlabel(x_feature)
    plt.ylabel(y_feature)
    plt.show()

In [None]:
# Fit the 3-cluster K-means with a fixed seed so that the cluster labels, and
# therefore the point colours referred to in the write-up, are the same on
# every run.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(df2)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Plotting the clusters: PAYMENTS against every other feature, coloured by cluster.
# The centres are drawn at their coordinates for the two features actually on
# the axes (looked up by column position), rather than always at the
# BALANCE / BALANCE_FREQUENCY coordinates.
x_feature = 'PAYMENTS'
x_idx = df2.columns.get_loc(x_feature)
for y_feature in df2.columns.drop(x_feature):
    y_idx = df2.columns.get_loc(y_feature)
    plt.scatter(df2[x_feature], df2[y_feature], c=labels)
    plt.scatter(centers[:, x_idx], centers[:, y_idx], c='red')
    plt.title('K-means Clustering')
    plt.xlabel(x_feature)
    plt.ylabel(y_feature)
    plt.show()

Perform PCA

In [None]:
# Perform PCA, keeping every component, on the scaled features.
pca = PCA()
df_pca = pca.fit_transform(df2)

# The transformed data is an array, convert it back into a dataframe.
# Column names are sized from the number of components PCA produced, not from
# the column count of the separate frame `df`, which would raise a shape error
# if the two ever differ.
df_pca = pd.DataFrame(df_pca, columns=[f'PC{i+1}' for i in range(pca.n_components_)])

# Print the explained variance ratio
print('Explained variance ratio:', pca.explained_variance_ratio_)

# Print the cumulative explained variance ratio
cumsum_variance = np.cumsum(pca.explained_variance_ratio_)
print('Cumulative explained variance ratio:', cumsum_variance)

# Show the first few rows of transformed dataframe
df_pca.head()

In [None]:
# Keep the first 10 principal components; the target is 90%
# (presumably of cumulative explained variance - confirm against the output above).
pca = PCA(n_components=10)
df3 = df2.copy()
df3_shortened = pca.fit_transform(df3)

In [None]:
# Inspect the PCA-reduced data returned by fit_transform.
df3_shortened

In [None]:
# Wrap the PCA output in a DataFrame (default integer column labels) for the steps below.
df3_shortened = pd.DataFrame(data=df3_shortened)

In [None]:
# Sweep k = 1..14 on the PCA-reduced data, recording the inertia for every k and
# the mean silhouette for every k >= 2 (the k = 1 fit is skipped for the silhouette).
inertia = []
silhouette_avg = []

for k in range(1, 15):
    kmeans = KMeans(n_clusters=k, random_state=0).fit(df3_shortened)
    labels, centers = kmeans.labels_, kmeans.cluster_centers_
    inertia.append(kmeans.inertia_)

    if k >= 2:
        silhouette_avg.append(silhouette_score(df3_shortened, labels))

# Elbow curve: inertia against the number of clusters.
plt.plot(range(1, 15), inertia)
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()

# Mean silhouette against the number of clusters (starts at k = 2).
plt.plot(range(2, 15), silhouette_avg)
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Score')
plt.show()

In [None]:
# Fit the K-means model on the PCA-reduced data for the per-cluster plots below.
K = 3
kmeans = KMeans(n_clusters=K, random_state=0)
kmeans.fit(df3_shortened)

In [None]:
# Per-cluster profile plot on the PCA-reduced data: one faint line per customer
# across the principal components, with the cluster centre drawn on top in orange.
#
# The member rows are plotted transposed so that the x-axis is the component
# position. Plotting the rows directly puts the DataFrame index on the x-axis,
# while the centre has only one value per component, so the centre line would
# not line up with the data it is supposed to summarise.
component_positions = np.arange(df3_shortened.shape[1])
for c in range(K):
    members = df3_shortened[kmeans.labels_ == c]
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(component_positions, members.to_numpy().T, color='steelblue', alpha=0.05, linewidth=0.5)
    ax.plot(component_positions, kmeans.cluster_centers_[c], linewidth=4, color='orange')
    ax.set_xticks(component_positions)
    ax.set_xticklabels([f'PC{j+1}' for j in component_positions])
    ax.set_ylabel('Component score')
    ax.set_title("cluster "+str(c))
    plt.show()

In [None]:
# Refit the 3-cluster K-means on the PCA-reduced data and report its silhouette
# score. random_state is fixed (as in the sweep above) so the reported score and
# the label assignment are the same on every run.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(df3_shortened)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Silhouette Score
sil_score = silhouette_score(df3_shortened, labels)
print(f"Silhouette Score: {sil_score}")

## Part 1

    Removed outliers or data that did not make sense
    Removed customer ID since it was not needed
    Removed missing values
    Scaled the data using standard scaler
    After everything, performed PCA to reduce dimensionality while still having some explainability.

## Part 2

Part 2 answer is given under the visualizations for the EDA.

## Part 3

Note: Even though PCA was conducted, analysis will be based on non-PCA clusters for explainability purposes.

Out of the clustering techniques implemented (K-means, Affinity Propagation, DBSCAN, Mean-shift, and Agglomerative), the technique identified as the best is K-means. There were a couple of reasons for this decision. First, a consistent evaluation method across all the clustering techniques was the Silhouette Score, chosen because it is easy to use, easy to understand, and easy to apply across multiple clusters. The K-means Silhouette Score was the best out of the 5 techniques used (2 out of 5 generated negative values, showing clustering was not happening cleanly). Second, out of all the techniques run, it appeared to be the most computationally inexpensive. There are a lot of data points to consider, which can be taxing on resources, so having a method that won't be taxing is important. Finally, there was no clear separation of data in the visualizations I saw. I think the computer will have a tough time choosing the correct number of clusters to best separate the data, so giving instructions on how many clusters to use (3, as that generated the best Silhouette Score) is important. Also, some data cleaning was done with the intention of a more normal distribution, something K-means excels at separating.

## Part 4

Note: Even though PCA was conducted, analysis will be based on non-PCA clusters for explainability purposes. Also the clusters will be referred to in their colours as yellow, green, and purple.

Looking at the visualizations with respect to BALANCE, PURCHASES, and PAYMENTS, characteristics can be determined. Starting with BALANCE, yellow was for when the BALANCE was low, which typically correlated with the other variable being low (there were some graphs that had yellow when the variable plotted against BALANCE was both low and high). When there was a clear separation between purple and green, purple was typically the data with high BALANCE and a low other variable, while green was high BALANCE and a high other variable (there was one exception). For these two clusters, I think the telling difference was when the other variable had something to do with purchases (not a frequency score): purple was high balance and a low number of purchases, suggesting low spenders, while green was high balance and a high number of purchases, suggesting big spenders. However, the payments, with respect to BALANCE, were relatively similar, suggesting green is the riskiest cluster since they are buying stuff but not being great about paying it back.

For PURCHASES, yellow was always low number of PURCHASES, but typically varied between high and low values for the other variable. The green was very similar (slightly more purchases), but purple was a wide array of purchase numbers. In these three clusters, there was not any telling information about characteristics. It made sense that more purchases equated a higher balance and higher payments, while low purchases indicate low balance and payments.

For PAYMENTS, yellow was typically both high, purple was typically high payments and low other variable and green was typically low and low. This was telling for purchases as despite having high payments, purple had low BALANCE and low purchases which affect potential strategies.

## Part 5

Note: Even though PCA was conducted, analysis will be based on non-PCA clusters for explainability purposes. Also the clusters will be referred to in their colours as yellow, green, and purple.

Business recommendations will differ depending on the cluster and on which 'with respect to' variable is being considered. With respect to BALANCE, yellow customers (yellow as defined in the BALANCE plots) should be incentivized to buy slightly more things with a slightly higher credit limit. This would allow the bank to charge interest and make more money. Coupons from any corporations the bank has an ongoing relationship with (like McDonald's) should be provided to purple customers, since they need to spend more. It's great that they have a high balance, so we can use the money to invest, but we need them spending something so we can charge interest. We need to lower the credit limit of green customers, since they are buying but not making payments.

For purple in PAYMENTS, we need to maintain these clients on a regular basis since they make payments on what they buy, showing low risk.