# Plots

Code used to generate the histograms, scatter plots and correlations for the data.
The notebook is divided into two parts: one for recurring customers and one for new customers.

In [None]:
# Import libraries
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt

## Recurring customers

In [None]:
# Load the imputed data set for recurring customers.
# Forward slashes are used so the relative path resolves on Windows and on
# POSIX systems alike (a literal backslash is only a separator on Windows).
# NOTE(review): pickle.load executes arbitrary code from the file; only open
# pickles that were produced by a trusted step of this project.
with open('Data final/data_recurring_imputed.pkl', 'rb') as file:
    data = pickle.load(file)

In [None]:
# Scatter matrix for the recurring-customer data with correlation
# coefficients written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Number of accounts", "Age", "Longevity",
           "Loan extensions", "Default probability", "Number of transactions"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("Recurring_1.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the recurring-customer data with correlation
# coefficients written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Consumer Confidence Index", "Exchange rate",
           "Inflation", "Interest rate", "GDP growth", "Unemployment rate"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("Recurring_2.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the recurring-customer data with correlation
# coefficients written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Invoice accounts", "Consumer loans",
           "Buy-now-pay-later", "Credit cards A", "Credit cards B",
           "Credit cards C"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("Recurring_3.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the recurring-customer data with correlation
# coefficients written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Minimum limit", "Maximum limit",
           "Minimum balance", "Maximum balance"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("Recurring_4.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the recurring-customer data with correlation
# coefficients written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Number of accounts", "Age", "Longevity",
           "Loan extensions", "Maximum limit", "Maximum balance",
           "Credit cards A", "Consumer loans"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("Recurring main.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Overlaid histograms of total revenue, split by each binary indicator
# column, arranged on a 2x2 grid of axes.
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel, in row-major grid order:
# (indicator column / panel title, legend label for 0, legend label for 1)
indicator_panels = [
    ("Gender", "Females", "Males"),
    ("Insurance", "Not insured", "Insured"),
    ("Default", "Not in default", "In default"),
    ("Co-applicant", "No co-applicant", "With a co-applicant"),
]

for panel, (indicator, label_zero, label_one) in zip(axs.flat, indicator_panels):
    # Draw the two groups semi-transparently so their overlap stays visible
    for value, group_label in ((0, label_zero), (1, label_one)):
        panel.hist(data['Total revenue'].loc[data[indicator] == value],
                   alpha=0.5, label=group_label)
    panel.set_title(indicator)
    panel.set_xlabel('Total Revenue')
    panel.set_ylabel('Number of observations')
    panel.legend(loc='upper right')
    panel.tick_params(axis='both', which='both', length=0)

# Leave room between the panels for titles and axis labels
fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Drop the tick marks and tick labels on every panel
for ax in axs.flat:
    ax.set_xticks([])
    ax.set_yticks([])

# Write the figure to disk
plt.savefig("Recurring_indicators.jpeg", bbox_inches='tight')

# Display the figure
plt.show()

## New customers 

In [None]:
# Load the imputed data set for new customers.
# Forward slashes are used so the relative path resolves on Windows and on
# POSIX systems alike (a literal backslash is only a separator on Windows).
# NOTE(review): pickle.load executes arbitrary code from the file; only open
# pickles that were produced by a trusted step of this project.
with open('Data final/data_new_imputed.pkl', 'rb') as file:
    data = pickle.load(file)

In [None]:
# Scatter matrix for the new-customer data with correlation coefficients
# written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Consumer Confidence Index", "Exchange rate",
           "Inflation", "Interest rate", "GDP growth", "Unemployment rate",
           "Default probability"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("New_1.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the new-customer data with correlation coefficients
# written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Invoice accounts", "Consumer loans",
           "Buy-now-pay-later", "Credit cards A", "Credit cards B",
           "Credit cards C", "Age"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("New_2.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Scatter matrix for the new-customer data with correlation coefficients
# written into the upper-triangle panels.
# The column list is defined once so the plotted panels and the correlation
# matrix are guaranteed to use the same columns in the same order.
columns = ["Total revenue", "Age", "Credit cards A", "Consumer loans"]

axes = pd.plotting.scatter_matrix(
    data[columns],
    alpha=0.2,
    figsize=(20, 15),
    marker="o",
    hist_kwds=dict(edgecolor="black", linewidth=1, bins=30),
    edgecolor="black",
)

# Remove ticks from x and y axes
for ax in axes.flatten():
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

# Pairwise correlations from DataFrame.corr(); despite the variable name the
# values are signed, no absolute value is taken.
abs_corr = data[columns].corr().values

# Walk over the upper triangle (k=1 skips the diagonal histograms) and
# replace each scatter panel by its correlation coefficient.
# numpy is accessed through the notebook's own `np` import rather than the
# undocumented `plt.np` attribute.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
    # Moving the view to a tiny window pushes the scatter points out of sight.
    # NOTE(review): points lying inside (1.1, 1.12) on both axes would still
    # be drawn - confirm no column pair has values in that window.
    axes[i, j].set_xlim((1.1, 1.12))
    axes[i, j].set_ylim((1.1, 1.12))
    axes[i, j].annotate(f"{abs_corr[i, j]:.3f}", (0.5, 0.5),
                        xycoords='axes fraction',
                        ha='center', va='center', fontsize=20)

# Lay out the panels without padding (a preceding default-padding
# tight_layout() call would be superseded by this one, so it is omitted).
plt.tight_layout(pad=0)

# save the plot
plt.savefig("New main.jpeg", bbox_inches='tight')

plt.show()

In [None]:
# Overlaid histograms of total revenue, split by each binary indicator
# column, arranged as three panels in a single row.
# plt.subplots(1, 3) returns a one-dimensional array of axes, so the panels
# are addressed with a single index (two-index access such as axs[0, 0]
# raises IndexError on a 1-D array).
fig, axs = plt.subplots(1, 3, figsize=(15, 10))

# One entry per panel, left to right:
# (indicator column / panel title, legend label for 0, legend label for 1)
indicator_panels = [
    ("Gender", "Females", "Males"),
    ("Insurance", "Not insured", "Insured"),
    ("Co-applicant", "No co-applicant", "With a co-applicant"),
]

for panel, (indicator, label_zero, label_one) in zip(axs, indicator_panels):
    # Draw the two groups semi-transparently so their overlap stays visible
    panel.hist(data['Total revenue'].loc[data[indicator] == 0], alpha=0.5, label=label_zero)
    panel.hist(data['Total revenue'].loc[data[indicator] == 1], alpha=0.5, label=label_one)
    panel.set_title(indicator)
    panel.set_xlabel('Total Revenue')
    panel.set_ylabel('Number of observations')
    panel.legend(loc='upper right')
    panel.tick_params(axis='both', which='both', length=0)

# Adjust the spacing between subplots
fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Remove ticks for all subplots
for ax in axs:
    ax.set_xticks([])
    ax.set_yticks([])

# save the plot
plt.savefig("New_indicators.jpeg", bbox_inches='tight')

# Show the plot
plt.show()