In [None]:
import pandas as pd

# Location of the input CSV file
CSV_PATH = 'data/cancelamentos.csv'

# Read the CSV, discard the CustomerID column (not used in this analysis)
# and show the resulting table
df = pd.read_csv(CSV_PATH).drop(columns='CustomerID')
display(df)

In [None]:
# Data cleaning: identify and remove rows with empty values.
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only adds a stray "None" to the output.
df.info()
df = df.dropna()
# Print the summary again to compare the non-null counts after dropping rows.
df.info()

In [None]:
# Cancelled vs. stayed: row count per 'cancelou' value, then the same split
# expressed as percentages
churn_counts = df['cancelou'].value_counts()
churn_share = df['cancelou'].value_counts(normalize=True)
display(churn_counts)
display(churn_share.map(lambda share: f'{share:.2%}'))

In [None]:
# Customers per contract period type: absolute counts, then percentages
contract_counts = df['duracao_contrato'].value_counts()
contract_share = df['duracao_contrato'].value_counts(normalize=True)
display(contract_counts)
display(contract_share.map(lambda share: f'{share:.2%}'))

In [None]:
# Mean of every numeric column, grouped by contract period type
contract_means = df.groupby('duracao_contrato').mean(numeric_only=True)
display(contract_means)

# Observation: monthly contracts have a cancellation mean of 100%

In [None]:
# Exclude the monthly contracts, then recompute the cancelled/stayed split
is_monthly = df['duracao_contrato'] == 'Monthly'
df = df[~is_monthly]
display(df)

churn_counts_no_monthly = df['cancelou'].value_counts()
churn_share_no_monthly = df['cancelou'].value_counts(normalize=True)
display(churn_counts_no_monthly)
display(churn_share_no_monthly.map(lambda share: f'{share:.2%}'))

In [None]:
# Share of customers on each plan (assinatura) type, followed by the
# per-plan mean of every numeric column
plan_share = df['assinatura'].value_counts(normalize=True)
plan_means = df.groupby('assinatura').mean(numeric_only=True)
display(plan_share)
display(plan_means)

# Observation: about 1/3 of customers on each plan type, with similar
# cancellation values across plans


In [None]:
# Visual check: one histogram per column, with bars colored by 'cancelou'
import plotly.express as px

# Iterating a DataFrame yields its column labels
for column in df:
    graph = px.histogram(data_frame=df, x=column, color='cancelou', width=600)
    graph.show()

In [None]:
# Reading the histograms:
# - dias_atraso (delayed payment days) above 20 days: 100% cancellation
# - ligacoes_callcenter (call center contacts) at high counts: 100% cancellation
# NOTE(review): this note previously said "above 5", but the filter below keeps
# only customers with fewer than 5 calls (exactly 5 is dropped as well) -
# confirm the intended threshold against the histogram.

few_calls = df['ligacoes_callcenter'] < 5
short_delay = df['dias_atraso'] <= 20
df = df[few_calls & short_delay]
display(df)

churn_counts_filtered = df['cancelou'].value_counts()
churn_share_filtered = df['cancelou'].value_counts(normalize=True)
display(churn_counts_filtered)
display(churn_share_filtered.map(lambda share: f'{share:.2%}'))

# With these causes addressed, customer churn could drop to about 18%.
# Eliminating 100% of these cases is unrealistic, but the main causes are now clear:
# - monthly contracts
# - need to contact call center
# - payment delays