# Churn Rate Analysis and Insights

In [55]:
# imports
import pandas as pd


## Load Data

In [56]:
# Read the processed contract export into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='data/data_after_processing.csv')
# Display the full list of column names.
list(df.columns)

['Unnamed: 0',
 'contract_item_cancellation_status_code',
 'contract_item_customer_contract_life_cycle_status_code',
 'contract_item_description',
 'contract_item_internal_id',
 'contract_item_product_category',
 'contract_item_product_description',
 'contract_item_support_end_date',
 'contract_item_support_start_date',
 'contract_item_validity_status_code',
 'contract_item_concurrent_sessions',
 'description',
 'end_date_time',
 'start_date_time',
 'item_count',
 'item_list_cancellation_status_code',
 'item_list_customer_contract_life_cycle_status_code',
 'item_list_validity_status_code',
 'contract_label',
 'customer_earliest_start',
 'customer_latest_end',
 'customer_label',
 'sla',
 'product_category',
 'service_level_regex',
 'otrs_version',
 'system_type',
 'feature_add_ons',
 'cancellation_date',
 'customer_country',
 'concat_volume',
 'concat_currency',
 'cancellation_date_orca',
 'end_customer_id']

## Analyze the ID column

In [57]:
# Every row whose end_customer_id occurs more than once (keep=False marks all
# occurrences, not just the repeats), ordered by that ID so groups sit together.
(
    df.loc[df['end_customer_id'].duplicated(keep=False)]
    .sort_values(by='end_customer_id')
)

Unnamed: 0.1,Unnamed: 0,contract_item_cancellation_status_code,contract_item_customer_contract_life_cycle_status_code,contract_item_description,contract_item_internal_id,contract_item_product_category,contract_item_product_description,contract_item_support_end_date,contract_item_support_start_date,contract_item_validity_status_code,...,service_level_regex,otrs_version,system_type,feature_add_ons,cancellation_date,customer_country,concat_volume,concat_currency,cancellation_date_orca,end_customer_id
8,8,Not Canceled,Completed,OTRS On-Premise GOLD,10,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2023-01-25 23:00:00,2022-01-25 23:00:00,Expired,...,Gold,8.0.27,auto,"['OTRSCIsInCustomerFrontend', 'OTRSConfigurati...",,GERMANY,14995.0,Euro,,1ybA3NXK
9,9,Not Canceled,Completed,OTRS On-Premise SILVER,10,Contracts On-Premise OTRS,OTRS On-Premise SILVER,2023-01-25 23:00:00,2022-01-25 23:00:00,Expired,...,Silver,8.0.27,auto,"['OTRSCIsInCustomerFrontend', 'OTRSConfigurati...",,GERMANY,3995.0,Euro,,1ybA3NXK
89,89,Not Canceled,In Process,OTRS On-Premise GOLD,20,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2024-11-06 23:00:00,2023-11-06 23:00:00,Active,...,Gold 100 CA,7.0.29,auto,"['OTRSReady2AdoptProcesses', 'OTRSAdvancedEsca...",,GERMANY,9495.0,Euro,,3FGHqzJj
90,90,Not Canceled,In Process,OTRS On-Premise GOLD,20,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2024-11-06 23:00:00,2023-11-06 23:00:00,Active,...,Gold 100 CA,8.0.27,auto,"['OTRSConfigurationManagement', 'OTRSReady2Ado...",,GERMANY,9495.0,Euro,,3FGHqzJj
74,74,Not Canceled,In Process,OTRS GOLD,20,Contracts Managed OTRS,OTRS GOLD,2024-09-18 22:00:00,2023-09-18 22:00:00,Active,...,Gold,2023.1.1,managed,"['OTRSReady2AdoptWebServices', 'SaaSPortalConn...",2023-05-03 07:27:00,GERMANY,17495.0,Euro,2023-05-03 07:27:00,41W93rIA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,170,Canceled,Completed,OTRS On-Premise PLATINUM,10,Contracts On-Premise OTRS,OTRS On-Premise PLATINUM,2023-09-30 22:00:00,2022-09-30 22:00:00,Active,...,Platinum,7.0.38,auto,"['ImportExport', 'OTRSSystemConfigurationHisto...",2021-11-19 07:49:00,GERMANY,34995.0,EUR,2023-04-28 09:57:00,vGFcrZOZ
69,69,Not Canceled,In Process,OTRS SILVER,20,Contracts Managed OTRS,OTRS SILVER,2024-08-31 22:00:00,2023-08-31 22:00:00,Active,...,Silver,2023.1.1,managed,"['OTRSPrimarySecondary', 'OTRSSystemConfigurat...",,GERMANY,5495.0,Euro,,wuvJX3hp
70,70,Not Canceled,In Process,Storage Extension 1TB,30,Other Contract Related Business,Storage Extension 1TB,2024-08-31 22:00:00,2023-09-27 22:00:00,Active,...,Gold,2023.1.1,managed,"['SaaSPortalConnector', 'OTRSAutomation', 'OTR...",,GERMANY,17495.0,Euro,,wuvJX3hp
31,31,Not Canceled,Completed,OTRS TITANIUM,10,Contracts Managed OTRS,OTRS TITANIUM,2023-04-10 22:00:00,2022-04-10 22:00:00,Expired,...,Titanium,2023.1.1,managed,"['OTRSHideShowDynamicFields', 'SaaSPortalConne...",,GERMANY,23995.0,Euro,,zrp7aSpq


## Analyze value counts for categorical columns

In [58]:
# Columns whose name contains "status" (case-insensitive), plus the two label columns.
status_columns = df.columns[df.columns.str.contains('status', case=False)]
additional_columns = ['contract_label', 'customer_label']

# Single list of every column to inspect.
all_columns_to_check = [*status_columns, *additional_columns]

# Print the frequency table of each column; names missing from the frame are skipped.
for col in all_columns_to_check:
    if col not in df.columns:
        continue
    print(f"Value counts for {col}:")
    print(df[col].value_counts())
    print("\n")

Value counts for contract_item_cancellation_status_code:
contract_item_cancellation_status_code
Not Canceled              121
Canceled                   72
Cancellation Requested     15
Name: count, dtype: int64


Value counts for contract_item_customer_contract_life_cycle_status_code:
contract_item_customer_contract_life_cycle_status_code
In Process    113
Completed      93
Released        2
Name: count, dtype: int64


Value counts for contract_item_validity_status_code:
contract_item_validity_status_code
Active         132
Expired         70
Not Started      6
Name: count, dtype: int64


Value counts for item_list_cancellation_status_code:
item_list_cancellation_status_code
Not Canceled                      114
Canceled                           69
Partial Cancellation Requested     11
Cancellation Requested              9
Partially Canceled                  5
Name: count, dtype: int64


Value counts for item_list_customer_contract_life_cycle_status_code:
item_list_customer_contract_

## Count unique combinations of status columns

In [59]:
# Columns whose joint value combinations are tallied: all status columns plus the labels.
columns_to_analyze = [*status_columns, *additional_columns]

# Restrict the frame to those columns.
filtered_df = df.loc[:, columns_to_analyze]

# Count rows per distinct combination and flatten the result into a regular frame
# with an explicit "count" column. Note: value_counts() skips rows that contain
# NaN in any of the selected columns by default.
combination_counts = (
    filtered_df.value_counts()
    .reset_index()
    .set_axis(columns_to_analyze + ["count"], axis=1)
)

# Combinations are already distinct after value_counts(), so this leaves the rows unchanged.
unique_combinations = combination_counts.drop_duplicates()
# Display the combinations table.
unique_combinations

Unnamed: 0,contract_item_cancellation_status_code,contract_item_customer_contract_life_cycle_status_code,contract_item_validity_status_code,item_list_cancellation_status_code,item_list_customer_contract_life_cycle_status_code,item_list_validity_status_code,contract_label,customer_label,count
0,Not Canceled,In Process,Active,Not Canceled,In Process,Active,active,active,90
1,Canceled,Completed,Expired,Canceled,Completed,Expired,cancelled,cancelled,45
2,Canceled,Completed,Active,Canceled,Completed,Active,cancelled,cancelled,19
3,Not Canceled,Completed,Expired,Not Canceled,In Process,Active,active,active,14
4,Cancellation Requested,In Process,Active,Cancellation Requested,In Process,Active,cancelled,cancelled,6
5,Cancellation Requested,In Process,Active,Partial Cancellation Requested,In Process,Active,cancelled,cancelled,4
6,Not Canceled,Completed,Expired,Partial Cancellation Requested,In Process,Active,cancelled,cancelled,4
7,Not Canceled,In Process,Not Started,Not Canceled,In Process,Active,active,active,4
8,Canceled,Completed,Expired,Canceled,Completed,Expired,cancelled,active,3
9,Cancellation Requested,In Process,Active,Cancellation Requested,In Process,Active,cancelled,active,3


## Define Churn

In [64]:
# Churn flag per row: 1 when contract_label equals 'cancelled', otherwise 0
# (rows with a missing label also get 0).
df['churn'] = df['contract_label'].eq('cancelled').astype(int)

# Print how many rows fall into each churn class.
# Each row is a contract item, so these are not unique-customer counts.
print(df['churn'].value_counts())

churn
0    112
1     96
Name: count, dtype: int64


## Save to CSV

In [65]:
# Remove columns that should not be exported:
#   - the status and label columns in columns_to_analyze (this includes
#     'contract_label', which the 'churn' flag was derived from),
#   - the logical-error flag columns,
#   - the 'Unnamed: 0' column present in the loaded CSV.
# errors='ignore' skips any name that is not in the frame. The logical-error
# columns are not in the column list of the loaded file, so a strict drop raises
# KeyError on a fresh run; it also makes this cell safe to re-run.
columns_to_drop = (
    columns_to_analyze
    + ['contract_item_logical_error', 'item_list_logical_error', 'customer_logical_error', 'date_logical_error', 'logical_error']
    + ['Unnamed: 0']
)
df = df.drop(columns=columns_to_drop, errors='ignore')
df

Unnamed: 0,contract_item_description,contract_item_internal_id,contract_item_product_category,contract_item_product_description,contract_item_support_end_date,contract_item_support_start_date,contract_item_concurrent_sessions,description,end_date_time,start_date_time,...,otrs_version,system_type,feature_add_ons,cancellation_date,customer_country,concat_volume,concat_currency,cancellation_date_orca,end_customer_id,churn
0,OTRS GOLD,10,Contracts Managed OTRS,OTRS GOLD,2023-12-06 23:00:00,2022-12-06 23:00:00,50.0,OTRS GOLD,2023-12-06 23:00:00,2022-12-06 23:00:00,...,7.0.48,managed,"['SaaSPortalConnector', 'OTRSCalendarResourceP...",,GERMANY,17495.0,Euro,,CFHbGks3,0
1,OTRS On-Premise PLATINUM,10,Contracts On-Premise OTRS,OTRS On-Premise PLATINUM,2023-12-11 23:00:00,2022-12-11 23:00:00,300.0,OTRS On-Premise PLATINUM,2023-12-11 23:00:00,2022-12-11 23:00:00,...,7.0.23,auto,"['GeneralCatalog', 'OTRSSystemConfigurationHis...",,SPAIN,37800.0,Euro,,laVvIOXe,0
2,OTRS On-Premise GOLD,10,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2023-12-31 23:00:00,2022-12-31 23:00:00,50.0,OTRS On-Premise GOLD,2023-12-31 23:00:00,2018-12-31 23:00:00,...,7.0.22,auto,"['OTRSHideShowDynamicFields', 'OTRSEscalationS...",2017-05-16 08:38:00,GERMANY,5995.0,Euro,,Yc6VmmVi,0
3,OTRS On-Premise GOLD (Testsystem),10,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2023-12-31 23:00:00,2022-12-31 23:00:00,50.0,OTRS On-Premise GOLD,2023-12-31 23:00:00,2018-12-31 23:00:00,...,7.0.12,auto,"['GeneralCatalog', 'OTRSSystemConfigurationHis...",2017-05-16 08:38:00,GERMANY,5995.0,Euro,,Yc6VmmVi,0
4,OTRS On-Premise SILVER,10,Contracts On-Premise OTRS,OTRS On-Premise SILVER,2023-12-31 23:00:00,2022-12-31 23:00:00,10.0,OTRS On-Premise SILVER,2023-12-31 23:00:00,2018-12-31 23:00:00,...,7.0.12,auto,"['OTRSAdvancedEscalations', 'AkquinetAssetSear...",2017-05-16 08:38:00,GERMANY,3995.0,Euro,,Yc6VmmVi,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,OTRS Contract - Annual Support ENTRY ADV,20,Contracts On-Premise Old,Old Basic,2024-08-15 22:00:00,2023-08-15 22:00:00,,Old Basic,2024-08-15 22:00:00,2004-08-15 22:00:00,...,5.0.1,manual,"['FAQ', 'OTRSMasterSlave']",,GERMANY,4995.0,Euro,2023-06-29 06:36:00,5gw4WtWX,1
204,OTRS On-Premise GOLD,10,Contracts On-Premise OTRS,OTRS On-Premise GOLD,2023-01-11 23:00:00,2022-10-11 22:00:00,50.0,OTRS On-Premise GOLD,2024-10-11 22:00:00,2022-10-11 22:00:00,...,8.0.36,auto,"['OTRSCloneDB', 'Survey', 'OTRSConfigurationMa...",,UNITED STATES OF AMERICA,15047.0,US-Dollar,,U00PI7xM,1
205,OTRS On-Premise PLATINUM,20,Contracts On-Premise OTRS,OTRS On-Premise PLATINUM,2024-10-16 22:00:00,2023-10-16 22:00:00,200.0,OTRS On-Premise PLATINUM,2024-10-16 22:00:00,2020-10-16 22:00:00,...,7.0.40,auto,"['OTRSMasterSlave', 'OTRSDynamicFieldDatabase'...",,AUSTRIA,25995.0,Euro,,Fs3qolwK,1
206,OTRS GOLD,40,Contracts Managed OTRS,OTRS GOLD,2024-10-26 22:00:00,2023-10-26 22:00:00,50.0,OTRS GOLD,2024-10-26 22:00:00,2020-10-26 23:00:00,...,2023.1.1,managed,"['OTRSStatePreselectionResponseTemplates', 'OT...",,GERMANY,15995.0,Euro,,JuFKu1yu,1


In [67]:
# Persist the churn-labelled frame. The row index is written as the first,
# unnamed CSV column, so it shows up as 'Unnamed: 0' when the file is read back.
df.to_csv(path_or_buf="data/data_with_churn.csv")