In [None]:
#                                                       CUSTOMER LIFETIME VALUE ANALYSIS USING PYTHON

In [None]:
#           Customer lifetime value analysis is used to estimate the total value of customers to the business over the lifetime of-
#            -their relationship.
#           It helps businesses make data-driven decisions on how to allocate their resources and improve their customer relationships.

#           It helps companies determine how much to invest in customer acquisition and retention, as well as identify the most valuable-
#            -customers to prioritize for retention efforts.

In [None]:
#                                                           PROBLEM STATEMENT


#           By analyzing customer lifetime value, companies can identify the most effective marketing channels and campaigns for-
#            -acquiring high-value customers, as well as develop targeted retention strategies to keep those customers engaged and loyal.

In [None]:
#                                                               IMPORT NECESSARY LIBRARIES


#           I’ll start this task by importing the necessary Python libraries and the dataset:

In [4]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make "plotly_white" the default Plotly template for this session.
pio.templates.default = "plotly_white"


In [5]:
# Location of the customer-acquisition CSV.
# Set the CUSTOMER_ACQUISITION_CSV environment variable to load the file from
# another location. When it is unset (or empty) the original hard-coded path
# below is used, so existing runs behave exactly as before.
data_file = (
    os.environ.get('CUSTOMER_ACQUISITION_CSV')
    or 'C:\\Users\\bleed\\OneDrive\\Bureau\\My Python Data Journey\\Practicing Data Science for fun\\acquisition_data\\customer_acquisition_data.csv'
)

# Read the whole file into a DataFrame; every later cell works on `data`.
data = pd.read_csv(data_file)

# Preview the first five rows as plain text.
print(data.head())

   customer_id           channel       cost  conversion_rate  revenue
0            1          referral   8.320327         0.123145     4199
1            2  paid advertising  30.450327         0.016341     3410
2            3   email marketing   5.246263         0.043822     3164
3            4      social media   9.546326         0.167592     1520
4            5          referral   8.320327         0.123145     2419


In [7]:
# Display the (rows, columns) shape of the loaded frame as the cell output.
data.shape

(800, 5)

In [None]:
#           Let’s start by visualizing the distribution of acquisition cost and revenue generated by the customer using histograms:

In [6]:
# Histogram of the per-row 'cost' column, requesting 20 bins.
fig = px.histogram(data_frame=data, x='cost', nbins=20, title='Distribution of Acquisition Cost')
fig.show()

In [8]:
# Histogram of the per-row 'revenue' column, requesting 20 bins.
fig = px.histogram(data_frame=data, x='revenue', nbins=20, title='Distribution of Revenue')
fig.show()

In [None]:
#           Now let’s COMPARE the COST of acquisition across different channels and-
#            -identify the most and least expensive channels:

In [11]:
# Mean acquisition cost per channel, kept as a flat two-column frame
# ('channel', 'cost') so it can be passed straight to plotly express.
cost_by_channel = data.groupby('channel', as_index=False)['cost'].mean()

# One bar per channel; bar height is the mean cost.
fig = px.bar(
    data_frame=cost_by_channel,
    x='channel',
    y='cost',
    title='Customer Acquisition Cost by Channel',
)
fig.show()

In [None]:
#           So paid advertising is the most expensive channel, and email marketing is the least expensive channel.

#           Now let’s see which channels are most and least effective at converting customers:

In [14]:
# Mean conversion rate per channel as a flat ('channel', 'conversion_rate') frame.
conversion_by_channel = data.groupby('channel', as_index=False)['conversion_rate'].mean()

# One bar per channel; bar height is the mean conversion rate.
fig = px.bar(
    data_frame=conversion_by_channel,
    x='channel',
    y='conversion_rate',
    title='Converting Rate by Channel',
)
fig.show()

In [None]:
#           Social media is the most effective channel for converting customers, while paid advertising is the least effective.

#           Now let’s calculate the total revenue by channel and have a look at the most and least profitable channels in terms of generating revenue:

In [18]:
# Total revenue per channel as a flat ('channel', 'revenue') frame.
total_by_channel = data.groupby('channel', as_index=False)['revenue'].sum()

# Donut chart (hole=0.5): one slice per channel, sized by its summed revenue.
fig = px.pie(
    data_frame=total_by_channel,
    names='channel',
    values='revenue',
    title='Total Revenue by Channel',
    hole=0.5,
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

In [None]:
#           So email marketing is the most profitable channel in terms of generating revenue.
 
#           However, the channels’ shares of total revenue are too close to one another to single out-
#            -any channel as the least profitable.

In [None]:
#           Now let’s calculate the return on investment (ROI) for each channel:

In [25]:
# Per-row ratio of revenue to acquisition cost, stored as a new 'roi' column.
# NOTE(review): this is revenue / cost (a gross ratio). The conventional ROI
# formula is (revenue - cost) / cost, which would be exactly 1 lower on every
# row. Channel ranking is the same either way; confirm which is intended.
data['roi'] = data['revenue'].div(data['cost'])

# Average of the per-row ratio within each channel.
roi_by_channel = data.groupby('channel', as_index=False)['roi'].mean()

# One bar per channel; bar height is the mean 'roi'.
fig = px.bar(
    data_frame=roi_by_channel,
    x='channel',
    y='roi',
    title='Return On Investment (ROI) by Channel',
)
fig.show()

In [None]:
#           The ROI from email marketing is way higher than all other channels, while the ROI from paid advertising is the lowest.

#           Now let’s calculate the Customer LifeTime Value from each channel.
#           Based on the data we have, we can use the formula mentioned below to calculate CLTV :

#                               CLTV = (revenue – cost) * conversion_rate / cost

In [38]:
# Per-row CLTV, stored as a new 'cltv' column:
#     (revenue - cost) * conversion_rate / cost
# The operations are chained in the same left-to-right order as the formula.
data['cltv'] = (
    data['revenue']
    .sub(data['cost'])
    .mul(data['conversion_rate'])
    .div(data['cost'])
)

# Average CLTV within each channel as a flat ('channel', 'cltv') frame.
channel_cltv = data.groupby('channel', as_index=False)['cltv'].mean()

# One bar per channel, coloured by channel; bar height is the mean CLTV.
fig = px.bar(
    data_frame=channel_cltv,
    x='channel',
    y='cltv',
    color='channel',
    title='Customer LifeTime Value by Channel',
)

# Replace the default column-name axis labels with readable ones.
fig.update_xaxes(title_text='Channel')
fig.update_yaxes(title_text='CLTV')
fig.show()

In [None]:
#           So the customer lifetime value from the social media and referral channels is the highest.

#           Now let’s compare the CLTV distributions of the social media and referral channels:

In [40]:
# Keep only the rows whose channel is 'social media' or 'referral'.
subset = data[data['channel'].isin(['social media', 'referral'])]

# Box plot of per-row CLTV, one box per remaining channel.
fig = px.box(
    data_frame=subset,
    x='channel',
    y='cltv',
    title='CLTV Distribution by Channel',
)

# Replace the default column-name axis labels with readable ones.
fig.update_xaxes(title_text='Channel')
fig.update_yaxes(title_text='CLTV')
fig.show()

In [None]:
#           There’s not much difference, but the Customer Lifetime Value from the Social Media channel is-
#            -slightly higher than that from the referral channel.

#           So this is how you can analyze and compare the customer lifetime value from various channels.

In [None]:
#                                                           SUMMARY


#           Customer lifetime value analysis is used to estimate the total value of customers to the business over-
#            -the lifetime of their relationship.
#           It helps companies determine how much to invest in customer acquisition and retention, as well as identify-
#            -the most valuable customers to prioritize for retention efforts.
#           