In [None]:

# Customer Acquisition Analysis: Insights into Marketing Channel Efficiency, ROI, and CLTV

## Objective
# The objective of this project is to analyze customer acquisition data to identify trends and patterns that influence customer behavior, assess the effectiveness of marketing channels, and improve customer lifetime value (CLTV) by optimizing return on investment (ROI).

# Import Required Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:

# Load Data
# Read the raw customer-acquisition records; the path is relative to the
# notebook's working directory.
df = pd.read_csv('customer_acquisition_data.csv')

# Check for missing values and data structure.
# info() prints its report directly, so it can run anywhere in the cell.
df.info()

# Preview the data.
# Kept as the LAST expression of the cell: Jupyter only renders the value of
# the final expression, so a head() call placed mid-cell is silently dropped.
df.head()


In [None]:

## Data Preparation

# Ensure consistency in column names (lower-case, so the lookups below do not
# depend on how the CSV header was capitalised)
df.columns = df.columns.str.lower()

# Fail early with a readable message if an input column is absent, instead of
# an opaque KeyError from the middle of an arithmetic expression.
required_columns = {'revenue', 'cost', 'conversion_rate'}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise KeyError(f"missing required columns: {sorted(missing_columns)}")

# Add new features
# ROI Calculation: (revenue - cost) / cost.
# ROI is undefined when nothing was spent. Dividing by a zero cost would give
# +/-inf, which then poisons every mean, quantile and plot downstream, so
# zero-cost rows are masked to NaN and end up with a NaN ROI instead.
nonzero_cost = df['cost'].where(df['cost'] != 0)
df['roi'] = (df['revenue'] - df['cost']) / nonzero_cost

# CLTV Calculation: net profit plus a conversion-weighted ROI term.
# NOTE(review): this is a project-specific proxy rather than a standard CLTV
# formula (it adds a currency amount to a unitless ratio) -- confirm the
# intended definition with the analysis owner.
df['cltv'] = df['revenue'] - df['cost'] + (df['conversion_rate'] * df['roi'])

# Preview the modified dataset (last expression, so Jupyter renders it)
df.head()


In [None]:

## Exploratory Data Analysis

# 1. Distribution of Marketing Costs Across Channels
# One bar per channel; bar height is the mean of `cost` for that channel and
# no error bars are drawn.
# NOTE(review): `ci=` and `palette` without `hue` are deprecated in recent
# seaborn releases -- migrate once the minimum seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x='channel',
    y='cost',
    data=df,
    estimator=lambda values: values.mean(),
    ci=None,
    palette='muted',
    ax=ax,
)
ax.set_title('Average Cost by Marketing Channel', fontsize=16)
ax.set_xlabel('Channel', fontsize=14)
ax.set_ylabel('Cost', fontsize=14)
# Tilt the channel names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

# **Explanation:** This chart shows the average cost incurred by each marketing channel. Higher costs for a specific channel might indicate either higher investments or inefficiency. Channels with lower costs but higher ROI could be prioritized for optimization.


In [None]:

# 2. Revenue Distribution by Channel
# One bar per channel; bar height is the mean of `revenue` for that channel
# and no error bars are drawn.
# NOTE(review): `ci=` and `palette` without `hue` are deprecated in recent
# seaborn releases -- migrate once the minimum seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x='channel',
    y='revenue',
    data=df,
    estimator=lambda values: values.mean(),
    ci=None,
    palette='muted',
    ax=ax,
)
ax.set_title('Average Revenue by Marketing Channel', fontsize=16)
ax.set_xlabel('Channel', fontsize=14)
ax.set_ylabel('Revenue', fontsize=14)
# Tilt the channel names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

# **Explanation:** This chart highlights the revenue generated by each marketing channel. Channels generating higher revenue deserve further analysis to understand what drives their performance and whether they align with ROI metrics.


In [None]:

# 3. ROI Distribution by Channel
# One bar per channel; bar height is the mean of `roi` for that channel and
# no error bars are drawn.
# NOTE(review): `ci=` and `palette` without `hue` are deprecated in recent
# seaborn releases -- migrate once the minimum seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x='channel',
    y='roi',
    data=df,
    estimator=lambda values: values.mean(),
    ci=None,
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Average ROI by Marketing Channel', fontsize=16)
ax.set_xlabel('Channel', fontsize=14)
ax.set_ylabel('ROI', fontsize=14)
# Tilt the channel names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

# **Explanation:** The ROI chart indicates the efficiency of different channels in converting investments into revenue. Channels with higher ROI suggest more efficient use of marketing budgets.


In [None]:

# 4. Cost vs Revenue by Channel (Scatter Plot)
# Each point is one row of df: x = cost, y = revenue, colour = channel,
# marker area scaled by revenue (between 50 and 300 points^2).
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='cost',
    y='revenue',
    hue='channel',
    size='revenue',
    sizes=(50, 300),
    alpha=0.7,
    data=df,
    ax=ax,
)
ax.set_title('Cost vs Revenue by Channel', fontsize=16)
ax.set_xlabel('Cost', fontsize=14)
ax.set_ylabel('Revenue', fontsize=14)
# Rebuild the legend from the plotted handles so it carries a title.
ax.legend(title='Channel')
fig.tight_layout()
plt.show()

# **Explanation:** This scatter plot visualizes the relationship between cost and revenue for each channel. Channels clustered in the higher revenue and lower cost range represent better-performing marketing strategies.


In [None]:

# 5. ROI vs CLTV (Scatter Plot)
# Each point is one row of df: x = roi, y = cltv, colour = channel,
# marker area scaled by cltv (between 50 and 300 points^2).
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='roi',
    y='cltv',
    hue='channel',
    size='cltv',
    sizes=(50, 300),
    alpha=0.7,
    data=df,
    ax=ax,
)
ax.set_title('ROI vs CLTV by Channel', fontsize=16)
ax.set_xlabel('ROI', fontsize=14)
ax.set_ylabel('CLTV', fontsize=14)
# Rebuild the legend from the plotted handles so it carries a title.
ax.legend(title='Channel')
fig.tight_layout()
plt.show()

# **Explanation:** This scatter plot illustrates how ROI impacts CLTV across different channels. Channels with both high ROI and CLTV are ideal for scaling marketing efforts.


In [None]:

# 6. Customer Segmentation by CLTV
# Customers are split at the 25th and 75th percentiles of CLTV:
#   Low    : cltv <= 25th percentile
#   Medium : 25th percentile < cltv <= 75th percentile
#   High   : cltv > 75th percentile
# The shares are therefore fixed by construction at roughly 25 / 50 / 25 %.
high_cltv = df['cltv'].quantile(0.75)
low_cltv = df['cltv'].quantile(0.25)

segment_labels = ['Low CLTV', 'Medium CLTV', 'High CLTV']
df['cltv_segment'] = pd.cut(
    df['cltv'],
    bins=[-float('inf'), low_cltv, high_cltv, float('inf')],
    labels=segment_labels,
)

# value_counts() alone orders by frequency, which would make the wedge order
# (and hence the colours) depend on the data. sort_index() restores the
# categorical order Low -> Medium -> High so the chart is stable across runs.
segments = df['cltv_segment'].value_counts().sort_index()

plt.figure(figsize=(8, 8))
# wedgeprops width < 1 hollows out the centre, i.e. this renders as a donut.
plt.pie(segments, labels=segments.index, autopct='%1.1f%%', startangle=140, wedgeprops={'width': 0.3})
plt.title('Customer Segmentation by CLTV', fontsize=16)
plt.tight_layout()
plt.show()

# **Explanation:** This pie chart divides customers into Low, Medium, and High CLTV segments. This segmentation helps identify opportunities for improvement in customer value across various marketing strategies.


In [None]:

# 7. Revenue vs. CLTV (Regression Plot)
# Scatter of revenue against cltv with seaborn's fitted regression line drawn
# in red on top of semi-transparent points.
point_style = {'alpha': 0.7}
fit_line_style = {'color': 'red'}

fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x='revenue',
    y='cltv',
    data=df,
    scatter_kws=point_style,
    line_kws=fit_line_style,
    ax=ax,
)
ax.set_title('Revenue vs. CLTV', fontsize=16)
ax.set_xlabel('Revenue', fontsize=14)
ax.set_ylabel('CLTV', fontsize=14)
fig.tight_layout()
plt.show()

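# **Explanation:** The regression plot shows the relationship between revenue and CLTV. Because CLTV is computed in this notebook directly from revenue (revenue - cost + conversion_rate * ROI), a positive relationship is expected by construction; the plot is most useful for spotting customers whose CLTV departs from the trend. With that caveat, channels driving revenue also contribute significantly to long-term customer value as defined here.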


In [None]:

# Summary of Findings
# 1. Cost and revenue vary significantly across channels, with some channels yielding higher returns.
# 2. ROI analysis highlights the efficiency of marketing channels, with some channels providing better returns on investment.
# 3. CLTV segments are defined by quartile cut-offs, so roughly 25% of customers fall into the High CLTV category by construction (the top quartile) rather than as an empirical finding.
# 4. A strong positive correlation exists between revenue and CLTV, indicating that revenue-driving channels are critical for long-term value.

# Recommendations
# 1. Focus on channels with high ROI and revenue.
# 2. Invest in customer retention strategies to increase CLTV for Medium and Low CLTV segments.
# 3. Regularly evaluate the performance of marketing channels to allocate budgets effectively.
