In [None]:
import sys
import subprocess

def install_if_missing(package, import_name=None):
    """Install ``package`` with pip when it cannot be imported.

    Args:
        package: Distribution name passed to ``pip install``.
        import_name: Module name used for the import check. Defaults to
            ``package``; pass it when the two differ, otherwise the package
            would be reinstalled on every run.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
    """
    import importlib

    module_name = import_name or package
    try:
        importlib.import_module(module_name)
    except ImportError:
        print(f'Installing {package}...')
        # Use the kernel's own interpreter so the package lands in the
        # environment this notebook is actually running in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Let the running interpreter's import system notice the new files.
        importlib.invalidate_caches()

# Plotly is used by the interactive chart cells in Step 18; install it up front if absent.
install_if_missing('plotly')


### Step 1: Generate Churn Data

In [None]:
import pandas as pd
import numpy as np
import os
# Build a synthetic churn table. Seeding first makes every re-run of this
# cell produce the same rows.
np.random.seed(42)
n_samples = 1000

# The draws below consume the seeded global stream in order, so the column
# order here determines which values each column receives.
data = dict(
    customer_id=range(1, n_samples + 1),
    tenure_months=np.random.randint(low=1, high=60, size=n_samples),
    monthly_spend=np.random.uniform(low=20, high=200, size=n_samples),
    support_tickets=np.random.randint(low=0, high=10, size=n_samples),
    churned=np.random.choice([0, 1], size=n_samples, p=[0.75, 0.25]),
)
df_churn = pd.DataFrame(data)

# Create the output folder (no-op when it already exists) and persist the table.
os.makedirs('data', exist_ok=True)
df_churn.to_csv('data/churn_data.csv', index=False)
print('✅ Churn data saved to data/churn_data.csv')
df_churn.head()

### Step 2: Generate Synthetic Marketing Data

In [None]:
import pandas as pd
import numpy as np
# Build a synthetic daily marketing table. Re-seeding here keeps the cell
# reproducible regardless of what ran before it.
np.random.seed(42)
n_samples = 500
dates = pd.date_range(start='2023-01-01', periods=n_samples, freq='D')

# Draw order (spend, engagement, clicks, impressions) fixes which values
# each column takes from the seeded stream.
data = dict(
    date=dates,
    spend=np.random.uniform(low=1000, high=5000, size=n_samples),
    engagement=np.random.uniform(low=200, high=1000, size=n_samples),
    clicks=np.random.randint(low=50, high=300, size=n_samples),
    impressions=np.random.randint(low=1000, high=10000, size=n_samples),
)
df_marketing = pd.DataFrame(data)
print('✅ Generated synthetic marketing data')
df_marketing.head()

### Step 3: Load Churn Data

In [None]:
import pandas as pd
# Read back the churn table written in Step 1 so the analysis below works
# from the on-disk copy.
df_churn = pd.read_csv(filepath_or_buffer='data/churn_data.csv')
print(f'✅ Churn Data Loaded. Total Records: {len(df_churn)}')
df_churn.head(n=5)

### Step 4: Plot Marketing Spend and Engagement Over Time

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Overlay both daily series on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
for column, series_label in (('spend', 'Spend'), ('engagement', 'Engagement')):
    sns.lineplot(data=df_marketing, x='date', y=column, label=series_label, ax=ax)
ax.set_title('Spend and Engagement Over Time')
ax.legend()
plt.show()

### Step 5: Marketing Feature Correlation Heatmap

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Pairwise correlations between the four raw marketing metrics, annotated
# with the coefficient in each cell.
marketing_metrics = ['spend', 'engagement', 'clicks', 'impressions']
metric_corr = df_marketing[marketing_metrics].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(metric_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Marketing Feature Correlation Heatmap')
plt.show()

### Step 6: Engagement vs Clicks Scatter Plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# One point per day: engagement on x, clicks on y.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df_marketing, x='engagement', y='clicks', ax=ax)
ax.set_title('Engagement vs Clicks')
ax.set_xlabel('Engagement')
ax.set_ylabel('Clicks')
plt.show()

### Step 7: Histogram of Marketing Spend and Engagement

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Side-by-side histograms (30 bins, with KDE overlay) for spend and engagement.
fig, (spend_ax, engagement_ax) = plt.subplots(1, 2, figsize=(12, 5))
panels = (
    (spend_ax, 'spend', 'blue', 'Spend Distribution'),
    (engagement_ax, 'engagement', 'green', 'Engagement Distribution'),
)
for panel_ax, column, color, panel_title in panels:
    sns.histplot(df_marketing[column], bins=30, kde=True, color=color, ax=panel_ax)
    panel_ax.set_title(panel_title)
fig.tight_layout()
plt.show()

### Step 8: Box Plot of Marketing Clicks and Impressions

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Box plots of clicks and impressions drawn on a shared axis.
fig, ax = plt.subplots(figsize=(8, 6))
clicks_and_impressions = df_marketing[['clicks', 'impressions']]
sns.boxplot(data=clicks_and_impressions, ax=ax)
ax.set_title('Box Plot of Clicks and Impressions')
plt.show()

### Step 9: Pair Plot of Marketing Features

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# pairplot creates and returns its own grid/figure, so the title is set on
# that figure explicitly rather than on whichever figure pyplot considers
# current. plt is imported here because this cell calls plt.show().
pair_grid = sns.pairplot(df_marketing[['spend', 'engagement', 'clicks', 'impressions']])
# y > 1 places the title above the top row of panels.
pair_grid.figure.suptitle('Pair Plot of Marketing Features', y=1.02)
plt.show()

### Step 10: Rolling Average of Marketing Spend and Engagement

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Add 30-row rolling means for both series. min_periods=1 means the first
# rows average over however many observations are available so far.
for metric in ('spend', 'engagement'):
    df_marketing[f'{metric}_rolling'] = df_marketing[metric].rolling(window=30, min_periods=1).mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df_marketing, x='date', y='spend_rolling', label='Spend (30-day Rolling Avg)', ax=ax)
sns.lineplot(data=df_marketing, x='date', y='engagement_rolling', label='Engagement (30-day Rolling Avg)', ax=ax)
ax.set_title('30-Day Rolling Average of Spend and Engagement')
ax.legend()
plt.show()

### Step 11: Marketing Feature Engineering (Click-Through Rate)

In [None]:
# Click-through rate as a percentage: clicks divided by impressions, times 100.
df_marketing['ctr'] = df_marketing['clicks'].div(df_marketing['impressions']).mul(100)
print('Added Click-Through Rate (CTR) column to marketing data:')
df_marketing.head(n=5)

### Step 12: Marketing Correlation with P-Values

In [None]:
from scipy.stats import pearsonr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def corr_pval(df, col1, col2):
    """Run ``pearsonr`` on two columns of ``df``.

    Args:
        df: Table indexable by column name.
        col1: Name of the first column.
        col2: Name of the second column.

    Returns:
        A ``(correlation, p_value)`` tuple as produced by ``pearsonr``.
    """
    first, second = df[col1], df[col2]
    coefficient, p_value = pearsonr(first, second)
    return (coefficient, p_value)

# Requires the 'ctr' column added in Step 11.
features = ['spend', 'engagement', 'clicks', 'impressions', 'ctr']
corr_matrix = df_marketing[features].corr()

# Start from a float matrix of zeros: the diagonal keeps 0.0 and the frame
# has a numeric dtype (an index/columns-only DataFrame would be object dtype).
pval_matrix = pd.DataFrame(0.0, index=features, columns=features)

# The p-value of a pair does not depend on argument order, so compute each
# unordered pair once and mirror it across the diagonal.
for i, col1 in enumerate(features):
    for col2 in features[i + 1:]:
        _, pval = corr_pval(df_marketing, col1, col2)
        pval_matrix.loc[col1, col2] = pval
        pval_matrix.loc[col2, col1] = pval

# Cell colour encodes the correlation coefficient; the printed number in
# each cell is the p-value for that pair.
plt.figure(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    annot=pval_matrix,
    fmt='.2e',
    cmap='coolwarm',
    cbar_kws={'label': 'Pearson correlation'},
)
plt.title('Marketing Correlation Heatmap with P-Values')
plt.show()

### Step 13: Linear Regression for Marketing Clicks Prediction

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Predict clicks from the other raw metrics. 'ctr' is deliberately NOT a
# feature: it is computed as clicks / impressions * 100, so using it would
# leak the target into the inputs and make the error look better than it is.
X = df_marketing[['spend', 'engagement', 'impressions']]
y = df_marketing['clicks']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error for Clicks Prediction: {mse:.2f}')

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.5)
# Dashed diagonal marks perfect predictions (predicted == actual).
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel('Actual Clicks')
plt.ylabel('Predicted Clicks')
plt.title('Linear Regression: Actual vs Predicted Clicks')
plt.show()

### Step 14: Churn Data Pair Plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise views of the churn features, coloured by the churned flag.
# The title is set on the figure pairplot returns rather than on pyplot's
# implicit current figure; plt is imported here because the cell calls plt.show().
churn_grid = sns.pairplot(
    df_churn[['tenure_months', 'monthly_spend', 'support_tickets', 'churned']],
    hue='churned',
)
# y > 1 places the title above the top row of panels.
churn_grid.figure.suptitle('Churn Data Pair Plot', y=1.02)
plt.show()

### Step 15: Churn Rate by Tenure

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Bucket tenure into 5 equal-width bins. The new 'tenure_bin' column stays
# on df_churn, so it is also included in the Step 19 export.
df_churn['tenure_bin'] = pd.cut(df_churn['tenure_months'], bins=5)

# 'churned' is a 0/1 flag, so the bar height (its mean per bin) is the churn rate.
plt.figure(figsize=(10, 6))
sns.barplot(x='tenure_bin', y='churned', data=df_churn, errorbar=None)
plt.title('Churn Rate by Tenure Bin')
plt.xlabel('Tenure Bins (Months)')
plt.ylabel('Churn Rate')
plt.xticks(rotation=45)
plt.show()

### Step 16: Logistic Regression for Churn Prediction

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

X = df_churn[['tenure_months', 'monthly_spend', 'support_tickets']]
y = df_churn['churned']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features so the fitted coefficients are on a common scale.
# On raw inputs a feature measured in large units gets a small coefficient
# regardless of its influence, which makes the bar chart below misleading.
# The scaler is fit on the training split only, so nothing from the test
# split influences the transform.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# class_weight='balanced' reweights the classes to offset the 75/25 imbalance.
model = LogisticRegression(class_weight='balanced', random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
print('Churn Prediction Classification Report:')
print(classification_report(y_test, y_pred, zero_division=1))

# Derive tick labels from the feature list so they cannot drift out of order
# with model.coef_ (e.g. 'tenure_months' -> 'Tenure Months').
feature_labels = [name.replace('_', ' ').title() for name in X.columns]
plt.figure(figsize=(8, 6))
sns.barplot(x=model.coef_[0], y=feature_labels)
plt.title('Logistic Regression Feature Importance')
plt.xlabel('Coefficient (standardized features)')
plt.show()

### Step 17: K-Means Clustering of Churn Data

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

X = df_churn[['tenure_months', 'monthly_spend', 'support_tickets']]

# K-Means uses Euclidean distance, so on raw values the widest-ranging
# feature (monthly_spend) would dominate the clustering. Standardize first.
X_scaled = StandardScaler().fit_transform(X)

# n_init is pinned explicitly: its default differs between scikit-learn
# releases, which would otherwise change the clusters (and emit a warning).
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df_churn['cluster'] = kmeans.fit_predict(X_scaled)

# Plot in the original units; colour = cluster, marker style = churned flag.
plt.figure(figsize=(8, 6))
sns.scatterplot(x='tenure_months', y='monthly_spend', hue='cluster', style='churned', data=df_churn)
plt.title('K-Means Clustering of Customers')
plt.show()

### Step 18: Interactive Plotly Chart for Marketing Engagement

In [None]:
import plotly.io as pio
# Choose the renderer Plotly uses when fig.show() is called in the cells below.
# NOTE(review): 'notebook_connected' presumably loads plotly.js from a CDN, so
# charts may not render without internet access — confirm against the Plotly docs.
pio.renderers.default = 'notebook_connected'

In [None]:
import plotly.express as px
# Interactive line chart of the daily engagement series.
fig = px.line(
    data_frame=df_marketing,
    x='date',
    y='engagement',
    title='Interactive Engagement Over Time',
)
fig.show()


In [None]:
import plotly.express as px
# NOTE(review): this cell rebuilds exactly the same chart as the previous cell
# (same frame, same x/y columns, same title). It looks like an accidental
# duplicate — confirm and delete one of the two cells.
fig = px.line(df_marketing, x='date', y='engagement', title='Interactive Engagement Over Time')
fig.show()


### Step 19: Save Processed Data

In [None]:
import os
# Export both tables, including the columns added during the analysis above.
os.makedirs('data', exist_ok=True)
exports = (
    (df_marketing, 'data/marketing_analysis_data.csv'),
    (df_churn, 'data/churn_analysis_data.csv'),
)
# Write both files first, then report, matching the original write/print order.
for frame, csv_path in exports:
    frame.to_csv(csv_path, index=False)
print('✅ Marketing data saved to data/marketing_analysis_data.csv')
print('✅ Churn data saved to data/churn_analysis_data.csv')