In [None]:

# Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance


In [None]:

# Read the purchase and promotion tables from CSV files (paths are relative to
# the working directory; adjust if needed), then combine them side by side.
# NOTE(review): the inner merge below pairs rows purely by their row index, so it
# assumes row i of both files describes the same event - confirm, or merge on an
# explicit key column instead.
data1 = pd.read_csv('purchase_data.csv')
data2 = pd.read_csv('promotion_data.csv')
data = data1.merge(data2, how='inner', left_index=True, right_index=True)


In [None]:

# Parse the purchase time, then the promotion time, into pandas datetimes.
purchase_time = pd.to_datetime(data['timestamp'])
data['timestamp'] = purchase_time
promotion_time = pd.to_datetime(data['promotion_timestamp'])
data['promotion_timestamp'] = promotion_time

# Signed gap in hours, computed as promotion time minus purchase time.
# NOTE(review): with this operand order the value is negative whenever the
# purchase happens after the promotion - confirm that is the intended sign.
gap = promotion_time - purchase_time
data['promotion_diff_hours'] = gap.dt.total_seconds() / 3600


In [None]:

# Compute Click-Through Rate (CTR) = clicks / impressions.
# Rows with zero impressions have no defined CTR: plain division would yield
# inf (clicks > 0) or NaN (0 / 0), and a single inf makes the mean of any group
# containing it infinite. Masking zero denominators to NaN keeps those rows out
# of later averages, because pandas aggregations skip NaN by default.
impressions = data['impressions']
data['CTR'] = data['clicks'] / impressions.mask(impressions == 0)


In [None]:

# Line chart of CTR against the promotion/purchase time gap (in hours).
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=data, x='promotion_diff_hours', y='CTR', ax=ax)
ax.set(
    title='CTR vs. Time Delay Between Promotion and Purchase',
    xlabel='Delay (hours)',
    ylabel='Click-Through Rate',
)
ax.grid(True)
plt.show()


In [None]:

# Average CTR for each hour of the day at which the promotion occurred.
data['promotion_hour'] = data['promotion_timestamp'].dt.hour
hour_ctr = data.groupby('promotion_hour')['CTR'].mean()

# One bar per hour that appears in the data.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=hour_ctr.index, y=hour_ctr.values, ax=ax)
ax.set(title='CTR by Promotion Hour', xlabel='Hour of Day', ylabel='Average CTR')
ax.grid(True)
plt.show()


In [None]:

# Average CTR for each weekday on which the promotion occurred.
data['promotion_day'] = data['promotion_timestamp'].dt.day_name()

# Reindex so the bars follow calendar order (Monday first) instead of the
# alphabetical order groupby produces; weekdays absent from the data become NaN.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_ctr = data.groupby('promotion_day')['CTR'].mean().reindex(day_order)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=day_ctr.index, y=day_ctr.values, ax=ax)
ax.set(title='CTR by Day of Week', xlabel='Day', ylabel='Average CTR')
plt.xticks(rotation=45)
ax.grid(True)
plt.show()


In [None]:

# --- Customer Value vs Engagement Analysis ---
# Compare the purchase-price distribution of rows with at least one click
# against rows with none, to see whether price relates to promotion response.

# Boolean flag: True where the row recorded one or more clicks.
data['clicked'] = data['clicks'].gt(0)

clicked_rows = data.loc[data['clicked']]
not_clicked_rows = data.loc[~data['clicked']]

# Overlay the two kernel density estimates on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.kdeplot(data=clicked_rows, x='purchase_price', label='Clicked', fill=True, ax=ax)
sns.kdeplot(data=not_clicked_rows, x='purchase_price', label='Not Clicked', fill=True, ax=ax)
ax.set(
    title='Purchase Price Distribution: Clicked vs. Not Clicked Users',
    xlabel='Purchase Price',
    ylabel='Density',
)
ax.legend()
ax.grid(True)
plt.show()


In [None]:

# --- Temporal Heatmap of Engagement ---
# Mean CTR for every (weekday, hour-of-day) combination of the promotion time.

promo_time = data['promotion_timestamp']
data['weekday'] = promo_time.dt.day_name()
data['hour'] = promo_time.dt.hour

# Rows = weekdays in calendar order, columns = hours; combinations that never
# occur in the data are left as NaN.
ordered_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
heatmap_data = (
    data.groupby(['weekday', 'hour'])['CTR']
    .mean()
    .unstack()
    .reindex(ordered_days)
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(heatmap_data, cmap='YlGnBu', annot=True, fmt=".3f", linewidths=0.5, ax=ax)
ax.set(
    title='CTR Heatmap by Day of Week and Hour of Day',
    xlabel='Hour of Day',
    ylabel='Day of Week',
)
plt.show()


In [None]:

# --- Trend Analysis Over Time ---
# Track how purchase volume and average CTR evolve week over week and month
# over month.


def _summarize_by_period(frame, period_col):
    """Aggregate purchase count and mean CTR for each value of `period_col`.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain `period_col`, 'purchase_price' and 'CTR'.
    period_col : str
        Name of the column used as the grouping key.

    Returns
    -------
    pd.DataFrame
        One row per group with columns `period_col`, 'total_purchases'
        (count of non-null 'purchase_price') and 'avg_ctr' (mean of 'CTR').
    """
    return frame.groupby(period_col).agg(
        total_purchases=('purchase_price', 'count'),
        avg_ctr=('CTR', 'mean'),
    ).reset_index()


def _plot_purchases_and_ctr(stats, time_col, period_name, x_label):
    """Plot purchase counts and average CTR over time on separate y-axes.

    Counts and CTR (a ratio) live on very different scales; drawn on a single
    y-axis the CTR line collapses onto zero, so CTR gets its own right-hand
    axis that shares the x-axis with the counts.

    Parameters
    ----------
    stats : pd.DataFrame
        Output of `_summarize_by_period`.
    time_col : str
        Column of `stats` plotted on the x-axis.
    period_name : str
        Word used in the title and legend labels, e.g. 'Weekly'.
    x_label : str
        Label for the x-axis.
    """
    fig, ax_count = plt.subplots(figsize=(12, 5))
    ax_ctr = ax_count.twinx()  # right-hand y-axis sharing the same x-axis

    (count_line,) = ax_count.plot(
        stats[time_col], stats['total_purchases'],
        color='tab:blue', label=f'{period_name} Purchases',
    )
    (ctr_line,) = ax_ctr.plot(
        stats[time_col], stats['avg_ctr'],
        color='tab:orange', label=f'{period_name} Avg CTR',
    )

    ax_count.set_title(f'{period_name} Trends: Purchases and CTR')
    ax_count.set_xlabel(x_label)
    ax_count.set_ylabel('Purchase Count')
    ax_ctr.set_ylabel('Average CTR')
    ax_count.tick_params(axis='x', rotation=45)
    ax_count.grid(True)
    # Single legend covering both axes, attached to the twin axis so it is
    # drawn above both lines.
    ax_ctr.legend(handles=[count_line, ctr_line])
    fig.tight_layout()
    plt.show()


# Ensure timestamp is in datetime format (a no-op if it already is).
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Start-of-week / start-of-month timestamp for every purchase. The vectorized
# .dt.start_time accessor replaces a per-row Python lambda.
data['week'] = data['timestamp'].dt.to_period('W').dt.start_time
data['month'] = data['timestamp'].dt.to_period('M').dt.start_time

weekly_stats = _summarize_by_period(data, 'week')
monthly_stats = _summarize_by_period(data, 'month')

_plot_purchases_and_ctr(weekly_stats, 'week', 'Weekly', 'Week')
_plot_purchases_and_ctr(monthly_stats, 'month', 'Monthly', 'Month')
