In [1]:
#Imports 
import pandas as pd
import numpy as np
from datetime import datetime
from sqlalchemy import text
from Database.database import engine, SessionLocal
from Database.models import (
    FactCampaignInteraction, 
    DimCampaign, 
    FactUserAnalyticsSnapshot,
    CampaignPerformance
)
print("Imports successful")


Imports successful


In [2]:
# Snapshot key for "today" as a YYYYMMDD integer. It selects which user
# analytics snapshot is loaded below and is reused by later cells.
snapshot_date_key = int(datetime.now().strftime("%Y%m%d"))

print("\n" + "="*80)
print("LOADING CAMPAIGN DATA")
print("="*80)

# Each list is both the set of attributes read from every ORM row and the
# explicit DataFrame schema. Passing the schema to the DataFrame constructor
# means an empty query result still yields a frame with the expected columns
# (a column-less frame would make later `df['user_key']` lookups raise KeyError).
CAMPAIGN_COLUMNS = [
    'campaign_key',
    'campaign_name',
    'target_risk_segment',
    'campaign_type',
    'start_date_key',
    'end_date_key',
]
INTERACTION_COLUMNS = [
    'campaign_key',
    'user_key',
    'sent_flag',
    'opened_flag',
    'clicked_flag',
    'converted_flag',
]
USER_COLUMNS = [
    'user_key',
    'segment_label',
    'engagement_level',
    'churn_probability',
]


def _rows_to_frame(rows, columns):
    """Return a DataFrame with exactly `columns`, one record per item of `rows`.

    Each column value is read as the same-named attribute of the row object.
    An empty `rows` produces an empty frame that still carries `columns`.
    """
    records = [{col: getattr(row, col) for col in columns} for row in rows]
    return pd.DataFrame(records, columns=columns)


with SessionLocal() as session:
    # All campaigns and all interactions are loaded unfiltered.
    campaigns = session.query(DimCampaign).all()
    campaigns_df = _rows_to_frame(campaigns, CAMPAIGN_COLUMNS)

    interactions = session.query(FactCampaignInteraction).all()
    interactions_df = _rows_to_frame(interactions, INTERACTION_COLUMNS)

    # Only the user snapshot rows for today's snapshot key are loaded.
    users = session.query(FactUserAnalyticsSnapshot).filter(
        FactUserAnalyticsSnapshot.snapshot_date_key == snapshot_date_key
    ).all()
    users_df = _rows_to_frame(users, USER_COLUMNS)

print(f"Loaded {len(campaigns_df)} campaigns")
print(f"Loaded {len(interactions_df):,} interactions")
print(f"Loaded {len(users_df):,} users")

if users_df.empty:
    # Without snapshot rows every campaign/control group downstream is empty,
    # so make the cause visible here instead of in the metrics.
    print(f"WARNING: no user snapshot rows found for snapshot_date_key {snapshot_date_key}")

print("\nCampaigns to Analyze:")
print(campaigns_df[['campaign_name', 'target_risk_segment', 'start_date_key']].to_string(index=False))



OADING CAMPAIGN DATA
Loaded 50 campaigns
Loaded 2,000 interactions
Loaded 1,000 users

Campaigns to Analyze:
                campaign_name target_risk_segment  start_date_key
     Onboarding Campaign Mind           All Users        20251116
  Onboarding Campaign Compare           All Users        20250901
      Onboarding Campaign Cut           All Users        20250914
 Onboarding Campaign Cultural           All Users        20250927
       Upsell Campaign Minute         Medium Risk        20250903
    Upsell Campaign President         Medium Risk        20250910
        Upsell Campaign Owner         Medium Risk        20251126
    Retention Campaign Nature           High Risk        20250831
        Upsell Campaign Offer              Active        20250929
       Retention Campaign For           High Risk        20251115
   Onboarding Campaign Sister           All Users        20251112
   Retention Campaign Science           High Risk        20250929
        Upsell Campaign Place   

In [3]:
print("\n" + "="*80)
print("CALCULATING PER-CAMPAIGN PERFORMANCE")
print("="*80)

# ---------------------------------------------------------------------------
# Loop-invariant configuration (built once instead of once per campaign)
# ---------------------------------------------------------------------------

# Maps a campaign's target segment to the `segment_label` values used to pick
# a like-for-like control group. A value of None means "no segment filter".
# NOTE(review): the target values visible in this notebook's recorded output
# ('All Users', 'Medium Risk', 'High Risk', 'Active') are not keys of this
# mapping, so for those campaigns the lookup misses and the control group is
# every non-campaign user. Confirm the intended key names before relying on
# segment-matched controls.
segment_mapping = {
    'At-Risk': ['At-Risk Premium', 'Declining Premium', 'Need Attention'],
    'Dormant': ['Dormant Premium', 'Recently Churned'],
    'Medium': ['Medium Engaged', 'Potential Loyalists'],
    'Highly Engaged': ['Champions', 'Loyal Customers', 'Highly Engaged'],
    'All': None
}

# A user counts as retained when their segment label is not one of these
# and their churn probability is below the threshold.
CHURNED_SEGMENT_LABELS = ['Recently Churned', 'Dormant Premium']
CHURN_PROBABILITY_THRESHOLD = 0.5

# Today's date as YYYYMMDD, compared against campaign start/end date keys.
current_date = int(datetime.now().strftime("%Y%m%d"))


def _ensure_columns(frame, columns):
    """Return `frame` with any missing `columns` added (filled with NaN).

    Guards against column-less frames produced by empty query results, which
    would otherwise raise KeyError on the first column lookup.
    """
    missing = [col for col in columns if col not in frame.columns]
    if not missing:
        return frame
    return frame.reindex(columns=list(frame.columns) + missing)


def _retained_flag(users):
    """Return a 0/1 Series marking which rows of `users` count as retained."""
    not_churned_segment = ~users['segment_label'].isin(CHURNED_SEGMENT_LABELS)
    low_churn_risk = users['churn_probability'] < CHURN_PROBABILITY_THRESHOLD
    return (not_churned_segment & low_churn_risk).astype(int)


def _select_control_users(users, campaign_user_keys, target_segment):
    """Return a copy of the users that did not receive the campaign.

    When `target_segment` maps to a list of segment labels, the control group
    is restricted to those labels; otherwise it is every non-campaign user.
    """
    not_in_campaign = ~users['user_key'].isin(campaign_user_keys)
    target_labels = segment_mapping.get(target_segment) if target_segment else None
    if target_labels:
        return users[not_in_campaign & users['segment_label'].isin(target_labels)].copy()
    return users[not_in_campaign].copy()


def _campaign_status(start_date_key, end_date_key, today):
    """Return 'Completed', 'Active' or 'Scheduled' from YYYYMMDD date keys.

    Missing (None/NaN/NA) or zero date keys are treated as "not set".
    """
    if pd.notna(end_date_key) and end_date_key and today > end_date_key:
        return "Completed"
    if pd.notna(start_date_key) and start_date_key and today >= start_date_key:
        return "Active"
    return "Scheduled"


# Work on local views so empty inputs still expose the columns used below.
interactions_all = _ensure_columns(
    interactions_df, ['campaign_key', 'user_key', 'sent_flag', 'opened_flag']
)
users_all = _ensure_columns(
    users_df, ['user_key', 'segment_label', 'churn_probability']
)

campaign_performance_list = []

for idx, campaign in campaigns_df.iterrows():
    campaign_key = campaign['campaign_key']
    campaign_name = campaign['campaign_name']
    target_segment = campaign['target_risk_segment']

    print(f"\n{'='*60}")
    print(f"Campaign: {campaign_name}")
    print(f"   Target: {target_segment}")
    print(f"{'='*60}")

    campaign_interactions = interactions_all[interactions_all['campaign_key'] == campaign_key]

    if len(campaign_interactions) == 0:
        print("No interactions found - skipping")
        continue

    # Open rate: opened / sent, as a percentage (0 when nothing was sent).
    users_sent = int(campaign_interactions['sent_flag'].sum())
    users_opened = int(campaign_interactions['opened_flag'].sum())
    open_rate = (users_opened / users_sent * 100) if users_sent > 0 else 0

    print(f"   📧 Open Rate: {open_rate:.1f}% ({users_opened}/{users_sent})")

    # Campaign group: users with at least one interaction for this campaign
    # that also appear in the loaded user snapshot.
    campaign_user_keys = campaign_interactions['user_key'].unique()
    campaign_users = users_all[users_all['user_key'].isin(campaign_user_keys)].copy()

    # Control group: users without an interaction for this campaign.
    control_users = _select_control_users(users_all, campaign_user_keys, target_segment)

    print(f"Campaign Group: {len(campaign_users):,} users")
    print(f"Control Group:  {len(control_users):,} users")

    if len(control_users) == 0:
        # A segment-restricted control can be empty; widen it to everyone
        # outside the campaign so a comparison is still possible.
        print(f"WARNING: No control users found for segment '{target_segment}'")
        print("Using all non-campaign users as control")
        control_users = users_all[~users_all['user_key'].isin(campaign_user_keys)].copy()
        print(f"Control Group (fallback): {len(control_users):,} users")

    # Retention lift: campaign retention minus control retention (in points).
    campaign_users['is_retained'] = _retained_flag(campaign_users)
    control_users['is_retained'] = _retained_flag(control_users)

    # The mean of an empty Series is NaN, so empty groups report 0 instead.
    campaign_retention = (campaign_users['is_retained'].mean() * 100) if len(campaign_users) > 0 else 0
    control_retention = (control_users['is_retained'].mean() * 100) if len(control_users) > 0 else 0
    retention_lift = campaign_retention - control_retention

    print(f"Campaign Retention: {campaign_retention:.1f}%")
    print(f"Control Retention:  {control_retention:.1f}%")
    print(f"Retention Lift:     {retention_lift:+.1f}%")

    # Churn rate is defined here as the complement of the retention rate.
    campaign_churn_rate = 100 - campaign_retention
    control_churn_rate = 100 - control_retention

    print(f"Campaign Churn Rate: {campaign_churn_rate:.1f}%")
    print(f"Control Churn Rate:  {control_churn_rate:.1f}%")

    status = _campaign_status(campaign['start_date_key'], campaign['end_date_key'], current_date)

    print(f"Status: {status}")

    # One result record per campaign; rates are rounded to one decimal place.
    campaign_performance_list.append({
        'campaign_key': campaign_key,
        'snapshot_date_key': snapshot_date_key,
        'campaign_name': campaign_name,
        'target_segment': target_segment,
        'launch_date': campaign['start_date_key'],
        'users_sent': users_sent,
        'users_opened': users_opened,
        'open_rate': round(open_rate, 1),
        'campaign_retention_rate': round(campaign_retention, 1),
        'control_retention_rate': round(control_retention, 1),
        'retention_lift': round(retention_lift, 1),
        'campaign_churn_rate': round(campaign_churn_rate, 1),
        'control_churn_rate': round(control_churn_rate, 1),
        'campaign_size': len(campaign_users),
        'control_size': len(control_users),
        'status': status,
        'created_at': datetime.now()
    })

campaign_performance_df = pd.DataFrame(campaign_performance_list)

print("\n" + "="*80)
print("CAMPAIGN PERFORMANCE SUMMARY")
print("="*80)
if campaign_performance_df.empty:
    # An empty result frame has no columns, so the column selection below
    # would raise KeyError; report the situation instead.
    print("No campaign performance rows were produced")
else:
    print(campaign_performance_df[[
        'campaign_name',
        'target_segment',
        'open_rate',
        'retention_lift',
        'status'
    ]].to_string(index=False))


CALCULATING PER-CAMPAIGN PERFORMANCE

Campaign: Onboarding Campaign Mind
   Target: All Users
   📧 Open Rate: 74.4% (32/43)
Campaign Group: 42 users
Control Group:  958 users
Campaign Retention: 50.0%
Control Retention:  45.6%
Retention Lift:     +4.4%
Campaign Churn Rate: 50.0%
Control Churn Rate:  54.4%
Status: Completed

Campaign: Onboarding Campaign Compare
   Target: All Users
   📧 Open Rate: 72.7% (24/33)
Campaign Group: 33 users
Control Group:  967 users
Campaign Retention: 48.5%
Control Retention:  45.7%
Retention Lift:     +2.8%
Campaign Churn Rate: 51.5%
Control Churn Rate:  54.3%
Status: Completed

Campaign: Onboarding Campaign Cut
   Target: All Users
   📧 Open Rate: 75.0% (27/36)
Campaign Group: 35 users
Control Group:  965 users
Campaign Retention: 48.6%
Control Retention:  45.7%
Retention Lift:     +2.9%
Campaign Churn Rate: 51.4%
Control Churn Rate:  54.3%
Status: Completed

Campaign: Onboarding Campaign Cultural
   Target: All Users
   📧 Open Rate: 78.3% (

In [4]:
print("\n" + "="*80)
print("SAVING CAMPAIGN PERFORMANCE TO DATABASE")
print("="*80)

# Build every ORM record BEFORE touching the database, so a bad value in the
# result frame (e.g. a NaN that cannot be converted to int) fails here, while
# the previously stored snapshot is still intact.
records = [
    CampaignPerformance(
        campaign_key=int(row['campaign_key']),
        snapshot_date_key=int(row['snapshot_date_key']),
        campaign_name=row['campaign_name'],
        target_segment=row['target_segment'],
        # launch_date may be missing; store NULL rather than failing on int(NaN).
        launch_date=int(row['launch_date']) if pd.notna(row['launch_date']) else None,
        users_sent=int(row['users_sent']),
        users_opened=int(row['users_opened']),
        open_rate=float(row['open_rate']),
        campaign_retention_rate=float(row['campaign_retention_rate']),
        control_retention_rate=float(row['control_retention_rate']),
        retention_lift=float(row['retention_lift']),
        campaign_churn_rate=float(row['campaign_churn_rate']),
        control_churn_rate=float(row['control_churn_rate']),
        campaign_size=int(row['campaign_size']),
        control_size=int(row['control_size']),
        status=row['status'],
        created_at=row['created_at']
    )
    for _, row in campaign_performance_df.iterrows()
]

# Replace this snapshot's rows atomically: the delete and the inserts share
# one transaction, so a failed insert rolls the delete back instead of
# leaving the table without data for this snapshot date.
with SessionLocal() as session:
    try:
        deleted = session.query(CampaignPerformance).filter(
            CampaignPerformance.snapshot_date_key == snapshot_date_key
        ).delete()
        session.add_all(records)
        session.commit()
    except Exception:
        session.rollback()
        raise

if deleted > 0:
    print(f"Deleted {deleted} existing records for snapshot date {snapshot_date_key}")

print(f"Saved {len(campaign_performance_df)} campaigns to database")


# Read back the row count for this snapshot as a sanity check on the write.
with SessionLocal() as session:
    count = session.query(CampaignPerformance).filter(
        CampaignPerformance.snapshot_date_key == snapshot_date_key
    ).count()

    print(f"Verified: {count} records in database for snapshot {snapshot_date_key}")



SAVING CAMPAIGN PERFORMANCE TO DATABASE
Saved 50 campaigns to database
Verified: 50 records in database for snapshot 20251128
