# In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from scipy import stats
from scipy.stats import chi2_contingency, pearsonr
import json

class AdCopywriterAI:
    def __init__(self):
        self.audience_segments = {
            'tech_enthusiasts': {
                'demographics': {'age': '25-45', 'income': 'high', 'education': 'college+'},
                'interests': ['technology', 'gadgets', 'innovation', 'startups'],
                'tone': 'technical',
                'pain_points': ['efficiency', 'staying updated', 'competitive advantage']
            },
            'budget_conscious': {
                'demographics': {'age': '30-55', 'income': 'medium', 'education': 'varied'},
                'interests': ['deals', 'savings', 'value', 'family'],
                'tone': 'practical',
                'pain_points': ['cost', 'value for money', 'budget constraints']
            },
            'millennials': {
                'demographics': {'age': '25-40', 'income': 'medium-high', 'education': 'college'},
                'interests': ['experiences', 'sustainability', 'social media', 'wellness'],
                'tone': 'casual',
                'pain_points': ['work-life balance', 'authenticity', 'environmental impact']
            },
            'small_business': {
                'demographics': {'age': '35-55', 'income': 'varied', 'education': 'varied'},
                'interests': ['growth', 'efficiency', 'ROI', 'automation'],
                'tone': 'professional',
                'pain_points': ['time management', 'scaling', 'competition']
            },
            'luxury_seekers': {
                'demographics': {'age': '35-65', 'income': 'high', 'education': 'college+'},
                'interests': ['premium', 'exclusivity', 'quality', 'status'],
                'tone': 'sophisticated',
                'pain_points': ['quality concerns', 'exclusivity', 'time constraints']
            }
        }

        self.ad_templates = {
            'tech_enthusiasts': [
                "Revolutionary {product} - {benefit}. Experience cutting-edge {feature}.",
                "Breakthrough {product} technology. {benefit} with advanced {feature}.",
                "Next-gen {product}: {benefit}. Powered by innovative {feature}."
            ],
            'budget_conscious': [
                "Save {discount}% on {product}! Get {benefit} without breaking the bank.",
                "Affordable {product} that delivers {benefit}. Starting at ${price}.",
                "Best value {product} - {benefit} at an unbeatable price!"
            ],
            'millennials': [
                "Transform your {lifestyle} with {product}. {benefit} that fits your vibe.",
                "Authentic {product} for real results. Experience {benefit} today.",
                "Sustainable {product} choice. {benefit} while caring for the planet."
            ],
            'small_business': [
                "Grow your business with {product}. Achieve {benefit} and boost ROI.",
                "Professional {product} solution. Streamline operations and {benefit}.",
                "Scale faster with {product}. Proven to deliver {benefit}."
            ],
            'luxury_seekers': [
                "Premium {product} collection. Exclusive {benefit} for discerning clients.",
                "Luxury {product} experience. Indulge in unparalleled {benefit}.",
                "Elite {product} - where sophistication meets {benefit}."
            ]
        }

    def generate_sample_performance_data(self, days=30):
        """Generate sample performance data for different audience segments"""
        segments = list(self.audience_segments.keys())
        data = []

        for day in range(days):
            date = datetime.now() - timedelta(days=day)
            for segment in segments:
                # Simulate different performance metrics for each segment
                impressions = random.randint(1000, 10000)
                clicks = int(impressions * random.uniform(0.01, 0.05))  # CTR 1-5%
                conversions = int(clicks * random.uniform(0.02, 0.15))  # CVR 2-15%
                cost = clicks * random.uniform(0.5, 3.0)  # CPC $0.5-$3

                data.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'segment': segment,
                    'impressions': impressions,
                    'clicks': clicks,
                    'conversions': conversions,
                    'cost': round(cost, 2),
                    'ctr': round(clicks/impressions * 100, 2),
                    'cvr': round(conversions/clicks * 100, 2) if clicks > 0 else 0,
                    'cpc': round(cost/clicks, 2) if clicks > 0 else 0
                })

        return pd.DataFrame(data)

    def analyze_performance(self, performance_df):
        """Analyze performance data to identify top-performing segments"""
        segment_performance = performance_df.groupby('segment').agg({
            'impressions': 'sum',
            'clicks': 'sum',
            'conversions': 'sum',
            'cost': 'sum',
            'ctr': 'mean',
            'cvr': 'mean',
            'cpc': 'mean'
        }).round(2)

        # Calculate ROI (assuming $50 value per conversion)
        conversion_value = 50
        segment_performance['roi'] = ((segment_performance['conversions'] * conversion_value - segment_performance['cost']) / segment_performance['cost'] * 100).round(2)

        # Rank segments by performance
        segment_performance['performance_score'] = (
            segment_performance['ctr'] * 0.3 +
            segment_performance['cvr'] * 0.4 +
            segment_performance['roi'] * 0.003
        ).round(2)

        return segment_performance.sort_values('performance_score', ascending=False)

    def generate_ad_copy(self, product, segment, performance_data=None, num_variants=3):
        """Generate tailored ad copy for specific audience segment"""
        segment_info = self.audience_segments.get(segment, self.audience_segments['millennials'])
        templates = self.ad_templates.get(segment, self.ad_templates['millennials'])

        # Define product-specific variables
        product_vars = {
            'software': {
                'benefits': ['boost productivity', 'save time', 'increase efficiency', 'streamline workflow'],
                'features': ['AI automation', 'cloud integration', 'real-time analytics', 'mobile app'],
                'price': '99'
            },
            'fitness': {
                'benefits': ['get fit fast', 'build strength', 'lose weight', 'improve health'],
                'features': ['personal training', 'nutrition plans', 'progress tracking', '24/7 support'],
                'price': '49'
            },
            'ecommerce': {
                'benefits': ['increase sales', 'reach more customers', 'grow revenue', 'expand globally'],
                'features': ['payment processing', 'inventory management', 'marketing tools', 'analytics'],
                'price': '29'
            }
        }

        vars_info = product_vars.get(product.lower(), product_vars['software'])

        ad_copies = []
        for i in range(num_variants):
            template = random.choice(templates)

            # Fill in template variables
            ad_copy = template.format(
                product=product.title(),
                benefit=random.choice(vars_info['benefits']),
                feature=random.choice(vars_info['features']),
                discount=random.choice([10, 15, 20, 25, 30]),
                price=vars_info['price'],
                lifestyle=random.choice(['lifestyle', 'routine', 'workflow', 'business'])
            )

            # Add call-to-action based on segment
            cta_map = {
                'tech_enthusiasts': ['Try Free Trial', 'Get Early Access', 'Download Now'],
                'budget_conscious': ['Get Deal Now', 'Save Today', 'Claim Discount'],
                'millennials': ['Start Journey', 'Join Community', 'Experience Now'],
                'small_business': ['Start Free Trial', 'Book Demo', 'Get Quote'],
                'luxury_seekers': ['Explore Collection', 'Request Consultation', 'Discover More']
            }

            cta = random.choice(cta_map.get(segment, ['Learn More', 'Get Started']))

            ad_copies.append({
                'headline': ad_copy,
                'cta': cta,
                'segment': segment,
                'tone': segment_info['tone'],
                'target_pain_point': random.choice(segment_info['pain_points'])
            })

        return ad_copies

    def optimize_based_on_performance(self, performance_df, product):
        """Generate optimized ad copy based on performance data"""
        top_segments = self.analyze_performance(performance_df).head(3).index.tolist()

        optimized_campaigns = {}
        for segment in top_segments:
            segment_perf = performance_df[performance_df['segment'] == segment]
            avg_ctr = segment_perf['ctr'].mean()
            avg_cvr = segment_perf['cvr'].mean()

            # Generate more variants for better performing segments
            num_variants = 5 if avg_ctr > 3.0 else 3

            ad_copies = self.generate_ad_copy(product, segment, num_variants=num_variants)

            optimized_campaigns[segment] = {
                'ad_copies': ad_copies,
                'performance_metrics': {
                    'avg_ctr': round(avg_ctr, 2),
                    'avg_cvr': round(avg_cvr, 2),
                    'total_conversions': int(segment_perf['conversions'].sum())
                }
            }

        return optimized_campaigns

    def create_advanced_visualizations(self, performance_df):
        """Render the full analytics suite for ``performance_df``.

        Prints a banner, then runs each chart/report helper in a fixed order,
        handing every one of them the same DataFrame.
        """
        print("📊 Generating Advanced Performance Analytics...")

        pipeline = (
            self._create_correlation_heatmap,    # 1. Performance Correlation Matrix
            self._create_time_series_analysis,   # 2. Time Series Analysis
            self._create_distribution_analysis,  # 3. Statistical Distribution Analysis
            self._create_efficiency_frontier,    # 4. Efficiency Frontier Analysis
            self._create_radar_chart,            # 5. Segment Performance Radar Chart
            self._create_roi_risk_analysis,      # 6. ROI vs Risk Analysis
            self._create_funnel_analysis,        # 7. Conversion Funnel Analysis
            self._perform_statistical_tests,     # 8. Statistical Significance Testing
        )
        for render in pipeline:
            render(performance_df)

    def _create_correlation_heatmap(self, df):
        """Show a Pearson correlation heatmap of the metrics with p-values.

        Each cell displays the correlation coefficient; hovering a cell also
        shows the two-sided p-value of that correlation.

        Args:
            df: Performance DataFrame containing the numeric columns
                ``impressions``, ``clicks``, ``conversions``, ``cost``,
                ``ctr``, ``cvr`` and ``cpc``.
        """
        metrics = ['impressions', 'clicks', 'conversions', 'cost', 'ctr', 'cvr', 'cpc']
        corr_data = df[metrics].corr()

        # The p-value matrix is symmetric, so each pair is tested once and
        # mirrored. The diagonal stays at 0 (a metric against itself).
        p_values = np.zeros((len(metrics), len(metrics)))
        for i, first in enumerate(metrics):
            for j in range(i + 1, len(metrics)):
                _, p_val = pearsonr(df[first], df[metrics[j]])
                p_values[i, j] = p_values[j, i] = p_val

        # Create heatmap with Plotly; customdata carries the p-values into
        # the hover label so the computed significance is actually surfaced.
        fig = go.Figure(data=go.Heatmap(
            z=corr_data.values,
            x=corr_data.columns,
            y=corr_data.columns,
            customdata=p_values,
            colorscale='RdBu',
            zmid=0,
            text=np.round(corr_data.values, 3),
            texttemplate='%{text}',
            textfont={"size": 10},
            hovertemplate=('<b>%{y} vs %{x}</b><br>Correlation: %{z:.3f}'
                           '<br>p-value: %{customdata:.4f}<extra></extra>')
        ))

        fig.update_layout(
            title='Performance Metrics Correlation Matrix<br><sub>Statistical relationships between key metrics</sub>',
            width=700,
            height=600,
            font=dict(size=12)
        )

        fig.show()

    def _create_time_series_analysis(self, df):
        """Plot per-segment daily trends of CTR, CVR, CPC and ROI in a 2x2 grid.

        The caller's DataFrame is not modified: dates are parsed on a copy.

        Args:
            df: Performance DataFrame with ``date`` (anything
                ``pd.to_datetime`` accepts), ``segment``, ``ctr``, ``cvr``,
                ``cpc``, ``conversions`` and ``cost`` columns.
        """
        # assign() returns a new frame, so the caller's 'date' column keeps
        # its original dtype (the previous in-place conversion leaked out).
        df = df.assign(date=pd.to_datetime(df['date']))

        # Create subplot with multiple metrics
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Click-Through Rate Trends', 'Conversion Rate Trends',
                          'Cost Per Click Trends', 'ROI Trends'),
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"secondary_y": False}, {"secondary_y": False}]]
        )

        colors = px.colors.qualitative.Set1

        for i, segment in enumerate(df['segment'].unique()):
            segment_data = df[df['segment'] == segment].sort_values('date')
            label = segment.replace("_", " ").title()
            # Wrap around the palette so more segments than colours does not
            # raise IndexError.
            color = colors[i % len(colors)]

            # Daily ROI in percent, assuming $50 of value per conversion.
            roi_data = ((segment_data['conversions'] * 50 - segment_data['cost']) / segment_data['cost'] * 100)

            panels = [
                (segment_data['ctr'], 1, 1),  # CTR trend
                (segment_data['cvr'], 1, 2),  # CVR trend
                (segment_data['cpc'], 2, 1),  # CPC trend
                (roi_data, 2, 2),             # ROI trend
            ]
            for y_values, row, col in panels:
                fig.add_trace(go.Scatter(
                    x=segment_data['date'], y=y_values,
                    mode='lines+markers', name=label,
                    # One legend entry per segment (from the first panel);
                    # the shared group makes it toggle all four panels.
                    legendgroup=label,
                    line=dict(color=color),
                    showlegend=(row == 1 and col == 1)
                ), row=row, col=col)

        fig.update_layout(
            title='Time Series Performance Analysis<br><sub>Trends and patterns across audience segments</sub>',
            height=800,
            showlegend=True
        )

        fig.update_xaxes(title_text="Date")
        fig.update_yaxes(title_text="CTR (%)", row=1, col=1)
        fig.update_yaxes(title_text="CVR (%)", row=1, col=2)
        fig.update_yaxes(title_text="CPC ($)", row=2, col=1)
        fig.update_yaxes(title_text="ROI (%)", row=2, col=2)

        fig.show()

    def _create_distribution_analysis(self, df):
        """Plot overlaid per-segment histograms of CTR, CVR, CPC and conversions.

        Histograms are normalised to probabilities and drawn in a 2x2 grid,
        one panel per metric, with one colour per segment.

        Args:
            df: Performance DataFrame with ``segment``, ``ctr``, ``cvr``,
                ``cpc`` and ``conversions`` columns.
        """
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('CTR Distribution by Segment', 'CVR Distribution by Segment',
                          'CPC Distribution by Segment', 'Conversion Distribution by Segment')
        )

        segments = df['segment'].unique()
        colors = px.colors.qualitative.Set1

        # (column, subplot row, subplot col) for each panel.
        panels = [('ctr', 1, 1), ('cvr', 1, 2), ('cpc', 2, 1), ('conversions', 2, 2)]

        for i, segment in enumerate(segments):
            segment_data = df[df['segment'] == segment]
            label = segment.replace('_', ' ').title()
            # Wrap around the palette so more segments than colours does not
            # raise IndexError.
            color = colors[i % len(colors)]

            for metric, row, col in panels:
                fig.add_trace(go.Histogram(
                    x=segment_data[metric], name=label,
                    # Every segment gets exactly one legend entry (taken from
                    # the CTR panel) so its colour can be identified; the
                    # shared group toggles that segment in all four panels.
                    legendgroup=label,
                    opacity=0.7, nbinsx=20, histnorm='probability',
                    marker_color=color,
                    showlegend=(metric == 'ctr')
                ), row=row, col=col)

        fig.update_layout(
            title='Statistical Distribution Analysis<br><sub>Probability distributions of key performance metrics</sub>',
            height=800,
            barmode='overlay'
        )

        fig.show()

    def _create_efficiency_frontier(self, df):
        """Scatter each segment's mean daily ROI against its ROI volatility.

        Volatility is the standard deviation of the segment's daily ROI.
        Dashed lines mark the cross-segment averages of both axes, splitting
        the chart into quadrants.

        Args:
            df: Performance DataFrame with ``segment``, ``conversions`` and
                ``cost`` columns.
        """
        roi_data = []
        volatility_data = []
        segment_names = []

        for segment in df['segment'].unique():
            segment_data = df[df['segment'] == segment]
            # Daily ROI in percent, assuming $50 of value per conversion.
            daily_roi = ((segment_data['conversions'] * 50 - segment_data['cost']) / segment_data['cost'] * 100)

            roi_data.append(daily_roi.mean())
            volatility_data.append(daily_roi.std())
            segment_names.append(segment.replace('_', ' ').title())

        # Create scatter plot
        fig = go.Figure(data=go.Scatter(
            x=volatility_data,
            y=roi_data,
            mode='markers+text',
            text=segment_names,
            textposition="top center",
            marker=dict(
                size=15,
                color=roi_data,
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="ROI (%)")
            ),
            hovertemplate='<b>%{text}</b><br>Volatility: %{x:.2f}<br>ROI: %{y:.2f}%<extra></extra>'
        ))

        fig.update_layout(
            title='Efficiency Frontier Analysis<br><sub>Risk (Volatility) vs Return (ROI) by Segment</sub>',
            xaxis_title='Risk (ROI Volatility)',
            yaxis_title='Return (Average ROI %)',
            width=800,
            height=600
        )

        # Add quadrant lines at the average return and average risk
        fig.add_hline(y=np.mean(roi_data), line_dash="dash", line_color="gray", opacity=0.5)
        fig.add_vline(x=np.mean(volatility_data), line_dash="dash", line_color="gray", opacity=0.5)

        fig.show()

    def _create_radar_chart(self, df):
        """Compare segments on a radar chart of min-max normalised metrics.

        Each metric is rescaled to 0-100 across segments. CPC is inverted so
        that, like every other axis, a larger value means better performance.

        Args:
            df: Performance DataFrame with ``segment``, ``ctr``, ``cvr``,
                ``impressions``, ``clicks``, ``conversions`` and ``cost``
                columns.
        """
        segment_metrics = df.groupby('segment').agg({
            'ctr': 'mean',
            'cvr': 'mean',
            'impressions': 'mean',
            'clicks': 'sum',
            'conversions': 'sum',
            'cost': 'sum'
        })

        # Segment-level CPC is total cost over total clicks (the previous
        # formula divided total cost by *average daily* clicks). Zero clicks
        # gives NaN rather than a division by zero.
        total_clicks = segment_metrics['clicks']
        segment_metrics['cpc'] = segment_metrics['cost'] / total_clicks.where(total_clicks != 0)
        # ROI in percent, assuming $50 of value per conversion.
        segment_metrics['roi'] = ((segment_metrics['conversions'] * 50 - segment_metrics['cost']) / segment_metrics['cost'] * 100)

        # Normalize metrics to 0-100 scale
        metrics_to_plot = ['ctr', 'cvr', 'impressions', 'conversions', 'roi', 'cpc']
        lower_is_better = {'cpc'}
        axis_labels = {'cpc': 'CPC (INVERTED)'}
        normalized_data = pd.DataFrame(index=segment_metrics.index)

        for col in metrics_to_plot:
            values = segment_metrics[col]
            low = values.min()
            span = values.max() - low
            if pd.isna(span) or span == 0:
                # All segments tie (or there is only one): min-max scaling is
                # undefined, so place everyone at mid-scale instead of NaN.
                scaled = pd.Series(50.0, index=values.index)
            else:
                scaled = (values - low) / span * 100
                if col in lower_is_better:
                    scaled = 100 - scaled
            normalized_data[col] = scaled

        # Create radar chart
        fig = go.Figure()
        theta = [axis_labels.get(m, m.upper().replace('_', ' ')) for m in metrics_to_plot]

        colors = px.colors.qualitative.Set1
        for i, segment in enumerate(normalized_data.index):
            fig.add_trace(go.Scatterpolar(
                r=normalized_data.loc[segment].values,
                theta=theta,
                fill='toself',
                name=segment.replace('_', ' ').title(),
                # Wrap around the palette so more segments than colours does
                # not raise IndexError.
                line_color=colors[i % len(colors)]
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 100]
                )),
            title='Segment Performance Radar Chart<br><sub>Normalized performance across key metrics (0-100 scale)</sub>',
            showlegend=True,
            width=700,
            height=700
        )

        fig.show()

    def _create_roi_risk_analysis(self, df):
        """Bubble chart of mean daily ROI vs ROI standard deviation per segment.

        Bubble size grows with total spend and bubble colour encodes total
        conversions on a scale shared by all segments. The hover label also
        reports a 95% t-based confidence interval for the mean daily ROI.

        Args:
            df: Performance DataFrame with ``segment``, ``conversions`` and
                ``cost`` columns.
        """
        segment_analysis = []

        for segment in df['segment'].unique():
            segment_data = df[df['segment'] == segment]
            # Daily ROI in percent, assuming $50 of value per conversion.
            daily_roi = ((segment_data['conversions'] * 50 - segment_data['cost']) / segment_data['cost'] * 100)

            # 95% confidence interval for the mean of the daily ROI values
            confidence_interval = stats.t.interval(0.95, len(daily_roi)-1,
                                                 loc=np.mean(daily_roi),
                                                 scale=stats.sem(daily_roi))

            segment_analysis.append({
                'segment': segment.replace('_', ' ').title(),
                'avg_roi': daily_roi.mean(),
                'roi_std': daily_roi.std(),
                'total_spend': segment_data['cost'].sum(),
                'total_conversions': segment_data['conversions'].sum(),
                'ci_lower': confidence_interval[0],
                'ci_upper': confidence_interval[1]
            })

        # Every segment is its own trace (to get a named legend entry), so the
        # colour range must be pinned explicitly. Otherwise each trace scales
        # its single value independently and all bubbles get the same colour.
        conversion_totals = [float(entry['total_conversions']) for entry in segment_analysis]
        color_min = min(conversion_totals, default=0.0)
        color_max = max(conversion_totals, default=0.0)

        # Create bubble chart
        fig = go.Figure()

        for position, row in enumerate(segment_analysis):
            fig.add_trace(go.Scatter(
                x=[row['roi_std']],
                y=[row['avg_roi']],
                mode='markers',
                marker=dict(
                    size=np.sqrt(row['total_spend']) / 5,  # Size based on spend
                    color=[float(row['total_conversions'])],
                    colorscale='Plasma',
                    cmin=color_min,
                    cmax=color_max,
                    # One shared colour bar instead of one stacked per trace.
                    showscale=(position == 0),
                    colorbar=dict(title="Total Conversions"),
                    line=dict(width=2, color='white')
                ),
                name=row['segment'],
                text=f"Segment: {row['segment']}<br>" +
                     f"ROI: {row['avg_roi']:.1f}% ± {row['roi_std']:.1f}<br>" +
                     f"95% CI: [{row['ci_lower']:.1f}, {row['ci_upper']:.1f}]<br>" +
                     f"Total Spend: ${row['total_spend']:.0f}<br>" +
                     f"Conversions: {row['total_conversions']}",
                hovertemplate='%{text}<extra></extra>'
            ))

        fig.update_layout(
            title='ROI vs Risk Analysis with Confidence Intervals<br><sub>Bubble size = Total Spend, Color = Conversions</sub>',
            xaxis_title='Risk (ROI Standard Deviation)',
            yaxis_title='Return (Average ROI %)',
            width=900,
            height=600,
            showlegend=True
        )

        fig.show()

    def _create_funnel_analysis(self, df):
        """Draw one impressions -> clicks -> conversions funnel per segment.

        Stage values are the segment totals over all rows of ``df``.
        """
        stage_columns = ['impressions', 'clicks', 'conversions']
        stage_labels = ['Impressions', 'Clicks', 'Conversions']

        # Total volume at every funnel stage, one row per segment.
        totals = df.groupby('segment')[stage_columns].sum()

        palette = px.colors.qualitative.Set1
        fig = go.Figure()

        per_segment = zip(totals.index, totals['impressions'],
                          totals['clicks'], totals['conversions'])
        for position, (segment, shown, clicked, converted) in enumerate(per_segment):
            funnel = go.Funnel(
                y=stage_labels,
                x=[shown, clicked, converted],
                name=segment.replace('_', ' ').title(),
                textinfo="value+percent initial",
                opacity=0.7,
                marker_color=palette[position]
            )
            fig.add_trace(funnel)

        fig.update_layout(
            title='Conversion Funnel Analysis by Segment<br><sub>User journey from impression to conversion</sub>',
            height=600,
            showlegend=True
        )

        fig.show()

    def _perform_statistical_tests(self, df):
        """Perform statistical significance tests"""
        print("\n📈 STATISTICAL SIGNIFICANCE ANALYSIS")
        print("=" * 50)

        # Chi-square test for segment vs conversion relationship
        contingency_table = pd.crosstab(df['segment'], df['conversions'] > df['conversions'].median())
        chi2, p_val, dof, expected = chi2_contingency(contingency_table)

        print(f"Chi-Square Test (Segment vs High Conversions):")
        print(f"Chi-square statistic: {chi2:.4f}")
        print(f"P-value: {p_val:.6f}")
        print(f"Degrees of freedom: {dof}")

        if p_val < 0.05:
            print("✅ Result: Statistically significant relationship between segment and conversion performance")
        else:
            print("❌ Result: No statistically significant relationship found")

        print("\n" + "-" * 30)

        # ANOVA test for CTR differences between segments
        segment_groups = [df[df['segment'] == seg]['ctr'].values for seg in df['segment'].unique()]
        f_stat, p_val_anova = stats.f_oneway(*segment_groups)

        print(f"ANOVA Test (CTR differences between segments):")
        print(f"F-statistic: {f_stat:.4f}")
        print(f"P-value: {p_val_anova:.6f}")

        if p_val_anova < 0.05:
            print("✅ Result: Significant differences in CTR between segments")
        else:
            print("❌ Result: No significant differences in CTR between segments")

        # Calculate effect size (Cohen's f)
        overall_mean = df['ctr'].mean()
        segment_means = df.groupby('segment')['ctr'].mean()
        pooled_std = df['ctr'].std()

        cohens_f = np.sqrt(np.sum(df.groupby('segment').size() * (segment_means - overall_mean)**2) /
                          (len(df) * pooled_std**2))

        print(f"Effect Size (Cohen's f): {cohens_f:.4f}")

        if cohens_f < 0.1:
            effect_size = "Small"
        elif cohens_f < 0.25:
            effect_size = "Medium"
        else:
            effect_size = "Large"

        print(f"Effect Size Interpretation: {effect_size}")

        return {
            'chi_square': {'statistic': chi2, 'p_value': p_val},
            'anova': {'f_statistic': f_stat, 'p_value': p_val_anova},
            'effect_size': {'cohens_f': cohens_f, 'interpretation': effect_size}
        }

    def visualize_performance(self, performance_df):
        """Legacy entry point kept for existing callers.

        Prints a notice and forwards ``performance_df`` unchanged to
        ``create_advanced_visualizations``.
        """
        print("🔄 Upgrading to advanced scientific visualizations...")
        self.create_advanced_visualizations(performance_df)

# ---------------------------------------------------------------------------
# Demo driver. These statements run at module top level (i.e. on import or
# cell execution) and leave `copywriter`, `performance_data`,
# `segment_analysis`, `products` and `optimized_campaigns` defined as
# module-level names.
# ---------------------------------------------------------------------------

# Initialize the AI Copywriter
copywriter = AdCopywriterAI()

print("🚀 GenAI Ad Copywriter for Google Ads - Initialized!")
print("=" * 60)

# Generate sample performance data (one row per day per segment)
print("📊 Generating sample performance data...")
performance_data = copywriter.generate_sample_performance_data(days=30)
print(f"Generated {len(performance_data)} data points across {len(copywriter.audience_segments)} segments")
print("\nSample data:")
print(performance_data.head())

print("\n" + "=" * 60)

# Analyze performance: aggregate per segment and rank by performance score
print("📈 Analyzing segment performance...")
segment_analysis = copywriter.analyze_performance(performance_data)
print("\nSegment Performance Rankings:")
print(segment_analysis)

print("\n" + "=" * 60)

# Generate ad copy for different products and segments
products = ['Software', 'Fitness', 'Ecommerce']

for product in products:
    print(f"\n✨ Generating ad copy for {product}...")

    # Generate optimized campaigns based on performance
    # (covers the three best-ranked segments only)
    optimized_campaigns = copywriter.optimize_based_on_performance(performance_data, product)

    for segment, campaign_data in optimized_campaigns.items():
        print(f"\n🎯 {segment.replace('_', ' ').title()} Segment:")
        print(f"Performance: CTR {campaign_data['performance_metrics']['avg_ctr']}%, "
              f"CVR {campaign_data['performance_metrics']['avg_cvr']}%")

        # Variants are numbered from 1 for display
        for i, ad in enumerate(campaign_data['ad_copies'], 1):
            print(f"\nVariant {i}:")
            print(f"  Headline: {ad['headline']}")
            print(f"  CTA: {ad['cta']}")
            print(f"  Target Pain Point: {ad['target_pain_point']}")
            print(f"  Tone: {ad['tone']}")

print("\n" + "=" * 60)

# Create performance visualizations (legacy entry point; forwards to
# create_advanced_visualizations)
print("📊 Creating performance visualizations...")
copywriter.visualize_performance(performance_data)

print("\n" + "=" * 60)

# A/B Test Simulator
class ABTestSimulator:
    """Simulate A/B tests between generated ad variants and report ML insights.

    The simulator asks the wrapped copywriter for ad variants, draws random
    (simulated) traffic numbers for each one, prints a comparison table and
    picks a winner by conversion rate.  It can additionally cluster a
    performance DataFrame with K-means and fit a random-forest model that
    predicts conversions.
    """

    # Tier labels and their plot colours, ordered from worst to best tier.
    _CLUSTER_NAMES = ('Low Performers', 'Medium Performers', 'High Performers')
    _CLUSTER_COLORS = ('red', 'blue', 'green')

    def __init__(self, copywriter, performance_df=None):
        """Store the copywriter and, optionally, default performance data.

        Args:
            copywriter: Object exposing
                ``generate_ad_copy(product, segment, num_variants=...)``.
            performance_df: Optional DataFrame used for the ML insights that
                follow each A/B test when none is passed to ``run_ab_test``.
        """
        self.copywriter = copywriter
        self.performance_df = performance_df

    def _resolve_performance_df(self, performance_df=None):
        """Return the DataFrame to use for ML insights, or None if unavailable.

        Lookup order: explicit argument, the instance default, then the
        module-level ``performance_data`` variable.  The last step keeps
        existing notebook code working, which relied on that global.
        """
        if performance_df is not None:
            return performance_df
        if self.performance_df is not None:
            return self.performance_df
        return globals().get('performance_data')

    def run_ab_test(self, product, segment, test_duration_days=14, performance_df=None):
        """Simulate A/B test between different ad variants.

        Args:
            product: Product name forwarded to the copywriter.
            segment: Audience segment key (e.g. ``'tech_enthusiasts'``).
            test_duration_days: Duration shown in the report header; it is
                only printed and does not alter the simulated numbers.
            performance_df: Optional performance DataFrame for the ML
                insights step; see ``_resolve_performance_df`` for fallbacks.

        Returns:
            DataFrame with one row per variant (empty if the copywriter
            produced no variants).
        """
        print(f"\n🧪 Running A/B Test for {product} - {segment.replace('_', ' ').title()} Segment")
        print(f"Test Duration: {test_duration_days} days")

        # Generate 3 ad variants
        variants = self.copywriter.generate_ad_copy(product, segment, num_variants=3)

        results = []
        for i, variant in enumerate(variants, 1):
            # Simulate performance with some randomness
            base_ctr = random.uniform(2.0, 5.0)
            base_cvr = random.uniform(3.0, 12.0)

            # Adjust based on segment characteristics
            if segment == 'tech_enthusiasts':
                base_ctr *= 1.2  # Tech users click more
            elif segment == 'luxury_seekers':
                base_cvr *= 1.3  # Higher conversion but lower clicks
                base_ctr *= 0.8

            impressions = random.randint(5000, 15000)
            clicks = int(impressions * base_ctr / 100)
            conversions = int(clicks * base_cvr / 100)
            cost = clicks * random.uniform(1.0, 2.5)

            results.append({
                'variant': f'Variant {i}',
                'headline': variant['headline'],
                'cta': variant['cta'],
                'impressions': impressions,
                'clicks': clicks,
                'conversions': conversions,
                'cost': round(cost, 2),
                'ctr': round(clicks/impressions * 100, 2),
                # Ratios fall back to 0 when their denominator is 0.
                'cvr': round(conversions/clicks * 100, 2) if clicks > 0 else 0,
                'cpc': round(cost/clicks, 2) if clicks > 0 else 0,
                'cost_per_conversion': round(cost/conversions, 2) if conversions > 0 else 0
            })

        test_df = pd.DataFrame(results)

        # Without variants there are no columns to display and no winner.
        if test_df.empty:
            print("\nNo ad variants were generated - nothing to compare.")
            return test_df

        # Display results
        print("\nA/B Test Results:")
        print(test_df[['variant', 'ctr', 'cvr', 'cost_per_conversion']])

        # Find winner (highest conversion rate)
        winner_idx = test_df['cvr'].idxmax()
        winner = test_df.loc[winner_idx]

        print(f"\n🏆 Winner: {winner['variant']}")
        print(f"   Headline: {winner['headline']}")
        print(f"   CTA: {winner['cta']}")
        print(f"   CVR: {winner['cvr']}%")
        print(f"   Cost per Conversion: ${winner['cost_per_conversion']}")

        # Add machine learning insights when performance data is available.
        insights_df = self._resolve_performance_df(performance_df)
        if insights_df is not None:
            self._create_ml_insights(insights_df)
        else:
            print("\nℹ️ No performance data supplied - skipping ML insights.")

        return test_df

    def _create_ml_insights(self, df):
        """Create ML-based insights: K-means clustering, 3D plot and summary.

        Args:
            df: DataFrame with ``ctr``, ``cvr``, ``cpc``, ``cost`` and
                ``segment`` columns.  It is not modified.
        """
        print("\n🤖 MACHINE LEARNING INSIGHTS")
        print("=" * 50)

        # Prepare data for clustering
        features = ['ctr', 'cvr', 'cpc', 'cost']
        X = df[features].fillna(0)

        n_clusters = len(self._CLUSTER_NAMES)
        if len(X) < n_clusters:
            # KMeans cannot form more clusters than there are samples.
            print(f"Not enough data points ({len(X)}) to form {n_clusters} clusters - skipping.")
            return

        # Standardize features
        X_scaled = StandardScaler().fit_transform(X)

        # Perform K-means clustering
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        raw_labels = kmeans.fit_predict(X_scaled)

        # K-means label ids carry no ordering, so rank the clusters by mean
        # conversion rate (the same metric run_ab_test uses to pick a winner)
        # before attaching the Low/Medium/High names.
        mean_cvr = df['cvr'].groupby(raw_labels).mean().sort_values()
        rank_of = {label: rank for rank, label in enumerate(mean_cvr.index)}

        df_clustered = df.copy()
        df_clustered['performance_cluster'] = pd.Series(raw_labels, index=df.index).map(rank_of)

        # Create cluster analysis visualization
        fig = go.Figure()

        for i in range(n_clusters):
            cluster_data = df_clustered[df_clustered['performance_cluster'] == i]
            fig.add_trace(go.Scatter3d(
                x=cluster_data['ctr'],
                y=cluster_data['cvr'],
                z=cluster_data['cpc'],
                mode='markers',
                marker=dict(
                    size=5,
                    color=self._CLUSTER_COLORS[i],
                    opacity=0.7
                ),
                name=self._CLUSTER_NAMES[i],
                text=cluster_data['segment'],
                hovertemplate='<b>%{text}</b><br>' +
                            'CTR: %{x:.2f}%<br>' +
                            'CVR: %{y:.2f}%<br>' +
                            'CPC: $%{z:.2f}<extra></extra>'
            ))

        fig.update_layout(
            title='3D Performance Clustering Analysis<br><sub>ML-based segmentation of ad performance</sub>',
            scene=dict(
                xaxis_title='Click-Through Rate (%)',
                yaxis_title='Conversion Rate (%)',
                zaxis_title='Cost Per Click ($)'
            ),
            width=900,
            height=700
        )

        fig.show()

        # Print cluster insights
        for i in range(n_clusters):
            cluster_data = df_clustered[df_clustered['performance_cluster'] == i]
            print(f"\n{self._CLUSTER_NAMES[i]} Characteristics:")
            print(f"  Average CTR: {cluster_data['ctr'].mean():.2f}%")
            print(f"  Average CVR: {cluster_data['cvr'].mean():.2f}%")
            print(f"  Average CPC: ${cluster_data['cpc'].mean():.2f}")
            print(f"  Dominant Segments: {cluster_data['segment'].value_counts().head(2).to_dict()}")

    def create_performance_prediction_model(self, df):
        """Create predictive model for performance forecasting.

        Fits a random-forest regressor that predicts ``conversions`` from
        ``impressions``, ``clicks``, ``cost``, ``ctr``, ``cpc`` and one-hot
        encoded ``segment``, prints MAE and R² on a 30% hold-out split, and
        shows a bar chart of the ten most important features.

        Args:
            df: DataFrame containing the columns named above.

        Returns:
            Tuple ``(rf_model, feature_importance)`` where the second item is
            a DataFrame of features sorted by descending importance.
        """
        print("\n🔮 PREDICTIVE MODELING")
        print("=" * 50)

        from sklearn.ensemble import RandomForestRegressor
        from sklearn.model_selection import train_test_split
        from sklearn.metrics import mean_absolute_error, r2_score

        # Prepare features for prediction
        df_encoded = df.copy()

        # Encode categorical variables
        segment_dummies = pd.get_dummies(df_encoded['segment'], prefix='segment')
        df_encoded = pd.concat([df_encoded, segment_dummies], axis=1)

        # Select features and target
        feature_cols = ['impressions', 'clicks', 'cost', 'ctr', 'cpc'] + list(segment_dummies.columns)
        X = df_encoded[feature_cols].fillna(0)
        y = df_encoded['conversions']

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Train model
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        rf_model.fit(X_train, y_train)

        # Make predictions
        y_pred = rf_model.predict(X_test)

        # Calculate metrics
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        print("Model Performance:")
        print(f"  Mean Absolute Error: {mae:.2f}")
        print(f"  R² Score: {r2:.3f}")

        # Feature importance
        feature_importance = pd.DataFrame({
            'feature': feature_cols,
            'importance': rf_model.feature_importances_
        }).sort_values('importance', ascending=False)

        # Create feature importance plot (top 10 rows of the sorted table)
        top_features = feature_importance.head(10)
        fig = go.Figure(data=go.Bar(
            x=top_features['importance'],
            y=top_features['feature'],
            orientation='h',
            marker_color='lightblue'
        ))

        fig.update_layout(
            title='Feature Importance for Conversion Prediction<br><sub>Top 10 most predictive features</sub>',
            xaxis_title='Importance Score',
            yaxis_title='Features',
            height=500
        )

        fig.show()

        return rf_model, feature_importance

# --- A/B testing demo: one simulated test per (product, segment) pair ---
ab_tester = ABTestSimulator(copywriter)

banner = "=" * 60
print("\n" + banner, "🧪 A/B TESTING SIMULATION", banner, sep="\n")

# Product / audience-segment pairs to test, in run order.
test_combinations = [
    ('Software', 'tech_enthusiasts'),
    ('Fitness', 'millennials'),
    ('Ecommerce', 'small_business'),
]

for product, segment in test_combinations:
    ab_tester.run_ab_test(product, segment)
    # Thin rule between consecutive test reports.
    print(f"\n{'-' * 40}")


🚀 GenAI Ad Copywriter for Google Ads - Initialized!
📊 Generating sample performance data...
Generated 150 data points across 5 segments

Sample data:
         date           segment  impressions  clicks  conversions    cost  \
0  2025-08-24  tech_enthusiasts         5104     197           20  155.89   
1  2025-08-24  budget_conscious         9351     237            5  326.67   
2  2025-08-24       millennials         2401     100            2   60.92   
3  2025-08-24    small_business         6194     135           12  113.22   
4  2025-08-24    luxury_seekers         7860     262           11  374.80   

    ctr    cvr   cpc  
0  3.86  10.15  0.79  
1  2.53   2.11  1.38  
2  4.16   2.00  0.61  
3  2.18   8.89  0.84  
4  3.33   4.20  1.43  

📈 Analyzing segment performance...

Segment Performance Rankings:
                  impressions  clicks  conversions      cost   ctr   cvr  \
segment                                                                    
small_business         164442 


📈 STATISTICAL SIGNIFICANCE ANALYSIS
Chi-Square Test (Segment vs High Conversions):
Chi-square statistic: 3.4667
P-value: 0.482965
Degrees of freedom: 4
❌ Result: No statistically significant relationship found

------------------------------
ANOVA Test (CTR differences between segments):
F-statistic: 0.4811
P-value: 0.749564
❌ Result: No significant differences in CTR between segments
Effect Size (Cohen's f): 0.1141
Effect Size Interpretation: Medium


🧪 A/B TESTING SIMULATION

🧪 Running A/B Test for Software - Tech Enthusiasts Segment
Test Duration: 14 days

A/B Test Results:
     variant   ctr   cvr  cost_per_conversion
0  Variant 1  4.03  9.19                11.53
1  Variant 2  3.59  7.87                23.04
2  Variant 3  5.27  3.77                29.81

🏆 Winner: Variant 1
   Headline: Breakthrough Software technology. streamline workflow with advanced real-time analytics.
   CTA: Get Early Access
   CVR: 9.19%
   Cost per Conversion: $11.53

🤖 MACHINE LEARNING INSIGHTS



Low Performers Characteristics:
  Average CTR: 4.16%
  Average CVR: 6.42%
  Average CPC: $2.33
  Dominant Segments: {'millennials': 8, 'tech_enthusiasts': 8}

Medium Performers Characteristics:
  Average CTR: 3.13%
  Average CVR: 6.27%
  Average CPC: $1.14
  Dominant Segments: {'luxury_seekers': 18, 'small_business': 16}

High Performers Characteristics:
  Average CTR: 2.47%
  Average CVR: 10.86%
  Average CPC: $2.21
  Dominant Segments: {'budget_conscious': 14, 'millennials': 10}

----------------------------------------

🧪 Running A/B Test for Fitness - Millennials Segment
Test Duration: 14 days

A/B Test Results:
     variant   ctr   cvr  cost_per_conversion
0  Variant 1  4.13  4.13                25.67
1  Variant 2  3.09  9.89                18.48
2  Variant 3  4.30  6.40                20.10

🏆 Winner: Variant 2
   Headline: Transform your routine with Fitness. build strength that fits your vibe.
   CTA: Experience Now
   CVR: 9.89%
   Cost per Conversion: $18.48

🤖 MACHINE LEARNING INSIGHTS


Low Performers Characteristics:
  Average CTR: 4.16%
  Average CVR: 6.42%
  Average CPC: $2.33
  Dominant Segments: {'millennials': 8, 'tech_enthusiasts': 8}

Medium Performers Characteristics:
  Average CTR: 3.13%
  Average CVR: 6.27%
  Average CPC: $1.14
  Dominant Segments: {'luxury_seekers': 18, 'small_business': 16}

High Performers Characteristics:
  Average CTR: 2.47%
  Average CVR: 10.86%
  Average CPC: $2.21
  Dominant Segments: {'budget_conscious': 14, 'millennials': 10}

----------------------------------------

🧪 Running A/B Test for Ecommerce - Small Business Segment
Test Duration: 14 days

A/B Test Results:
     variant   ctr    cvr  cost_per_conversion
0  Variant 1  2.21   3.70                60.56
1  Variant 2  2.36  11.07                16.82
2  Variant 3  3.66  10.80                18.08

🏆 Winner: Variant 2
   Headline: Scale faster with Ecommerce. Proven to deliver increase sales.
   CTA: Book Demo
   CVR: 11.07%
   Cost per Conversion: $16.82

🤖 MACHINE LEARNING INSIGHTS


Low Performers Characteristics:
  Average CTR: 4.16%
  Average CVR: 6.42%
  Average CPC: $2.33
  Dominant Segments: {'millennials': 8, 'tech_enthusiasts': 8}

Medium Performers Characteristics:
  Average CTR: 3.13%
  Average CVR: 6.27%
  Average CPC: $1.14
  Dominant Segments: {'luxury_seekers': 18, 'small_business': 16}

High Performers Characteristics:
  Average CTR: 2.47%
  Average CVR: 10.86%
  Average CPC: $2.21
  Dominant Segments: {'budget_conscious': 14, 'millennials': 10}

----------------------------------------
