In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols 
import statsmodels.stats.api as stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
# Location of the cleaned campaign export that every later cell reads from.
# NOTE(review): this is a machine-specific absolute path; consider making it
# relative to a configurable data directory so the notebook runs elsewhere.
DATA_PATH = r'C:/Users/Andrei.Baidurov/Marketing_Proyecto/data/marketingcampaigns_clean.csv'

# Load the campaigns table into the working DataFrame.
df = pd.read_csv(DATA_PATH)

In [5]:
# Preview the first five rows (head() defaults to n=5) as a quick sanity check of the load.
df.head()

Unnamed: 0,campaign_name,start_date,end_date,budget,roi,type,target_audience,channel,conversion_rate,revenue,campaign_duration
0,Public-key multi-tasking throughput,2023-04-01 00:00:00,2024-02-23,8082.3,8680%,email,B2B,organic,0.4,709593.48,328
1,De-engineered analyzing task-force,2023-02-15 00:00:00,2024-04-22,17712.98,2817%,email,B2C,promotion,0.66,516609.1,432
2,Balanced solution-oriented Local Area Network,2022-12-20 00:00:00,2023-10-11,84643.1,441%,podcast,B2B,paid,0.28,458227.42,295
3,Distributed real-time methodology,2022-09-26 00:00:00,2023-09-27,14589.75,517%,webinar,B2B,organic,0.19,89958.73,366
4,Front-line executive infrastructure,2023-07-07 00:00:00,2024-05-15,39291.9,21%,social media,B2B,promotion,0.81,47511.35,313


In [6]:
# Net profit per campaign: revenue earned minus the budget spent.
df['net_profit'] = df['revenue'].sub(df['budget'])

# Show the inputs next to the derived column so the arithmetic can be eyeballed.
profit_preview_cols = ['campaign_name', 'revenue', 'budget', 'net_profit']
df.loc[:, profit_preview_cols].head()

Unnamed: 0,campaign_name,revenue,budget,net_profit
0,Public-key multi-tasking throughput,709593.48,8082.3,701511.18
1,De-engineered analyzing task-force,516609.1,17712.98,498896.12
2,Balanced solution-oriented Local Area Network,458227.42,84643.1,373584.32
3,Distributed real-time methodology,89958.73,14589.75,75368.98
4,Front-line executive infrastructure,47511.35,39291.9,8219.45


¿Qué canal de marketing se utiliza con mayor frecuencia y cuál genera mejor ROI?

In [8]:
# Channel analysis: how often each marketing channel is used (left panel)
# and the mean ROI per channel (right panel), as an interactive plotly figure.
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

# Use a pastel color palette (also reused by later plotting cells)
pastel_colors = ['rgba(255, 182, 193, 0.7)', 'rgba(173, 216, 230, 0.7)',
                 'rgba(144, 238, 144, 0.7)', 'rgba(255, 218, 185, 0.7)']

# The aggregates plotted below were not defined by any earlier visible cell,
# so on a fresh kernel this cell failed with NameError. Build them here.
if 'roi_numeric' not in df.columns:
    # NOTE(review): assumes 'roi' holds percent strings such as '441%' and that
    # the numeric ROI is a fraction (441% -> 4.41); this matches the `.2%`
    # label formatting used below. Confirm against the original preprocessing.
    df['roi_numeric'] = pd.to_numeric(
        df['roi'].astype(str).str.rstrip('%'), errors='coerce'
    ) / 100

# Number of campaigns per channel, most frequent first.
channel_counts = df['channel'].value_counts()
# Mean ROI per channel, best first.
roi_by_channel = (
    df.groupby('channel')['roi_numeric'].mean().sort_values(ascending=False)
)


def _bar_colors(n_bars):
    """Return one palette color per bar, cycling when there are more bars than colors."""
    return [pastel_colors[i % len(pastel_colors)] for i in range(n_bars)]


# Create interactive subplots
fig = make_subplots(rows=1, cols=2,
                    subplot_titles=['Frecuencia de uso de canales de marketing',
                                    'ROI promedio por canal de marketing'])

# Plot channel frequency
fig.add_trace(
    go.Bar(
        x=channel_counts.index,
        y=channel_counts.values,
        text=channel_counts.values,
        textposition='auto',
        marker_color=_bar_colors(len(channel_counts)),
        hovertemplate='Canal: %{x}<br>Frecuencia: %{y}<extra></extra>'
    ),
    row=1, col=1
)

# Plot average ROI by channel; labels render the fraction as a percentage
fig.add_trace(
    go.Bar(
        x=roi_by_channel.index,
        y=roi_by_channel.values,
        text=[f'{val:.2%}' for val in roi_by_channel.values],
        textposition='auto',
        marker_color=_bar_colors(len(roi_by_channel)),
        hovertemplate='Canal: %{x}<br>ROI: %{text}<extra></extra>'
    ),
    row=1, col=2
)

# Update layout
fig.update_layout(
    height=600,
    width=1000,
    showlegend=False,
    title_text='Análisis de Canales de Marketing',
    template='simple_white'
)

# Add axis labels
fig.update_yaxes(title_text="Frecuencia", row=1, col=1)
fig.update_yaxes(title_text="ROI promedio", row=1, col=2)
fig.update_xaxes(title_text="Canal", row=1, col=1)
fig.update_xaxes(title_text="Canal", row=1, col=2)

# Show the figure
fig.show()

¿Qué tipo de campaña genera más ingresos en promedio y cuál tiene mejor conversión?

In [9]:
# Campaign-type analysis: mean revenue (left) and mean conversion rate (right)
# per campaign type, each ranked from highest to lowest.
by_type = df.groupby('type')
avg_revenue_by_type = by_type['revenue'].mean().sort_values(ascending=False)
avg_conversion_by_type = by_type['conversion_rate'].mean().sort_values(ascending=False)

# Two side-by-side panels sharing one figure.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=[
        'Ingreso promedio por tipo de campaña',
        'Tasa de conversión promedio por tipo de campaña',
    ],
)

# One entry per panel: (series to plot, bar labels, hover template, y-axis title).
type_panels = [
    (
        avg_revenue_by_type,
        avg_revenue_by_type.map('${:,.2f}'.format).tolist(),
        'Tipo: %{x}<br>Ingreso promedio: %{text}<extra></extra>',
        "Ingreso promedio ($)",
    ),
    (
        avg_conversion_by_type,
        avg_conversion_by_type.map('{:.2%}'.format).tolist(),
        'Tipo: %{x}<br>Tasa de conversión: %{text}<extra></extra>',
        "Tasa de conversión",
    ),
]

for panel_col, (panel_series, panel_labels, panel_hover, panel_ytitle) in enumerate(type_panels, start=1):
    # Bar chart for this metric, colored with as many palette entries as there are bars.
    fig.add_trace(
        go.Bar(
            x=panel_series.index,
            y=panel_series.values,
            text=panel_labels,
            textposition='auto',
            marker_color=pastel_colors[:len(panel_series)],
            hovertemplate=panel_hover,
        ),
        row=1,
        col=panel_col,
    )
    # Axis titles for this panel.
    fig.update_yaxes(title_text=panel_ytitle, row=1, col=panel_col)
    fig.update_xaxes(title_text="Tipo de campaña", row=1, col=panel_col)

# Figure-wide appearance.
fig.update_layout(
    title_text='Análisis de Tipos de Campaña',
    template='simple_white',
    showlegend=False,
    width=1000,
    height=600,
)

# Render the interactive figure.
fig.show()

In [11]:
# ROI analysis: a 2x2 figure with the ROI distribution, ROI by channel,
# ROI vs. conversion rate, and ROI by campaign type.
from plotly.subplots import make_subplots
import numpy as np
# NOTE(review): this rebinds `stats`, which the first cell bound to
# statsmodels.stats.api, and the name is not used in this cell. Kept because
# later cells may rely on scipy.stats; consider importing it under another alias.
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go

# `roi_numeric` is not created by any earlier visible cell, so build it here
# when missing instead of failing with KeyError on a fresh kernel.
if 'roi_numeric' not in df.columns:
    # NOTE(review): assumes 'roi' holds percent strings such as '441%' and that
    # the numeric ROI is a fraction (441% -> 4.41), consistent with the `.2%`
    # formatting of ROI labels elsewhere in the notebook. Confirm.
    df['roi_numeric'] = pd.to_numeric(
        df['roi'].astype(str).str.rstrip('%'), errors='coerce'
    ) / 100

# Create a 2x2 subplot figure
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Distribución del ROI',
        'ROI por canal de marketing',
        'ROI vs. Tasa de conversión',
        'ROI por tipo de campaña'
    ),
    specs=[[{"type": "histogram"}, {"type": "box"}],
           [{"type": "scatter"}, {"type": "box"}]]
)

# 1. ROI Distribution (histogram)
fig.add_trace(
    go.Histogram(
        x=df['roi_numeric'],
        nbinsx=50,
        marker_color='rgba(255, 182, 193, 0.7)',
        hovertemplate='ROI: %{x:.2f}<br>Frecuencia: %{y}<extra></extra>'
    ),
    row=1, col=1
)

# 2. ROI by Channel (boxplot), one box per channel
for i, channel in enumerate(df['channel'].unique()):
    fig.add_trace(
        go.Box(
            y=df.loc[df['channel'] == channel, 'roi_numeric'],
            name=channel,
            marker_color=pastel_colors[i % len(pastel_colors)],
            # The y value is the ROI; the channel is the trace itself. The
            # previous template labelled the ROI number as "Canal".
            hovertemplate=f'Canal: {channel}<br>ROI: %{{y:.2f}}<extra></extra>'
        ),
        row=1, col=2
    )

# 3. ROI vs Conversion Rate (scatter)
fig.add_trace(
    go.Scatter(
        x=df['conversion_rate'],
        y=df['roi_numeric'],
        mode='markers',
        marker=dict(
            size=8,
            color='rgba(173, 216, 230, 0.7)',
            line=dict(width=1, color='rgba(173, 216, 230, 1.0)')
        ),
        hovertemplate='Tasa de conversión: %{x:.2f}<br>ROI: %{y:.2f}<extra></extra>'
    ),
    row=2, col=1
)

# 4. ROI by Campaign Type (boxplot), one box per campaign type
for i, campaign_type in enumerate(df['type'].unique()):
    fig.add_trace(
        go.Box(
            y=df.loc[df['type'] == campaign_type, 'roi_numeric'],
            name=campaign_type,
            marker_color=pastel_colors[i % len(pastel_colors)],
            # Same fix as for channels: label the value as ROI, not as the type.
            hovertemplate=f'Tipo: {campaign_type}<br>ROI: %{{y:.2f}}<extra></extra>'
        ),
        row=2, col=2
    )

# Pearson correlation between ROI and conversion rate. Series.corr skips rows
# where either value is missing, whereas np.corrcoef would return NaN.
roi_conv_corr = df['roi_numeric'].corr(df['conversion_rate'])

# Update layout. Annotations are deliberately NOT passed here: subplot titles
# are stored as layout annotations, so update_layout(annotations=[...]) would
# overwrite them.
fig.update_layout(
    height=800,
    width=1000,
    title_text='Análisis de ROI y Factores Asociados',
    template='simple_white',
    showlegend=False
)

# Attach the correlation note to the scatter panel it describes (row 2, col 1),
# positioned just inside the top edge of that panel's plotting area.
fig.add_annotation(
    x=0.5,
    y=0.97,
    xref="x domain",
    yref="y domain",
    yanchor="top",
    text=f"Correlación entre ROI y tasa de conversión: {roi_conv_corr:.2f}",
    showarrow=False,
    row=2, col=1
)

# Update axes
fig.update_xaxes(title_text="ROI", row=1, col=1)
fig.update_yaxes(title_text="Frecuencia", row=1, col=1)
fig.update_xaxes(title_text="Canal", row=1, col=2)
fig.update_yaxes(title_text="ROI", row=1, col=2)
fig.update_xaxes(title_text="Tasa de conversión", row=2, col=1)
fig.update_yaxes(title_text="ROI", row=2, col=1)
fig.update_xaxes(title_text="Tipo de campaña", row=2, col=2)
fig.update_yaxes(title_text="ROI", row=2, col=2)

fig.show()