In [None]:
import pandas as pd
import plotly.express as px

# Load the platform dataset from a CSV file addressed relative to the working directory.
# Every later cell in this notebook reads from the resulting `platform_data` frame.
platform_data = pd.read_csv('platform_data.csv')

In [5]:
# Inspect row count, column dtypes and memory usage of the freshly loaded frame.
platform_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2233519 entries, 0 to 2233518
Data columns (total 8 columns):
 #   Column             Dtype 
---  ------             ----- 
 0   campaign_id        int64 
 1   date               object
 2   impressions        int64 
 3   clicks             int64 
 4   video_completions  int64 
 5   device_type        object
 6   advertiser_name    object
 7   source             object
dtypes: int64(4), object(4)
memory usage: 136.3+ MB


In [7]:
# Convert the `date` column to datetime, overwriting it in place on `platform_data`.
# format='mixed' makes pandas infer the format of each element individually, so rows
# written in different date layouts are all accepted.
# NOTE(review): with per-element inference, ambiguous day/month strings (e.g. 01/02/2024)
# are presumably read month-first by default — confirm this matches the source data,
# or pass dayfirst=True.
platform_data['date'] = pd.to_datetime(platform_data['date'], format='mixed')

In [8]:
# Re-inspect the frame to confirm the dtype of `date` after the conversion above.
platform_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2233519 entries, 0 to 2233518
Data columns (total 8 columns):
 #   Column             Dtype         
---  ------             -----         
 0   campaign_id        int64         
 1   date               datetime64[ns]
 2   impressions        int64         
 3   clicks             int64         
 4   video_completions  int64         
 5   device_type        object        
 6   advertiser_name    object        
 7   source             object        
dtypes: datetime64[ns](1), int64(4), object(3)
memory usage: 136.3+ MB


### Total Impressions by Platform

In [75]:
# Total impressions per platform: one row per `source`, columns `source` and `impressions`.
# This frame feeds the bar chart in the next cell.
impressions_by_source = platform_data.groupby('source', as_index=False)['impressions'].sum()

In [None]:
import plotly.express as px

def _short_count(count):
    """Abbreviate a count: billions ('1.2B') from 1e9 upwards, millions ('3.4M') below."""
    if count >= 1e9:
        return f'{count/1e9:.1f}B'
    return f'{count/1e6:.1f}M'


# Human-readable bar labels, stored alongside the raw totals
impressions_by_source['text'] = impressions_by_source['impressions'].map(_short_count)

# Brand-like colour for each platform
platform_colors = {
    'Meta': '#0866FF',      # Blue
    'YouTube': '#F26522',   # Orange
    'TikTok': '#444444',    # Dark gray
    'Snapchat': '#c48a47',  # Brown
}

# Largest platform first
ranked_impressions = impressions_by_source.sort_values('impressions', ascending=False)

# Horizontal bars on a log-scaled x axis, one colour per platform
fig = px.bar(
    ranked_impressions,
    x='impressions',
    y='source',
    orientation='h',
    log_x=True,
    color='source',
    color_discrete_map=platform_colors,
    text='text',
    title='Total Impressions by Platform',
    labels={'source': 'Platform', 'impressions': 'Impressions'},
)

# Labels sit past the bar ends; title is centred
fig.update_traces(textposition='outside').update_layout(
    title={'x': 0.5},
    width=1000,
    height=500,
)
fig.show()

### CTR (Click-Through Rate) by platform

In [None]:
import plotly.express as px

# CTR per platform = total clicks / total impressions, expressed as a percentage.
# Rows are ordered from highest to lowest CTR.
ctr_by_source = (
    platform_data
    .groupby('source')[['clicks', 'impressions']]
    .sum()
    .reset_index()
    .assign(**{'CTR (%)': lambda totals: (totals['clicks'] / totals['impressions']) * 100})
    .sort_values('CTR (%)', ascending=False)
)

# Brand-like colour for each platform
platform_colors = {
    'Meta': '#0866FF',
    'YouTube': '#F26522',
    'TikTok': '#444444',
    'Snapchat': '#c48a47',
}

# Horizontal bar chart; the x axis title is blanked because each bar carries its own label
fig2 = px.bar(
    ctr_by_source,
    x='CTR (%)',
    y='source',
    orientation='h',
    color='source',
    color_discrete_map=platform_colors,
    text='CTR (%)',
    title='Click Performance by Platform',
    labels={'source': 'Platform', 'CTR (%)': ''},
)

# Two-decimal percentage labels outside the bars, centred title, no x tick labels
fig2.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
fig2.update_layout(title={'x': 0.5}, width=1000, height=500)
fig2.update_xaxes(showticklabels=False)

fig2.show()

##### This CTR (Click-Through Rate) visualization is an excellent complement to the analysis! It provides a different perspective that reveals platform efficiency rather than just volume.
##### YouTube’s ads are the most engaging with a CTR of 0.46%, while Meta’s ads have the lowest engagement at 0.15%, despite their high impressions.

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Video completion metrics per platform.
#
# Totals are aggregated with a single groupby instead of re-filtering the whole
# frame once per platform. Two ratios are derived from them:
#   * 'Completion per impression' - completions / impressions, as a percentage
#   * 'Completions per Click'     - completions / clicks, as a plain ratio
# Both denominators are floored at 1 (the guard previously applied to clicks only),
# so a platform with no impressions or no clicks yields a finite value, not NaN/inf.
completion_totals = (
    platform_data
    .groupby('source')[['impressions', 'clicks', 'video_completions']]
    .sum()
)

# One row per platform; column names are reused below as metric keys
completion_df = pd.DataFrame({
    'Platform': completion_totals.index,
    'Completion per impression': (
        completion_totals['video_completions']
        / completion_totals['impressions'].clip(lower=1) * 100
    ).to_numpy(),
    'Completions per Click': (
        completion_totals['video_completions']
        / completion_totals['clicks'].clip(lower=1)
    ).to_numpy(),
})

# Define platform colors
platform_colors = {
    'Meta': '#0866FF',
    'YouTube': '#F26522',
    'TikTok': '#444444',
    'Snapchat': '#c48a47'
}
# Neutral colour for any platform missing from the map, so an unexpected
# `source` value cannot abort the plot with a KeyError.
fallback_color = '#888888'

# One panel per metric, side by side
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=(
        'Completion per Impression',
        'Completions per Click'
    )
)

# Define the metrics to plot (left panel first)
metrics = ['Completion per impression', 'Completions per Click']

for col_idx, metric in enumerate(metrics, start=1):
    is_percentage = metric == 'Completion per impression'
    # Highest value first within each panel
    sorted_df = completion_df.sort_values(by=metric, ascending=False)

    for platform, value in zip(sorted_df['Platform'], sorted_df[metric]):
        fig.add_trace(
            go.Bar(
                x=[platform],
                y=[value],
                # Always name the trace so hover labels read correctly in both panels;
                # the legend itself is populated from the first panel only.
                name=platform,
                legendgroup=platform,  # a legend click toggles the platform in both panels
                marker_color=platform_colors.get(platform, fallback_color),
                text=f'{value:.2f}%' if is_percentage else f'{value:.2f}',
                textposition='outside',
                showlegend=(col_idx == 1)
            ),
            row=1, col=col_idx
        )

fig.update_layout(
    height=600,
    width=1000,
    title_text="Video Completion Metrics by Platform",
    title_x=0.5,
    title_y=0.95,
    # Horizontal legend centred underneath the panels
    legend=dict(
        orientation="h",
        yanchor="top",
        y=-0.15,
        xanchor="center",
        x=0.5
    ),
    margin=dict(t=150, b=100)
)
# Fixed y ranges; presumably chosen to leave headroom for the outside labels with
# the current data — revisit if the values change.
fig.update_yaxes(title_text="Rate (%)", row=1, col=1, range=[0, 70])
fig.update_yaxes(title_text="Ratio", row=1, col=2, range=[0, 150])
# Lift the subplot titles clear of the bar labels
fig.update_annotations(
    yshift=30
)

fig.show()

In [88]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Video completion metrics per platform.
# NOTE(review): this cell repeats the computation and figure of the preceding cell;
# only the y-axis titles differ. Consider keeping a single copy.
#
# Totals are aggregated with a single groupby instead of re-filtering the whole
# frame once per platform. Two ratios are derived from them:
#   * 'Completion per impression' - completions / impressions, as a percentage
#   * 'Completions per Click'     - completions / clicks, as a plain ratio
# Both denominators are floored at 1 (the guard previously applied to clicks only),
# so a platform with no impressions or no clicks yields a finite value, not NaN/inf.
completion_totals = (
    platform_data
    .groupby('source')[['impressions', 'clicks', 'video_completions']]
    .sum()
)

# DataFrame: one row per platform; column names are reused below as metric keys
completion_df = pd.DataFrame({
    'Platform': completion_totals.index,
    'Completion per impression': (
        completion_totals['video_completions']
        / completion_totals['impressions'].clip(lower=1) * 100
    ).to_numpy(),
    'Completions per Click': (
        completion_totals['video_completions']
        / completion_totals['clicks'].clip(lower=1)
    ).to_numpy(),
})

platform_colors = {
    'Meta': '#0866FF',
    'YouTube': '#F26522',
    'TikTok': '#444444',
    'Snapchat': '#c48a47'
}
# Neutral colour for any platform missing from the map, so an unexpected
# `source` value cannot abort the plot with a KeyError.
fallback_color = '#888888'

# One panel per metric, side by side
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=(
        'Completion per Impression',
        'Completions per Click'
    )
)

# Define the metrics to plot (left panel first)
metrics = ['Completion per impression', 'Completions per Click']

for col_idx, metric in enumerate(metrics, start=1):
    is_percentage = metric == 'Completion per impression'
    # Highest value first within each panel
    sorted_df = completion_df.sort_values(by=metric, ascending=False)

    for platform, value in zip(sorted_df['Platform'], sorted_df[metric]):
        fig.add_trace(
            go.Bar(
                x=[platform],
                y=[value],
                # Always name the trace so hover labels read correctly in both panels;
                # the legend itself is populated from the first panel only.
                name=platform,
                legendgroup=platform,  # a legend click toggles the platform in both panels
                marker_color=platform_colors.get(platform, fallback_color),
                text=f'{value:.2f}%' if is_percentage else f'{value:.2f}',
                textposition='outside',
                showlegend=(col_idx == 1)
            ),
            row=1, col=col_idx
        )

fig.update_layout(
    height=600,
    width=1000,
    title_text="Video Completion Metrics by Platform",
    title_x=0.5,
    title_y=0.95,
    # Horizontal legend centred underneath the panels
    legend=dict(
        orientation="h",
        yanchor="top",
        y=-0.15,
        xanchor="center",
        x=0.5
    ),
    margin=dict(t=150, b=100)
)

# Fixed y ranges; presumably chosen to leave headroom for the outside labels with
# the current data — revisit if the values change.
fig.update_yaxes(title_text="Percentage", row=1, col=1, range=[0, 70])
fig.update_yaxes(row=1, col=2, range=[0, 150])

# Lift the subplot titles clear of the bar labels
fig.update_annotations(
    yshift=30
)

fig.show()


##### Completion per Impression (%): the percentage of videos that were fully watched out of all the times they were shown.
##### Completions per Click: how many videos were fully watched for every click on the video.

##### Completion per Impression compares completed views with every time a video was shown, whether or not the viewer clicked.
##### Completions per Click compares completed views with clicks; a value above 1 means more videos were watched to the end than were clicked.


In [None]:
import plotly.express as px
import pandas as pd

# Sum video completions for every (platform, device type) pair
video_by_device = (
    platform_data
    .groupby(['source', 'device_type'])['video_completions']
    .sum()
    .reset_index()
)

# Platform totals, used to order the platforms from most to fewest completions
platform_totals = video_by_device.groupby('source')['video_completions'].sum().reset_index()
sorted_platforms = platform_totals.sort_values('video_completions', ascending=False)['source'].tolist()

# Share of each device type within its platform: broadcast the platform total
# onto every row with transform instead of merging the totals back in
video_by_device['video_completions_total'] = (
    video_by_device.groupby('source')['video_completions'].transform('sum')
)
video_by_device['Percentage'] = (video_by_device['video_completions'] / video_by_device['video_completions_total']) * 100

# Define colors for device types
device_colors = {
    'Desktop': '#0866FF',
    'SmartTV': '#F26522',
    'Smartphone': '#444444',
    'Tablet': '#c48a47'
}

# Grouped bar chart. The x axis is log-scaled, and stacking segments on a log axis
# makes their lengths meaningless (they no longer reflect the values or the printed
# shares), so each device type gets its own bar within the platform group.
fig3 = px.bar(video_by_device,
              y='source',
              x='video_completions',
              color='device_type',
              barmode='group',
              title='Video Completions by Device Type and Platform',
              labels={'source': 'Platform', 'video_completions': 'Video Completions', 'device_type': 'Device Type'},
              log_x=True,
              orientation='h',
              category_orders={'source': sorted_platforms},
              color_discrete_map=device_colors,
              # Bar label = the device type's share of its platform's completions
              text=video_by_device['Percentage'].apply(lambda x: f'{x:.1f}%')
)

fig3.update_layout(title={'x': 0.5}, width=1000, height=500)

fig3.show()

In [None]:
import plotly.express as px
import pandas as pd

# Tag every row with its calendar month (stored on `platform_data` as a monthly period)
platform_data['month_year'] = platform_data['date'].dt.to_period('M')

# Monthly impressions per platform; the period key is turned back into a
# timestamp because the line chart needs a datetime x axis
impressions_over_time = (
    platform_data
    .groupby(['month_year', 'source'])['impressions']
    .sum()
    .reset_index()
    .assign(month_year=lambda monthly: monthly['month_year'].dt.to_timestamp())
)

# Brand-like colour for each platform
platform_colors = {
    'Meta': '#0866FF',
    'YouTube': '#F26522',
    'TikTok': '#444444',
    'Snapchat': '#c48a47',
}

# One line per platform on a log-scaled y axis
fig4 = px.line(
    impressions_over_time,
    x='month_year',
    y='impressions',
    color='source',
    color_discrete_map=platform_colors,
    log_y=True,
    title='Impressions Over Time by Platform',
    labels={'month_year': 'Date', 'impressions': 'Impressions', 'source': 'Platform'},
)

fig4.update_layout(title={'x': 0.5}, width=1000, height=500)
fig4.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

