In [1]:
import warnings
import pandas as pd
import plotly.express as px

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
warnings.filterwarnings('ignore')

In [2]:
# Read CSV File
df = pd.read_csv(r'C:\Users\Rafael_Fagundes\Downloads\GCM_Browser_YTD.csv'
                 , skipfooter=1, skiprows=10, parse_dates=['Date'])

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20650830 entries, 0 to 20650829
Data columns (total 4 columns):
 #   Column            Dtype         
---  ------            -----         
 0   Date              datetime64[ns]
 1   Placement         object        
 2   Browser/Platform  object        
 3   Impressions       int64         
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 630.2+ MB


In [4]:
# Add Premutive Flag and Browser mapping
df['Permutive_flag'] = df['Placement'].str.contains(r'Permutive|Permutvie|permu|Permu|ermu'
                                                                  , case=False, regex=True)

AudienceTypeName_code = '(CRM-1PD CRM|RETARGET-1PD RETARGET|CRM-1PD|MEDIA-1PD MEDIA|LAPSED-1PD PIXEL|ABM-1PD AUDIENCE|CATEGORY-3PD CONTEXTUAL|KCT-3PD CONTEXTUAL|IN MARKET-3PD CONTEXTUAL|ABM-3PD AUDIENCE|DMP-3PD AUDIENCE|IN MARKET-3PD BEHAVIOR|MEDIA-3PD RETARGET EXPOSED|AFFINITY-3PD BEHAVIOR|ROS-3PD PMP|PROXIMITY-3PD CONTEXTUAL|CTV-3PD RETARGET EXPOSED|DIGITAL-3PD RETARGET EXPOSED|DMP-3PD LAL|DOMAIN-3PD CONTEXTUAL|SITE IN MARKET-3PD BEHAVIOR|VIDEO-3PD RETARGET EXPOSED|PROXIMITY-3PD BEHAVIOR|PIXEL-3PD LAL|RETARGETING-3PD PIXEL|CRM-1PD-AIQ-NS|CE AFFINITY-3PD BEHAVIOR|MB AFFINITY-3PD BEHAVIOR|RETARGET-3PD COM SIZE|EMAIL-1PD CRM|3PD AUDIENCE|-ABM3PD AUDIENCE|CRM-1PD AUDIENCE|AFFINITY-1PD PIXEL|IN MARKET-1PD PIXEL|LAPSED USERS-1PD PIXEL|PERSONA-1PD RETARGET|MEDIA-1PD|LAPSED LAL-1PD PIXEL|CAMPAIGN-3PD LAL|ROS-3PD ROS|AUDIO-3PD RETARGET EXPOSED|BEHAVIORAL-3PD RETARGET|CONTEXTUAL-3PD RETARGET|CONTEXTUAL-3PD|BEHAVIORAL-3PD|PIXEL-3PD LAL-PRODUCT|CRM-3PD LAL|IN-MARKET-3PD BEHAVIOR|LAL-3PD BEHAVIOR|LAPSED USER-3PD BEHAVIOR|LAPSED-3PD BEHAVIOR|LAPSED-1PD PIXEL-1PD PIXEL|IN MARKET-3PD CSBTEXTUAL|KSTD-3PD CONTEXTUAL|SITE IN MARKET-3PD CONTEXTUAL|RETARGET-3PD RETARGET|CATEGORY-1PD CONTEXTUAL|KCT-1PD CONTEXTUAL|PIXEL-3PD LAL-FAMILY|LIN MARKET-3PD BEHAVIOR)'

df['Audience Type Name'] = df['Placement'].str.extract(AudienceTypeName_code)

browser_mapping = {
    'Chrome' : 'Chrome',
    'iPhone / iPod touch' : 'Safari',
    'Microsoft Edge' : 'Edge',
    'Android' : 'Chrome',
    'Unknown' : 'Other',
    'Safari' : 'Safari',
    'Firefox' : 'Firefox',
    'Mozilla' : 'Firefox',
    'iPad' : 'Safari',
    'Roku' : 'Other',
    'Opera' : 'Other',
    'Microsoft Internet Explorer' : 'Other',
    'Yandex' : 'Other',
    'Netscape Navigator' : 'Other',
    'Opera Next' : 'Other',
    '(not set)' : 'Other'
}

df['Browser Group'] = df['Browser/Platform'].map(browser_mapping)

In [5]:
# Group dataframe
df_grouped = df.groupby([
    'Date',
    'Campaign',
    'Audience Type Name',
    'Permutive_flag',
    'Browser Group'
]).agg({
    'Impressions': 'sum',
    'Clicks': 'sum',
    'Total Conversions': 'sum',
    'Total Revenue': 'sum',
    'View-through Revenue': 'sum',
    'Click-through Revenue': 'sum'
}).reset_index()

KeyError: 'Campaign'

Columns

In [None]:
Country_Code = '(_US_|_CA_|_UK_)'
BU_Code = '(_AW_|_B2B-CHANNEL_|_B2B-COM_|_B2B-CSG_|_B2B-ISG_|_B2B-MB_|_BRAND-PAN-DELL_|_CET_|_COM_|_CON_|_CONS_|_CSB_|_CSBO-AW_|_CSBO-AW/GSERIES_|_CSBO-CON_|_CSBO-CSB_|_CSBO-GSERIES_|_CSBO-OUT_|_CSBO-PAN-DELL_|_CSBO-PROGRAMS_|_CSBO-SB_|_CSG_|_DTC_|_GAM_|_ISG_|_ISG _|_MB_|_MPP_|_OUT_|_OUTCON_|_OUTSB_|_PANDELL_|_PAN-DELL_|_RET_|_RETAIL_|_SB_|_SBG-ISG_|_SBG-PROGRAMS_|_SBG-SB_|_SB-ISG_|_SB-SBG_|_SMB_)'
Funnel_Code = '(_AP_|_BSP_|_DCR_|_HP_|_LF_|_LG_|_MF_|_PP_|_UF_)'
FY_Code = '(2022_Q1_|2022_Q2_|2022_Q3_|2022_Q4_|2023_Q1_|2023_Q2_|2023_Q3_|2023_Q4_|2024_Q1_|2024_Q2_|2024_Q3_|2024_Q4_)'
Vehicle_Code = '(_AFF_|_AUDIO_|_BRAND_|_CTV_|_Direct_|_DISPLAY_|_DISPLAY-DCO_|_DISPLAY-MOBILE_|_EMAIL-BAU_|_EMAIL-LCM_|_LEAD GEN_|_LEAD-GEN_|_MOBILE_|_RET_|_SEM_|_SNS_|_SOCIAL_|_VIDEO_|_VIDEO-MOBILE_)'

df_grouped['Country'] = df_grouped['Campaign'].str.upper().str.extract(Country_Code)
df_grouped['BU'] = df_grouped['Campaign'].str.upper().str.extract(BU_Code)
df_grouped['Funnel'] = df_grouped['Campaign'].str.upper().str.extract(Funnel_Code)
#df_grouped['Fiscal Quarter'] = df_grouped['Campaign'].str.upper().str.extract(FY_Code)
df_grouped['Vehicle'] = df_grouped['Campaign'].str.upper().str.extract(Vehicle_Code)

df_grouped['Country'] = df_grouped['Country'].str.replace('_', '')
df_grouped['BU'] = df_grouped['BU'].str.replace('_', '')
df_grouped['Funnel'] = df_grouped['Funnel'].str.replace('_', '')
#df_grouped['Fiscal Quarter'] = df_grouped['Fiscal Quarter'].str.replace('_', '')
df_grouped['Vehicle'] = df_grouped['Vehicle'].str.replace('_', '')

Filter

In [None]:
df_filtered = df_grouped[~df_grouped['Campaign'].isin(['DO NOT USE'])]

Calendar

In [None]:
df_calendar = pd.read_csv('../../Helper/Calendar.csv', parse_dates=['Date'])

df_calendar = df_calendar[['Date','Fiscal Quarter','Fiscal Week Quarter']]

Merge

In [None]:
df_merged = pd.merge(df_filtered, df_calendar, on='Date', how='left')

In [None]:
df_regroup = df_merged.groupby([
    'Fiscal Quarter',
    'Fiscal Week Quarter',
    'Campaign',
    'Audience Type Name',
    'Permutive_flag',
    'Browser Group',
    'Country',
    'BU',
    'Funnel',
    'Vehicle'
]).agg({
    'Impressions': 'sum',
    'Clicks': 'sum',
    'Total Conversions': 'sum',
    'Total Revenue': 'sum',
    'View-through Revenue': 'sum',
    'Click-through Revenue': 'sum'
}).reset_index()

In [None]:
df_regroup.to_csv('csv/Browser_Mix_by_Campaign.csv')