In [16]:
import pandas as pd

## Load data

In [17]:
# Google Ads export. Example columns: 'Date', 'Impressions', 'Clicks', 'Cost'
google_ads = pd.read_csv(filepath_or_buffer='./datasets/google_ads.csv')

# Facebook Ads export. Example columns: 'date', 'impressions', 'clicks', 'spent'
facebook_ads = pd.read_csv(filepath_or_buffer='./datasets/facebook_ads.csv')

In [18]:
# Preview the first five rows of the Google Ads frame.
google_ads.head(n=5)

Unnamed: 0,Date,Impressions,Clicks,Cost
0,21/06/2023,2834,355,203.62
1,02/05/2023,7949,610,111.89
2,03/08/2023,2147,325,288.17
3,04/10/2023,3656,464,423.82
4,05/11/2023,1999,349,170.0


In [19]:
# Preview the first five rows of the Facebook Ads frame.
facebook_ads.head(n=5)

Unnamed: 0,date,impressions,clicks,spent
0,01/21/2023,9374,283,380.13
1,03/13/2023,6496,850,201.62
2,03/29/2023,9267,251,422.72
3,04/10/2023,289,786,230.38
4,04/13/2023,1284,828,308.9


## Standardize Column Names

In [20]:
# Rename columns to create a uniform structure: map the lowercase Facebook
# column names onto the capitalised names used by the Google Ads frame.
columns_mappings = {
    'date': 'Date',
    'impressions': 'Impressions',
    'clicks': 'Clicks',
    'spent': 'Cost',
}

# Rebind the result rather than passing inplace=True, so the cell does not
# mutate a frame that earlier cells already displayed. Labels missing from the
# frame are ignored by rename's default, so re-running this cell is harmless.
facebook_ads = facebook_ads.rename(columns=columns_mappings)

In [21]:
# Show a single row to confirm the renamed column headers.
facebook_ads.head(n=1)

Unnamed: 0,Date,Impressions,Clicks,Cost
0,01/21/2023,9374,283,380.13


## Normalize Data Formats

In [22]:
# Convert to standardized datetime format.
# The Facebook dates shown above are month-first (e.g. '01/21/2023'), so the
# format is spelled out instead of using format='mixed'. 'mixed' infers a format
# per value and would silently accept a stray day-first value rather than
# raising, which hides data-quality problems.
# NOTE(review): google_ads['Date'] looks day-first (e.g. '21/06/2023') and is not
# converted in this cell; confirm and parse it with '%d/%m/%Y' before the two
# frames are combined.
facebook_ads['Date'] = pd.to_datetime(facebook_ads['Date'], format='%m/%d/%Y')
facebook_ads.head()


Unnamed: 0,Date,Impressions,Clicks,Cost
0,2023-01-21,9374,283,380.13
1,2023-03-13,6496,850,201.62
2,2023-03-29,9267,251,422.72
3,2023-04-10,289,786,230.38
4,2023-04-13,1284,828,308.9


## Data Aggregation


Let's consider the next scenario, in which we have data from four different marketing channels: Social Media, Email, PPC (pay-per-click), and Display Ads.  

Each channel generates its own set of data, including impressions, clicks, conversions, and costs. 

We want to aggregate this data to analyze overall performance metrics such as total impressions, clicks, conversions, average CPC (Cost Per Click), average CPA (Cost Per Acquisition), and ROI (Return on Investment) by channel. 

In [25]:
# Sample data representing different marketing channels.
# Each of the four channels appears twice, giving eight rows in total.
data = dict(
    [
        ('Channel', ['Social Media', 'Email', 'PPC', 'Display Ads',
                     'Social Media', 'Email', 'PPC', 'Display Ads']),
        ('Impressions', [100000, 50000, 120000, 80000, 110000, 55000, 130000, 90000]),
        ('Clicks', [1000, 700, 1500, 800, 1100, 750, 1600, 850]),
        ('Conversions', [50, 60, 70, 40, 55, 65, 75, 45]),
        ('Total Spent', [2000, 1000, 2500, 1600, 2100, 1100, 2600, 1700]),
        ('Total Revenue', [5000, 4000, 7000, 3000, 5200, 4200, 7300, 3200]),
    ]
)

# Build the DataFrame: one column per dict key, in insertion order.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Channel,Impressions,Clicks,Conversions,Total Spent,Total Revenue
0,Social Media,100000,1000,50,2000,5000
1,Email,50000,700,60,1000,4000
2,PPC,120000,1500,70,2500,7000
3,Display Ads,80000,800,40,1600,3000
4,Social Media,110000,1100,55,2100,5200
5,Email,55000,750,65,1100,4200
6,PPC,130000,1600,75,2600,7300
7,Display Ads,90000,850,45,1700,3200


In [30]:
# Sum every metric per channel, then derive the ratio metrics from those totals:
#   CPC = Total Spent / Clicks
#   CPA = Total Spent / Conversions
#   ROI = (Total Revenue - Total Spent) / Total Spent, as a percentage
aggregated_data = (
    df.groupby('Channel', as_index=False)
    .sum()
    .assign(
        CPC=lambda totals: totals['Total Spent'] / totals['Clicks'],
        CPA=lambda totals: totals['Total Spent'] / totals['Conversions'],
        ROI=lambda totals: (
            (totals['Total Revenue'] - totals['Total Spent']) / totals['Total Spent']
        ) * 100,
    )
)

aggregated_data


Unnamed: 0,Channel,Impressions,Clicks,Conversions,Total Spent,Total Revenue,CPC,CPA,ROI
0,Display Ads,170000,1650,85,3300,6200,2.0,38.823529,87.878788
1,Email,105000,1450,125,2100,8200,1.448276,16.8,290.47619
2,PPC,250000,3100,145,5100,14300,1.645161,35.172414,180.392157
3,Social Media,210000,2100,105,4100,10200,1.952381,39.047619,148.780488
