In [1]:
import pandas as pd

## Load data

In [13]:
google_ads = pd.read_csv('./datasets/google_ads.csv')  # Example columns: 'Date', 'Impressions', 'Clicks', 'Cost'
facebook_ads = pd.read_csv('./datasets/facebook_ads.csv')  # Example columns: 'date', 'impressions', 'clicks', 'spent'

In [14]:
google_ads.head()

Unnamed: 0,Date,Impressions,Clicks,Cost
0,21/06/2023,2834,355,203.62
1,02/05/2023,7949,610,111.89
2,03/08/2023,2147,325,288.17
3,04/10/2023,3656,464,423.82
4,05/11/2023,1999,349,170.0


In [15]:
facebook_ads.head()

Unnamed: 0,date,impressions,clicks,spent
0,01/21/2023,9374,283,380.13
1,03/13/2023,6496,850,201.62
2,03/29/2023,9267,251,422.72
3,04/10/2023,289,786,230.38
4,04/13/2023,1284,828,308.9


## Standardize Column Names

In [8]:
# Rename columns to create a uniform structure
columns_mappings = {
    'date': 'Date',
    'impressions': 'Impressions',
    'clicks': 'Clicks',
    'spent': 'Cost'
}

facebook_ads.rename(columns=columns_mappings, inplace=True)

In [9]:
facebook_ads.head(1)

Unnamed: 0,Date,Impressions,Clicks,Cost
0,2023-01-21,4655,494,455.57


## Normalize Data Formats

# Different date formats
dates = ['2023-01-01', '1/2/2023', 'January 3, 2023', '2023.01.04', '2023/01/05']

# Convert to standardized datetime format
standardized_dates = pd.to_datetime(dates)

print(standardized_dates)

In [3]:
import pandas as pd

def standardize_dates(dates):
    """
    Standardize a list of dates in various formats to a uniform datetime format.

    Parameters:
    - dates (list of str): A list of date strings in various formats.

    Returns:
    - pd.Series: A Pandas Series of dates standardized to the datetime64 format.
    """
    return pd.to_datetime(dates)

# Example usage
dates = ['2023-01-01', '1/2/2023', 'January 3, 2023', '2023.01.04', '2023/01/05']
standardized_dates = standardize_dates(dates)
print(standardized_dates)


ValueError: time data "1/2/2023" doesn't match format "%Y-%m-%d", at position 1. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

## Data Aggregation:


Let's consider next scenario, where we have data from different marketing channels: Social Media, Email, PPC (pay-per-click), and Display Ads.  Each channel generates its own set of data, including impressions, clicks, conversions, and costs. 
We want to aggregate this data to analyze overall performance metrics such as total impressions, clicks, conversions, average CPC (Cost Per Click), average CPA (Cost Per Acquisition), and ROI (Return on Investment) by channel. 

In [1]:
# Sample data representing different marketing channels
data = {
    'Channel': ['Social Media', 'Email', 'PPC', 'Display Ads', 'Social Media', 'Email', 'PPC', 'Display Ads'],
    'Impressions': [100000, 50000, 120000, 80000, 110000, 55000, 130000, 90000],
    'Clicks': [1000, 700, 1500, 800, 1100, 750, 1600, 850],
    'Conversions': [50, 60, 70, 40, 55, 65, 75, 45],
    'Total Spent': [2000, 1000, 2500, 1600, 2100, 1100, 2600, 1700],
    'Total Revenue': [5000, 4000, 7000, 3000, 5200, 4200, 7300, 3200]
}

# Creating a DataFrame
df = pd.DataFrame(data)
df

NameError: name 'pd' is not defined

In [None]:
# Aggregating data by channel
aggregated_data = df.groupby('Channel').agg(
    Total_Impressions=pd.NamedAgg(column='Impressions', aggfunc='sum'),
    Total_Clicks=pd.NamedAgg(column='Clicks', aggfunc='sum'),
    Total_Conversions=pd.NamedAgg(column='Conversions', aggfunc='sum'),
    Total_Spent=pd.NamedAgg(column='Total Spent', aggfunc='sum'),
    Total_Revenue=pd.NamedAgg(column='Total Revenue', aggfunc='sum')
).reset_index()

# Calculating additional metrics
aggregated_data['CPC'] = aggregated_data['Total_Spent'] / aggregated_data['Total_Clicks']
aggregated_data['CPA'] = aggregated_data['Total_Spent'] / aggregated_data['Total_Conversions']
aggregated_data['ROI'] = ((aggregated_data['Total_Revenue'] - aggregated_data['Total_Spent']) / aggregated_data['Total_Spent']) * 100

aggregated_data
