In [1]:
# @title Setup
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

# Authenticate the Colab user first, so credentials are already available when
# the BigQuery client below resolves them.
auth.authenticate_user()

project = 'sage-dragon-413617'  # Project ID inserted based on the query results selected to explore
location = 'US'  # Location inserted based on the query results selected to explore

# Client used by the later cells to look up the original query job.
client = bigquery.Client(project=project, location=location)

# Render DataFrames with Colab's interactive data-table formatter.
data_table.enable_dataframe_formatter()

## Reference SQL syntax from the original job
Use the ```jobs.query```
[method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) to
return the SQL syntax from the job. This can be copied from the output cell
below to edit the query now or in the future. Alternatively, you can use
[this link](https://console.cloud.google.com/bigquery?j=sage-dragon-413617:US:bquxjob_4011429c_18e26db4a42)
back to BigQuery to edit the query within the BigQuery user interface.

In [2]:
# Look up the earlier BigQuery job by its ID and print the SQL text it ran.

job = client.get_job(
    job_id='bquxjob_4011429c_18e26db4a42',  # Job ID inserted based on the query results selected to explore
)
print(job.query)

SELECT * 
FROM `bigquery-public-data.google_ads_transparency_center.removed_creative_stats` 
LIMIT 100000


# Result set loaded from BigQuery job as a DataFrame
Query results are read from the job that was already run in BigQuery,
identified by its Job ID, so the query does not need to be re-run to explore
the results. The ```to_dataframe```
[method](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.QueryJob.html#google.cloud.bigquery.job.QueryJob.to_dataframe)
downloads the results to a pandas DataFrame by using the BigQuery Storage API.

To edit the query, use the BigQuery SQL editor or the
```Optional:``` sections below.

In [3]:
# Fetch the previously run job by its ID and download its result set into a
# DataFrame; the bare name on the last line displays it.

job = client.get_job(
    job_id='bquxjob_4011429c_18e26db4a42',  # Job ID inserted based on the query results selected to explore
)
results = job.to_dataframe()
results



Unnamed: 0,creative_page_url,region_stats,audience_selection_approach_info,disapproval
0,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'BE', 'first_shown': '2023-11...","{'demographic_info': 'CRITERIA_EXCLUDED', 'geo...","[{'removal_reason': 'Gambling and games', 'vio..."
1,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-11...","{'demographic_info': 'CRITERIA_EXCLUDED', 'geo...","[{'removal_reason': 'Technical requirements', ..."
2,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-09...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...","[{'removal_reason': 'Account suspension', 'vio..."
3,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-03...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...","[{'removal_reason': 'Copyrights', 'violation_c..."
4,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'BE', 'first_shown': '2023-09...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...","[{'removal_reason': 'Trademarks', 'violation_c..."
...,...,...,...,...
99995,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-11...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...","[{'removal_reason': 'Editorial', 'violation_ca..."
99996,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-07...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...","[{'removal_reason': 'Editorial', 'violation_ca..."
99997,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'AT', 'first_shown': '2023-05...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...",[{'removal_reason': 'Other restricted business...
99998,https://adstransparency.google.com/removed/cre...,"[{'region_code': 'BG', 'first_shown': '2023-10...","{'demographic_info': 'CRITERIA_INCLUDED', 'geo...",[{'removal_reason': 'Healthcare and medicines'...


In [5]:
import pandas as pd

def _expand_struct_column(frame, column, explode=False, nested=True):
    """Replace a struct (dict) column with one column per struct field.

    Args:
        frame: DataFrame that contains ``column``.
        column: Name of the column whose cells are dicts, or list-likes of
            dicts when ``explode`` is True.
        explode: When True, first emit one row per list element with
            ``DataFrame.explode``.
        nested: When True, build the new columns with ``pd.json_normalize``;
            when False, build them with ``pd.DataFrame`` directly from the
            records.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the original columns
        followed by the struct's fields, with ``column`` itself dropped.
    """
    rows = frame.explode(column) if explode else frame
    # explode() repeats index labels; renumber so the column-wise concat below
    # lines rows up one-to-one with the freshly built fields frame.
    rows = rows.reset_index(drop=True)
    # An empty list explodes to NaN, and json_normalize raises on a non-dict
    # record. Substitute {} so such rows are kept with all-missing fields.
    records = [cell if isinstance(cell, dict) else {} for cell in rows[column]]
    fields = pd.json_normalize(records) if nested else pd.DataFrame(records)
    return pd.concat([rows, fields], axis=1).drop(columns=[column])


# One row per element of 'region_stats', with that element's fields as columns
merged_results_region = _expand_struct_column(results, 'region_stats', explode=True)

# One row per element of 'disapproval', with that element's fields as columns
merged_results_disapproval = _expand_struct_column(merged_results_region, 'disapproval', explode=True)

# 'audience_selection_approach_info' holds a single dict per row: split it into columns
merged_results_final = _expand_struct_column(
    merged_results_disapproval, 'audience_selection_approach_info', nested=False
)

# Displaying the final DataFrame
merged_results_final.head()




Unnamed: 0,creative_page_url,region_code,first_shown,last_shown,times_shown_end_date,times_shown_lower_bound,times_shown_upper_bound,times_shown_start_date,times_shown_availability_date,surface_serving_stats,...,removal_reason,violation_category,use_of_automated_means,removal_location,decision_type,demographic_info,geo_location,contextual_signals,customer_lists,topics_of_interest
0,https://adstransparency.google.com/removed/cre...,BE,2023-11-15,2023-12-04,2023-12-04,0.0,1000.0,2023-11-15,,,...,Gambling and games,RESTRICTED_CONTENT_AND_FEATURES,False,"[ES, PL, BE, FR, BG, DK, NL, IT, GR, PT, CZ, R...",Google investigation,CRITERIA_EXCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED_AND_EXCLUDED,CRITERIA_UNUSED
1,https://adstransparency.google.com/removed/cre...,BE,2023-11-15,2023-12-04,2023-12-04,0.0,1000.0,2023-11-15,,,...,Requirements for third party ad serving,EDITORIAL_AND_TECHNICAL,False,[GLOBAL],Google investigation,CRITERIA_EXCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED_AND_EXCLUDED,CRITERIA_UNUSED
2,https://adstransparency.google.com/removed/cre...,BG,2023-11-29,2023-12-04,2023-12-04,0.0,1000.0,2023-11-29,,,...,Gambling and games,RESTRICTED_CONTENT_AND_FEATURES,False,"[ES, PL, BE, FR, BG, DK, NL, IT, GR, PT, CZ, R...",Google investigation,CRITERIA_EXCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED_AND_EXCLUDED,CRITERIA_UNUSED
3,https://adstransparency.google.com/removed/cre...,BG,2023-11-29,2023-12-04,2023-12-04,0.0,1000.0,2023-11-29,,,...,Requirements for third party ad serving,EDITORIAL_AND_TECHNICAL,False,[GLOBAL],Google investigation,CRITERIA_EXCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED_AND_EXCLUDED,CRITERIA_UNUSED
4,https://adstransparency.google.com/removed/cre...,CZ,2023-11-29,2023-12-04,2023-12-04,0.0,1000.0,2023-11-29,,,...,Gambling and games,RESTRICTED_CONTENT_AND_FEATURES,False,"[ES, PL, BE, FR, BG, DK, NL, IT, GR, PT, CZ, R...",Google investigation,CRITERIA_EXCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED,CRITERIA_INCLUDED_AND_EXCLUDED,CRITERIA_UNUSED


In [None]:
from google.colab import files

# Write the flattened DataFrame to an Excel workbook, omitting the index column.
excel_path = 'removed_creative_stats.xlsx'
merged_results_final.to_excel(excel_path, index=False)

# Hand the workbook to Colab's download helper.
files.download(excel_path)