# Fetch ads archive from Facebook API

Test on the Graph explorer:
* https://developers.facebook.com/tools/explorer
* `ads_archive?fields=ad_creative_body%2Cad_creation_time%2Cad_creative_link_caption%2Cad_creative_link_description%2Ccurrency%2Cfunding_entity%2Cimpressions%2Cad_snapshot_url%2Cpage_id%2Cpage_name%2Cspend&search_terms=''&ad_reached_countries=['FR']&limit=25`

Documentation:
* https://www.facebook.com/ads/library/?active_status=all&ad_type=political_and_issue_ads&country=FR
* https://www.facebook.com/ads/library/api/?source=archive-landing-page
    

In [1]:
import collections
import pprint
import time

import requests
import pandas

import creds

In [2]:
# Ad attributes requested from the API; the names are sent comma-separated
# as the `fields` query parameter.
FIELDS = [
    "ad_creation_time",
    "ad_creative_body",
    "ad_creative_link_caption",
    "ad_creative_link_description",
    "ad_creative_link_title",
    "ad_delivery_start_time",
    "ad_delivery_stop_time",
    "ad_snapshot_url",
    "currency",
    "demographic_distribution",
    "funding_entity",
    "impressions",
    "page_id",
    "page_name",
    "region_distribution",
    "spend",
]


In [5]:
def fetch(country_code, search_params, timeout=120):
    """Download all ads matching a search from the `ads_archive` endpoint.

    Pages of up to 5000 ads are requested one after the other and
    concatenated until the API stops advertising a following page.

    Args:
        country_code: Country code inserted into the `ad_reached_countries`
            filter, e.g. 'GB'.
        search_params: Dict of extra query parameters selecting the ads,
            e.g. {'search_terms': "''"} or {'search_page_ids': page_id}.
            The keys set here ('fields', 'ad_reached_countries', 'limit',
            'access_token') override same-named keys of this dict.
        timeout: Value handed to `requests.get(timeout=...)`, in seconds.
            `None` waits indefinitely, as the function did before this
            parameter existed.

    Returns:
        A list holding the `data` entries of every page, in the order they
        were received.

    Raises:
        AssertionError: if a response status is not 200 or the JSON payload
            contains keys other than the expected ones.
    """
    base_params = {
        # 'ad-type': 'POLITICAL_AND_ISSUE_ADS' (default)
        **search_params,
        'fields': ','.join(FIELDS),
        'ad_reached_countries': "['{}']".format(country_code),
        'limit': 5000,
        'access_token': creds.FB_TOKEN,
    }

    def make_request(after=None):
        """Fetch one page; return (ads, cursor of the next page or None)."""
        params = dict(base_params)
        if after:
            params['after'] = after

        response = requests.get(
            "https://graph.facebook.com/v3.3/ads_archive",
            params=params,
            timeout=timeout,
        )

        assert response.status_code == 200, (response.status_code, response.text)
        json_data = response.json()

        # Strict schema checks: fail loudly if the API answers with a shape
        # this notebook was not written for.
        assert set(json_data) <= {'data', 'paging'}, set(json_data)
        paging = json_data.get('paging', {})
        assert set(paging) <= {'cursors', 'next', 'previous'}, paging
        # `cursors` may be missing; treat that as "no cursor" instead of
        # failing with a bare KeyError.
        cursors = paging.get('cursors', {})
        assert set(cursors) <= {'after', 'before'}, paging

        # The Graph API signals the last page by omitting `next`; an `after`
        # cursor can still be present there, so following it blindly costs a
        # surplus request.
        next_cursor = cursors.get('after') if 'next' in paging else None

        return json_data['data'], next_cursor

    ads = []
    after = None
    while True:
        ads_batch, after = make_request(after=after)
        ads.extend(ads_batch)
        if not after:
            return ads

## Fetch using a single request with an empty search

In [6]:
# Global search: the search term is an empty quoted string (''), restricted to
# ads that reached GB.
ads = fetch(country_code='GB', search_params={'search_terms': "''"})
len(ads)

2737

This number does not match those from the report.

In [7]:
# One row per ad; the columns are the keys found in the ad dicts.
df = pandas.DataFrame(ads)
# Preview the first rows only instead of rendering the whole frame.
df.head(10)

Unnamed: 0,ad_creation_time,ad_creative_body,ad_creative_link_caption,ad_creative_link_description,ad_creative_link_title,ad_delivery_start_time,ad_delivery_stop_time,ad_snapshot_url,currency,demographic_distribution,funding_entity,impressions,page_id,page_name,region_distribution,spend
0,2019-05-15T09:37:40+0000,Less Fireworks / More Cooperative Working – Yo...,,,,2019-05-15T09:37:44+0000,2019-05-19T09:37:40+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.083333', 'age': '65+', 'gen...",Louis Stephen,"{'lower_bound': '0', 'upper_bound': '999'}",551289148568790,Cllr Louis Stephen,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
1,2019-05-15T09:29:56+0000,"For the first time in our history, we're polli...",,,,2019-05-15T09:29:56+0000,2019-05-22T23:00:00+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.030626', 'age': '55-64', 'g...",The Green Party of England and Wales,"{'lower_bound': '1000', 'upper_bound': '4999'}",20995300784,Green Party of England and Wales,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
2,2019-05-15T09:29:56+0000,"For the first time in our history, we're polli...",,,,2019-05-15T09:29:56+0000,2019-05-22T23:00:00+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.098488', 'age': '25-34', 'g...",The Green Party of England and Wales,"{'lower_bound': '1000', 'upper_bound': '4999'}",20995300784,Green Party of England and Wales,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
3,2019-05-15T08:45:34+0000,ISLAND’S MSP INVITES CABINET SECRETARY FOR RUR...,,,,2019-05-15T08:45:36+0000,2019-05-18T08:45:34+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.333333', 'age': '65+', 'gen...",Kenneth Gibson,"{'lower_bound': '0', 'upper_bound': '999'}",2745800252158948,Isle of Arran SNP,"[{'percentage': '1', 'region': 'Scotland'}]","{'lower_bound': '0', 'upper_bound': '99'}"
4,2019-05-15T08:41:38+0000,"61% of the country overall, and over 75% of th...",,,,2019-05-15T08:41:48+0000,2019-05-23T08:41:38+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.181818', 'age': '18-24', 'g...",Paul Gray,"{'lower_bound': '0', 'upper_bound': '999'}",104499892929845,Havant Liberal Democrats,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
5,2019-05-13T19:30:31+0000,Take the People's Vote test here in the North-...,,,,2019-05-15T08:00:00+0000,2019-05-22T08:00:00+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.147541', 'age': '55-64', 'g...",Open Britain - West Cumbria,"{'lower_bound': '0', 'upper_bound': '999'}",267518870499295,Open Britain - West Cumbria,"[{'percentage': '0.733333', 'region': 'England...","{'lower_bound': '0', 'upper_bound': '99'}"
6,2019-05-15T06:35:36+0000,For radical Climate Action the UK must Remain ...,,,,2019-05-15T06:35:39+0000,2019-05-24T06:35:36+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.073643', 'age': '55-64', 'g...",Karl Eslie Borges,"{'lower_bound': '0', 'upper_bound': '999'}",356980384912024,Alresford & Itchen Valley Greens,"[{'percentage': '0.996139', 'region': 'England...","{'lower_bound': '0', 'upper_bound': '99'}"
7,2019-05-14T21:12:52+0000,https://www.change.org/p/give-victims-support-...,change.org,Episode 13 - Are HM Employment Judges Involved...,ARE HM EMPLOYMENT JUDGES INVOLVED IN CORRUPTIO...,2019-05-14T21:13:04+0000,2019-05-24T21:12:52+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.20354', 'age': '18-24', 'ge...",Craig Chant,"{'lower_bound': '0', 'upper_bound': '999'}",195394467932052,Disabled Lives Matter,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
8,2019-05-14T20:16:33+0000,***Footway Improvements in Llanrumney - Mount ...,,,,2019-05-14T20:16:33+0000,2019-05-15T19:30:21+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.113495', 'age': '18-24', 'g...",Llanrumney Labour News,"{'lower_bound': '1000', 'upper_bound': '4999'}",1807970796086865,Llanrumney Labour News,"[{'percentage': '1', 'region': 'Wales'}]","{'lower_bound': '0', 'upper_bound': '99'}"
9,2019-05-14T19:59:49+0000,Creativity Movement; group which operates also...,,,,2019-05-14T19:59:57+0000,2019-05-24T19:59:49+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.206897', 'age': '35-44', 'g...",Alberto Testa,"{'lower_bound': '0', 'upper_bound': '999'}",1899667790263172,Professor Alberto Testa,"[{'percentage': '0.714286', 'region': 'England...","{'lower_bound': '0', 'upper_bound': '99'}"


In [10]:
# Ads of the global search published by the page "Disabled Lives Matter".
df.loc[df['page_name'].eq('Disabled Lives Matter')]

Unnamed: 0,ad_creation_time,ad_creative_body,ad_creative_link_caption,ad_creative_link_description,ad_creative_link_title,ad_delivery_start_time,ad_delivery_stop_time,ad_snapshot_url,currency,demographic_distribution,funding_entity,impressions,page_id,page_name,region_distribution,spend
7,2019-05-14T21:12:52+0000,https://www.change.org/p/give-victims-support-...,change.org,Episode 13 - Are HM Employment Judges Involved...,ARE HM EMPLOYMENT JUDGES INVOLVED IN CORRUPTIO...,2019-05-14T21:13:04+0000,2019-05-24T21:12:52+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.20354', 'age': '18-24', 'ge...",Craig Chant,"{'lower_bound': '0', 'upper_bound': '999'}",195394467932052,Disabled Lives Matter,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"
473,2019-05-11T20:54:07+0000,Please show some care & respect for the disabl...,change.org,Following the previous video covering my autis...,Solicitor & Barrister Threaten Autistic Disabl...,2019-05-11T20:54:20+0000,2019-05-21T20:54:07+0000,https://www.facebook.com/ads/archive/render_ad...,GBP,"[{'percentage': '0.002107', 'age': '35-44', 'g...",Craig Chant,"{'lower_bound': '1000', 'upper_bound': '4999'}",195394467932052,Disabled Lives Matter,"[{'percentage': '1', 'region': 'England'}]","{'lower_bound': '0', 'upper_bound': '99'}"


## Search by page

In [11]:
# Distinct pages, by id and by name, present in the global search results.
page_ids = {page_id for page_id in df['page_id']}
page_names = {page_name for page_name in df['page_name']}
print(
    'The global search returned {} different pages'.format(
        len(page_ids)
    )
)

The global search returned 246 different pages


In [48]:
# Warning: this is likely to trigger the rate limiting
# Cross-check: search every page individually and compare the number of ads
# with what the global search returned for that page.
# Warning: this is likely to trigger the rate limiting
for page_id in page_ids:
    print('Search for page {}'.format(page_id))

    # `fetch` requires the country; use the same one as the global search.
    ads_page = fetch(country_code='GB', search_params={'search_page_ids': page_id})

    # Filter the global results once and reuse them for the count and the name.
    ads_global = df[df['page_id'] == page_id]
    nb_ads_global_search = len(ads_global)
    nb_ads_page_search = len(ads_page)

    if nb_ads_global_search != nb_ads_page_search:
        page_name = ads_global['page_name'].iloc[0]
        print('The numbers do not match for page {}: {} in page search vs {} in global search'.format(
            page_name, nb_ads_page_search, nb_ads_global_search
        ))


305531639643278
1934566156780439
407402162736656
167962303683318
1834548586875556
138435322972255
298398717018170
365430600210847
741181696045328
2317233945166352
176178532420023
883296735214769
102734363097680
678120165658753
30239959348
360322167647280
137385490131
310039442421596
452897548398251
201951136994785
228249960960213
386752004789933
435340366822684
2235255886787452
367006363463686
329262860438866
249840792393105
450568105101647
6587671199
491862484305741
485789368166508
146383495502578
111257452230362
1799561767025031
1361286460616795
211043423125695
17373130431
555226237822956
308313479373888
685145698338214
2257604454310952
105732052794210
1807970796086865
1168773666629907
226189004178864
199108670575850
324070087738168
396013737832767
327226704149899
Discrepancy!!
327226704149899 East Herts Green Party 1 0
111956985487415
123817579216
1106278742785368
474613052643462
9250349228
1793510380925494
535129226536200
679879869119530
2041107186182812
628368003989904
15713517832

AssertionError: 

It seems that, in general, searching for a specific page does not return more results than the global search.

## Most common values

In [12]:
def find_most_common(field, records=None, n=20):
    """Pretty-print the most frequent values of `field` among ad records.

    Args:
        field: Key looked up in each record; records lacking it are ignored.
        records: Iterable of dicts to count over. Defaults to the
            notebook-level `ads` list.
        n: Maximum number of (value, count) pairs to print.

    Returns:
        None. The (value, count) pairs are printed, most frequent first.

    Note:
        Values are counted with `collections.Counter`, so they must be
        hashable.
    """
    if records is None:
        # Keep the original behaviour when no records are given explicitly.
        records = ads
    counts = collections.Counter(
        record[field] for record in records if field in record
    )
    pprint.pprint(counts.most_common(n))

In [13]:
# Most frequent `funding_entity` values among the fetched ads.
find_most_common('funding_entity')

[('The Conservative Party', 1167),
 ('Friends of the Earth', 315),
 ('the Liberal Democrats', 215),
 ('Change UK - The Independent Group', 205),
 ('Conservatives', 191),
 ('QuoteSearch', 60),
 ('The Labour Party', 58),
 ('The Brexit Party', 52),
 ('Best for Britain', 39),
 ('Friends of the Earth England, Wales and Northern Ireland', 31),
 ('Terence Brotheridge', 26),
 ('Alexander Guy Dale', 13),
 ('crudelydrawn', 11),
 ("It's Our City", 10),
 ('First News ', 10),
 ('Terence Leonard Brotheridge', 9),
 ('Social Democratic and Labour Party (SDLP)', 8),
 ('The Independent Group (TIG) Ltd, company number 11770529, a registered '
  'company in England and Wales.',
  8),
 ('Uniunea Salvați România - USR', 7),
 ('The Conservative Party ', 7)]


In [14]:
# Most frequent `page_name` values among the fetched ads.
find_most_common('page_name')

[('Conservatives', 447),
 ('Friends of the Earth', 346),
 ('Change UK - The Independent Group', 213),
 ('Liberal Democrats', 209),
 ('Paul Bristow', 108),
 ('Andy Street', 88),
 ('Robert Largan for High Peak', 68),
 ('Derek Thomas', 62),
 ('QuoteSearch.com', 60),
 ('Stuart Andrew', 60),
 ('Tom Hunt', 59),
 ('Peter Gibson For Darlington', 57),
 ('Laura Wirral West', 54),
 ('The Brexit Party', 52),
 ('Best For Britain', 39),
 ('The Labour Party', 37),
 ('Eddie Hughes MP', 16),
 ('Stuart Anderson', 16),
 ('Damien Moore MP', 15),
 ('Isobel Grant', 15)]
