To start working with the Twitter Ads API you'll first need to install the Twitter Ads SDK. Once that is installed it's also recommended to install and use Twurl to handle the API authentication process.

Twitters Ads Insight SDK for Python - http://twitterdev.github.io/twitter-python-ads-sdk/   
Twitters Ads Platform Tools - https://github.com/twitterdev/ads-platform-tools
    
Setting up Twurl:
- https://github.com/twitter/twurl
- https://dev.twitter.com/ads/tutorials/using-twurl  

Ads Analytics API Best Practices:
- https://dev.twitter.com/ads/analytics/best-practices
- https://dev.twitter.com/ads/analytics/metrics-and-segmentation
- https://dev.twitter.com/ads/analytics/metrics-derived
- https://dev.twitter.com/ads/tutorials/hierarchy-and-terminology

## Twitter Ad API ##

In [33]:
import pandas as pd
# default notebook setup
MAX_ROWS = 10
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.float_format = '{:.3f}'.format
# import the needed modules
import time
from twitter_ads.client import Client
from twitter_ads.campaign import LineItem
from twitter_ads.enum import METRIC_GROUP
from pprint import pprint

#### Helper functions ####

In [34]:
# defined functions that will be utilized later on

# import requests
import oauth2 as oauth
import yaml
# import urllib
import json
import os
import time
# import pytz
import datetime
import argparse
import re
import sys

# Handles the API authentication. See https://developer.twitter.com/en/docs/tutorials/using-twurl & https://github.com/twitter/twurl for more info.
def twurlauth():
    with open(os.path.expanduser('~/.twurlrc'), 'r') as f:
        contents = yaml.load(f)
        f.close()

    default_user = contents["configuration"]["default_profile"][0]

    CONSUMER_KEY = contents["configuration"]["default_profile"][1]
    CONSUMER_SECRET = contents["profiles"][default_user][CONSUMER_KEY]["consumer_secret"]
    USER_OAUTH_TOKEN = contents["profiles"][default_user][CONSUMER_KEY]["token"]
    USER_OAUTH_TOKEN_SECRET = contents["profiles"][default_user][CONSUMER_KEY]["secret"]

    return CONSUMER_KEY, CONSUMER_SECRET, USER_OAUTH_TOKEN, USER_OAUTH_TOKEN_SECRET

# Handles the request
def request(user_twurl, http_method, headers, url):
    CONSUMER_KEY = user_twurl[0]
    CONSUMER_SECRET = user_twurl[1]
    USER_OAUTH_TOKEN = user_twurl[2]
    USER_OAUTH_TOKEN_SECRET = user_twurl[3]

    consumer = oauth.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
    token = oauth.Token(key=USER_OAUTH_TOKEN, secret=USER_OAUTH_TOKEN_SECRET)
    client = oauth.Client(consumer, token)

    header_list = {}
    if headers:
        for i in headers:
            (key, value) = i.split(': ')
            if key and value:
                header_list[key] = value

    response, content = client.request(url, method=http_method, headers=header_list)

    try:
        data = json.loads(content)
    except:
        data = None
    return response, data

# Gets the data
def get_data(user_twurl, http_method, headers, url):
    data = []

    res_headers, response = request(user_twurl, http_method, headers, url)

    if res_headers['status'] != '200':
        print('ERROR: query failed, cannot continue: %s' % url)
        sys.exit(0)

    if response and 'data' in response:
        data += response['data']

    while 'next_cursor' in response and response['next_cursor'] is not None:
        cursor_url = url + '&cursor=%s' % response['next_cursor']
        res_headers, response = request(user_twurl, http_method, headers, cursor_url)

        if response and 'data' in response:
            data += response['data']

    return data

# Reformat timestamps
def format_timestamp(timestamp):
    return datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')

# Check the datetime filtering of the data
def check(data, start_time, end_time, filter_field=None, filter_data=[]):

    d = []

    if data and len(data) > 0:
        for i in data:
            if 'end_time' in i and i['end_time'] and format_timestamp(i['end_time']) < start_time:
                continue
            elif ('start_time' in i and i['start_time'] and
                  format_timestamp(i['start_time']) > end_time):
                continue
            elif i['deleted'] and format_timestamp(i['updated_at']) < start_time:
                continue
#             elif i['paused'] and format_timestamp(i['updated_at']) < start_time:
#                 continue
            elif filter_field and i[filter_field] not in filter_data:
                continue
            else:
                d.append(i['id'])

    return d

def fetcher(job_id):
    import sys
    seconds = 10
    tries = 3
    print 'Job is running..'
    while tries != 0:
        for i in xrange(seconds,0,-1):
            time.sleep(1)
        # fetch the job result of the specified async job ID
        async_stats_job_result = LineItem.async_stats_job_result(account, job_id)
        if async_stats_job_result['status'] == 'SUCCESS':
            print 'Fetching the response of the specified async job ID'
            # fetch the response of the specified async job ID
            async_data = LineItem.async_stats_job_data(account, async_stats_job_result['url'])
            print 'Done'
            return async_data
        else:
            tries -= 1
            print 'Job is still processing. %s tries left' % tries

#### Build the request ####

In [35]:
# # Create a new Ads account on the SANDBOX environment and return the account_id

# headers = None
# url = 'https://ads-api-sandbox.twitter.com/2/accounts/' # SANDBOX_DOMAIN
# response, content = request(user_twurl, 'POST', headers, url)
# sandbox_account_id = content['data']['id']
# print 'Sandbox Account ID:', sandbox_account_id

In [36]:
# API Authentication
user_twurl = twurlauth() # API authentication using the twurl function above

In [195]:
# Set the parameters for the request

# DOMAIN = 'https://ads-api.twitter.com'     # PROD_DOMAIN
DOMAIN = 'https://ads-api-sandbox.twitter.com'     # SANDBOX_DOMAIN

ACCOUNT_ID = 'gq183a'    #PROD ACCOUNT_ID
# ACCOUNT_ID = 'gq183a'     #SANDBOX_DOMAIN
headers = None

# Set the timeframe to pull
start_time = datetime.datetime.strptime('2017-07-01', '%Y-%m-%d')
end_time = datetime.datetime.strptime('2017-07-31', '%Y-%m-%d')
granularity = 'HOUR'

# Adds a day and subtracts a second so you get the full 23:59:59 h:m:s end_time day
end_time += datetime.timedelta(hours=23)
end_time -= datetime.timedelta(seconds=1)

print 'start_time:', start_time
print 'end_time:', end_time
print 'Domain:', DOMAIN
print 'Account_id:', ACCOUNT_ID

start_time: 2017-07-01 00:00:00
end_time: 2017-07-31 22:59:59
Domain: https://ads-api-sandbox.twitter.com
Account_id: gq183a


In [196]:
# Check that we have access to this :ACCOUNT_ID

resource_path = '/2/accounts/%s' % ACCOUNT_ID
data = get_data(user_twurl, 'GET', headers, DOMAIN + resource_path)

if len(data) == 0:
    print('ERROR: Could not locate :account ID %s' % ACCOUNT_ID)
    sys.exit(0)
else:
    print('You have access to account ID:%s' % ACCOUNT_ID)

You have access to account ID:gq183a


#### Conduct stats check for the account based on the parameters set ####

In [197]:
print("Stats check for :account_id %s" % ACCOUNT_ID)
print '-'*80
print('Start time:\t%s' % start_time)
print('End time:\t%s' % end_time)
print '-'*80

# fetch funding instruments
resource_path = '/2/accounts/%s/funding_instruments?with_deleted=true&count=1000' % ACCOUNT_ID
data = get_data(user_twurl, 'GET', headers, DOMAIN + resource_path)

# filter funding instruments
print("Pre-filtered data:\t\t%s" % len(data))
funding_instruments = check(data, start_time, end_time)
print("Funding instruments:\t\t%s" % len(funding_instruments))

# fetch campaigns
resource_path = '/2/accounts/%s/campaigns?with_deleted=true&count=1000' % ACCOUNT_ID
data = get_data(user_twurl, 'GET', headers, DOMAIN + resource_path)

# filter campaigns
print("Pre-filtered data:\t\t%s" % len(data))
campaigns = check(data, start_time, end_time, 'funding_instrument_id', funding_instruments)
print("Campaigns:\t\t\t%s" % len(campaigns))

# fetch line items
resource_path = '/2/accounts/%s/line_items?with_deleted=true&count=1000' % ACCOUNT_ID
data = get_data(user_twurl, 'GET', headers, DOMAIN + resource_path)

# filter line items
print("Pre-filtered data:\t\t%s" % len(data))
line_items = check(data, start_time, end_time, 'campaign_id', campaigns)
print("Line items:\t\t\t%s" % len(line_items))

# fetch promoted_tweets
resource_path = '/2/accounts/%s/promoted_tweets?with_deleted=true&count=1000' % ACCOUNT_ID
data = get_data(user_twurl, 'GET', headers, DOMAIN + resource_path)

# filter promoted_tweets
print("Pre-filtered data:\t\t%s" % len(data))
promoted_tweets = check(data, start_time, end_time, 'line_item_id', line_items)
print("Promoted Tweets:\t\t%s" % len(promoted_tweets))
print '-'*80

Stats check for :account_id gq183a
--------------------------------------------------------------------------------
Start time:	2017-07-01 00:00:00
End time:	2017-07-31 22:59:59
--------------------------------------------------------------------------------
Pre-filtered data:		1
Funding instruments:		0
Pre-filtered data:		0
Campaigns:			0
Pre-filtered data:		0
Line items:			0
Pre-filtered data:		0
Promoted Tweets:		0
--------------------------------------------------------------------------------


#### Build and make the async report request ####

In [42]:
# initialize the client
client = Client(user_twurl[0], user_twurl[1], user_twurl[2], user_twurl[3])

# load the advertiser account instance available to the current access token
account = client.accounts(ACCOUNT_ID)

# limit request count and grab the first 20 line items from Cursor
line_items = list(account.line_items(None, count=1000))[:20]

# the list of metrics we want to fetch, for a full list of possible metrics
# see: https://dev.twitter.com/ads/analytics/metrics-and-segmentation
metric_groups = [METRIC_GROUP.BILLING,
                 METRIC_GROUP.ENGAGEMENT,
                 METRIC_GROUP.MEDIA,
                 METRIC_GROUP.VIDEO
#                  METRIC_GROUP.WEB_CONVERSION
                ]

# fetching async stats on the instance
ids = map(lambda x: x.id, line_items)

queued_job = LineItem.queue_async_stats_job(account,
                                            ids,
                                            metric_groups,
                                            entity='LineItem',
                                            start_time=start_time,
                                            end_time=end_time,
                                            granularity=granularity)

# get the job_id:
job_id = queued_job['id']

# fetch the response of the specified async job ID using the fetcher function and assign the API response to new 'response' var
response = fetcher(job_id)

# # fetch the results of the specific async job ID directly
# async_stats_job_result = LineItem.async_stats_job_result(account, job_id)
# async_stats_job_result['status']

Job is running..
Fetching the response of the specified async job ID
Done


In [41]:
# # GET stats/jobs/summaries
# # Retrieve a summary of each existing asynchronous analytics job associated with the client app ID making the request. 
# # This endpoint is meant for internal / debugging purposes only.

# resource_path = '/2/stats/jobs/summaries'
# res_headers, response = request(user_twurl, 'GET', headers, 'https://ads-api.twitter.com' + resource_path)
# response

#### Parsing the response ####

In [30]:
# # Inspect the 'data' value in the response
# print 'start_time:', response['request']['params']['start_time']
# print 'end_time' , response['request']['params']['end_time']
# from pprint import pprint
# pprint(response['data'][0] , depth=6)     # inspect first 'row'

In [187]:
# First create a dictionary of dimensional data at the line_item.id level to match up below with the metric data
from collections import defaultdict

# create a dict to contain line_item dims
line_dims = defaultdict(list)
for line_item in line_items:
    line_dims[line_item.id].append((line_item.name, line_item.campaign_id))
    
# create a dict to contain campaign level dims
camp_dims = defaultdict(list)
for campaign in account.campaigns():
    camp_dims[campaign.id].append((campaign.currency, campaign.name))

# # create a dict to contain media dims
# media_dims = {}
# for item in account.media_creatives():
#     media_dims[item.line_item_id] = (item.media_creative_ids,)
    
# bring data into the line_dims dict from camp_dims dict
for line_item_id, values in line_dims.items():
    campaign_id = values[0][1]
    if campaign_id in camp_dims:
        campaign_currency = camp_dims[campaign_id][0][0]
        campaign_name = camp_dims[campaign_id][0][1]
        line_dims[line_item_id].append((campaign_currency, campaign_name))
    if line_item_id in media_dims:
        media_creative_id = media_dims[line_item_id]
    else:
        media_creative_id = None
    line_dims[line_item_id].append(media_creative_id)

In [194]:
# Create a time series to use as the index to the dataframe based on the times returned in the response
start = datetime.datetime.strptime(response['request']['params']['start_time'], '%Y-%m-%dT%H:%M:%SZ')
end = datetime.datetime.strptime(response['request']['params']['end_time'], '%Y-%m-%dT%H:%M:%SZ')
end -= datetime.timedelta(hours=1)

dates = pd.date_range(start=start, end=end, freq='H')

# Build the dataframe(s)
entries = []
for entry in response['data']:
    data = entry['id_data'][0]['metrics']    
    # Pull in the keys for the column headers
    columns = data.keys()
    # Add additional columns
    add_columns = ['date',
                   'advertiser',
                   'advertiser_id',
                   'advertiser_currency',
                   'insertion_order',
                   'insertion_order_id',
                   'line_item',
                   'line_item_id',
                   'creative_id',
                  ]
    columns.extend(add_columns)
    # Create a dataframe where each row equals granularity
    df = pd.DataFrame(data, index=dates, columns=columns)
    # Resample the granularity of the dataframe timeseries to days
    df = df.resample('D').sum()
    
    # Create additional columns
    df['click_rate'] = (df['clicks'] / df['impressions'])
    df['advertiser'] = account.name
    df['advertiser_id'] = account.id
    df['advertiser_currency'] = line_dims[entry['id']][1][0]
    df['insertion_order'] = line_dims[entry['id']][1][1]
    df['insertion_order_id'] = line_dims[entry['id']][0][1]
    df['line_item'] = line_dims[entry['id']][0][0]
    df['line_item_id'] = entry['id']
    df['creative_id'] = line_dims[entry['id']][2]
    df['billed_charge_local_micro'] = (df['billed_charge_local_micro'] / 1000000) # spend

    # Append dataframe to list
    entries.append(df)
    
# Concat into larger dataframe 
dataframe = pd.concat(entries)

# # Preview API response dataframe
# dataframe.head()

#### Take the dataframe and build the report needed ####

In [192]:
# Columns needed for the report
report_columns = ['advertiser',
                'advertiser_id',
                'advertiser_currency',
                'insertion_order',
                'insertion_order_id',
                'line_item',
                'line_item_id',
                'creative',
                'creative_id',
                'impressions',
                'qualified_impressions',
                'clicks',
                'click_rate',
                'total_conversions',
                'post_click_conversions',
                'revenue',
                'billed_charge_local_micro',
                'video_views_25',
                'video_views_75',
                'video_views_50',
                'video_views_100',
                'video_content_starts']

# Copy API response dataframe to Report dataframe
report_df = pd.DataFrame(data=dataframe, columns=report_columns)

# Reset the index & rename to 'date'
report_df.reset_index(inplace=True)
report_df = report_df.rename(columns = {'index':'date'})

# Apply some formatting to the columns strip leading/trailing whitespaces, titlecase, and replace underscore with ' '
report_df.columns = report_df.columns.str.strip().str.title().str.replace('_', ' ')

# # Preview Report dataframe
# report_df.head(3)

In [193]:
# Output to csv
report_df.to_csv('output.csv', index=False, float_format='%.4f')