In [None]:
import os
import time

import pandas as pd
import requests

# The CrowdTangle API token is a credential, so it is read from the environment
# instead of being stored in the notebook. Export CROWDTANGLE_API_TOKEN before
# starting the kernel. NOTE: any token previously committed here should be revoked.
token = os.environ.get("CROWDTANGLE_API_TOKEN")
if not token:
    print("CROWDTANGLE_API_TOKEN is not set; requests will be sent without a token.")

# Accounts to query: public page names or public group ids.
pages = ["bbc", "natgeo", "613870175328566"]

In [None]:
def ct_get_posts(count=100, accounts=None, start_date=None, end_date=None, include_history=None,
                 sort_by="date", types=None, search_term=None, language=None,
                 min_interactions=0, offset=0, api_token=None,
                 wait_seconds=10, max_retries=3):
    """
    Retrieve posts from the CrowdTangle API and return them as one DataFrame.

    The first request goes to the ``/posts`` endpoint with a query built from
    the arguments below. Every following request uses the ``nextPage`` URL taken
    from the previous response, until a response reports no further page.

    **Arguments the API needs for a useful query:**

    - api_token: the CrowdTangle API token (sent as ``token``).
    - count: number of posts per request, [1,100].
    - accounts: a list of the names of public pages or public group ids.
                A single string is sent unchanged; list items are joined with commas.
    - start_date: The earliest date at which a post could be posted. Time zone is UTC.
                  Format is "yyyy-mm-ddThh:mm:ss" or "yyyy-mm-dd" (defaults to time 00:00:00).
                  This must be before end_date.
    - end_date: The latest date at which a post could be posted. Time zone is UTC.
                Format is "yyyy-mm-ddThh:mm:ss" or "yyyy-mm-dd" (defaults to time 00:00:00).

    **Optional arguments:**

    - search_term: Returns only posts that match this search term. Terms AND automatically.
                   Separate with commas for OR, use quotes for phrases. E.g. CrowdTangle API -> AND.
                   CrowdTangle, API -> OR. "CrowdTangle API" -> AND in that exact order.
                   You can also use traditional Boolean search with this parameter.
    - language: 2-character locale code.
    - include_history: Includes timestep data for growth of each post returned.
                       ``True`` or the string "true" (any casing) enables it; anything else
                       leaves the parameter out of the request.
    - types: The types of post to include, as a comma-separated string or a list of strings.
             If you want all live videos (whether currently or formerly live), be sure to
             include both live_video and live_video_complete. The "video" type does not mean
             all videos, it refers to videos that aren't native_video or youtube
             (e.g. a video on Vimeo).
             Options: "album","igtv","link","live_video","live_video_complete",
             "live_video_scheduled","native_video","photo","status","video","vine","youtube"
    - sort_by: The method by which to filter and order posts.
               Options: "date", "interaction_rate", "overperforming", "total_interactions",
               "underperforming".
    - min_interactions: sent to the API as ``minInteractions``.
    - offset: The number of posts to offset.
    - wait_seconds: pause after every request, in seconds (kept at 10 by default to space
                    out consecutive calls).
    - max_retries: how many times in a row a request answered with HTTP 429 or a 5xx status
                   is repeated before pagination stops. Other error statuses stop at once.

    **Returns:**

    A pandas DataFrame with one row per retrieved post and a fresh 0..n-1 index.
    When a request fails, the error is printed and the posts collected so far are
    returned (an empty DataFrame if nothing was collected).
    """

    # api-endpoint; replaced by the "nextPage" URL while paginating
    url = "https://api.crowdtangle.com/posts"
    # query parameters for the first request only
    params = {'count': count, 'sortBy': sort_by, 'token': api_token,
              'minInteractions': min_interactions, 'offset': offset}

    if accounts:
        # A bare string must not be joined character by character.
        if isinstance(accounts, str):
            params["accounts"] = accounts
        else:
            params["accounts"] = ",".join(str(account) for account in accounts)
    if start_date:
        params['startDate'] = start_date
    if end_date:
        params['endDate'] = end_date
    # Accept both the documented boolean and the literal string form.
    if include_history is True or str(include_history).lower() == 'true':
        params['includeHistory'] = 'true'
    if types:
        params['types'] = types if isinstance(types, str) else ",".join(types)
    if search_term:
        params['searchTerm'] = search_term
    if language:
        params['language'] = language

    frames = []        # one DataFrame per successfully fetched page
    n_requests = 0     # progress counter, printed after every request
    failures = 0       # consecutive failed attempts for the current page

    while url:
        r = requests.get(url=url, params=params)
        time.sleep(wait_seconds)

        n_requests += 1
        print(n_requests)

        # If the request is invalid: report it, then retry or stop.
        if r.status_code != 200:
            try:
                out = r.json()
            except ValueError:
                # Error body is not JSON (e.g. an HTML gateway page).
                out = {}
            if not isinstance(out, dict):
                out = {}
            print(f"status: {out.get('status', r.status_code)}")
            print(f"Code error: {out.get('code')}")
            print(f"Message: {out.get('message')}")

            failures += 1
            retryable = r.status_code == 429 or r.status_code >= 500
            if not retryable or failures > max_retries:
                break
            continue

        # If the request is valid: keep the page and follow the pagination link.
        failures = 0
        result = r.json()['result']
        frames.append(pd.DataFrame(result.get('posts', [])))

        # The nextPage URL already carries the full query, so no params are re-sent.
        url = result.get('pagination', {}).get('nextPage')
        params = None

    # Concatenate once at the end instead of growing a frame inside the loop.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0, ignore_index=True)

def normalize_data(data):
    """
    Expand the dictionary-valued columns "account" and "statistics" into flat columns.

    The result is laid out as: flattened "account" columns, then the remaining
    original columns, then flattened "statistics" columns. The two nested columns
    themselves are removed. Nested keys are joined with "." by ``pd.json_normalize``.

    The flattened columns are not prefixed, so an "account" key that has the same
    name as an existing post column produces a duplicate column name.

    Parameters:
        data: DataFrame of posts. Either nested column may be absent, and the frame
              may be empty; in both cases only the columns that are present are handled.

    Returns:
        A new DataFrame with the same rows and the same index as ``data``.
    """
    nested_columns = [name for name in ("account", "statistics") if name in data.columns]
    remaining = data.drop(columns=nested_columns)

    # Nothing to flatten: empty frame or neither nested column present.
    if data.empty or not nested_columns:
        return remaining

    def _flatten(column):
        # json_normalize returns a fresh 0..n-1 index; re-attach the caller's index
        # so the column-wise concat below lines rows up by position, not by label.
        flat = pd.json_normalize(data[column].tolist())
        flat.index = data.index
        return flat

    parts = []
    if "account" in nested_columns:
        parts.append(_flatten("account"))
    parts.append(remaining)
    if "statistics" in nested_columns:
        parts.append(_flatten("statistics"))

    return pd.concat(parts, axis=1)


In [None]:
# Fetch every post of the selected accounts for the date window, then flatten
# the nested "account" and "statistics" columns. The shape is printed before
# and after flattening.
data = ct_get_posts(
    accounts=pages,
    start_date='2023-04-01',
    end_date='2023-04-11',
    api_token=token,
)
print(data.shape)

data = normalize_data(data)
print(data.shape)

1
2
3
4
5
6
7
8
9
10
(979, 22)
(979, 54)


In [None]:
# Preview the first five rows of the flattened posts table.
data.head(n=5)

Unnamed: 0,id,name,handle,profileImage,subscriberCount,url,platform,platformId,accountType,pageAdminTopCountry,...,expected.likeCount,expected.shareCount,expected.commentCount,expected.loveCount,expected.wowCount,expected.hahaCount,expected.sadCount,expected.angryCount,expected.thankfulCount,expected.careCount
0,8333,National Geographic,natgeo,https://scontent.xx.fbcdn.net/v/t39.30808-1/28...,50038345,https://www.facebook.com/23497828950,Facebook,100044623170418,facebook_page,US,...,1081,154,36,162,56,4,4,2,0,17
1,7645486,Netflix Recommendations,,https://scontent-sea1-1.xx.fbcdn.net/v/t1.6435...,4560954,https://www.facebook.com/groups/613870175328566,Facebook,613870175328566,facebook_group,,...,13,5,20,5,2,10,1,1,0,2
2,7645486,Netflix Recommendations,,https://scontent-sea1-1.xx.fbcdn.net/v/t1.6435...,4560954,https://www.facebook.com/groups/613870175328566,Facebook,613870175328566,facebook_group,,...,13,5,20,5,2,10,1,1,0,2
3,7645486,Netflix Recommendations,,https://scontent-sea1-1.xx.fbcdn.net/v/t1.6435...,4560954,https://www.facebook.com/groups/613870175328566,Facebook,613870175328566,facebook_group,,...,9,3,17,4,2,10,1,1,0,2
4,7645486,Netflix Recommendations,,https://scontent-sea1-1.xx.fbcdn.net/v/t1.6435...,4560954,https://www.facebook.com/groups/613870175328566,Facebook,613870175328566,facebook_group,,...,13,5,20,5,2,10,1,1,0,2


In [None]:
# Same accounts and date window, restricted to posts matching the search term.
# The shape is printed before and after flattening the nested columns.
data_keyword = ct_get_posts(
    accounts=pages,
    search_term="Weather",
    start_date='2023-04-01',
    end_date='2023-04-11',
    api_token=token,
)
print(data_keyword.shape)

data_keyword = normalize_data(data_keyword)
print(data_keyword.shape)

1
(2, 14)
(2, 42)


In [None]:
# Show the message text of the row labelled 0 in the keyword-search results.
data_keyword["message"].loc[0]

'Any recommendations for a nice action movies for a cold weather'