### Basic request example

In [1]:
from datetime import datetime, timezone
import requests

parameters = {
    "subreddit": "europe",
    "q": "russia*|ruzzia*|kremlin|putin*|putler*",
#     "q":"sdfsdgf",
    "after": int(datetime(2022, 3, 24, tzinfo=timezone.utc).timestamp()),
    "metadata": True,
#     "metadata": False,
    "size": 300
    
}

endpoint = "https://api.pushshift.io/reddit/search/comment/"
# query = "&".join([f"{key}={value}" for key, value in parameters.items()])
# url = endpoint + "?" + query

response = requests.get(endpoint, params=parameters)
print(response.status_code)
response = response.json()

200


#### Inspecting response

In [2]:
response

{'data': [{'all_awardings': [],
   'archived': False,
   'associated_award': None,
   'author': 'Cefalopodul',
   'author_flair_background_color': None,
   'author_flair_css_class': 'ROMA',
   'author_flair_richtext': [{'e': 'text', 't': 'Crisana(Romania)'}],
   'author_flair_template_id': '7ed794f6-2f83-11e7-bfdc-0e2e4867549a',
   'author_flair_text': 'Crisana(Romania)',
   'author_flair_text_color': 'dark',
   'author_flair_type': 'richtext',
   'author_fullname': 't2_18qw8bow',
   'author_patreon_flair': False,
   'author_premium': False,
   'body': 'Pro-Kremlin website.',
   'body_sha1': '500825cfd268e99355ae81f711233230c89a264c',
   'can_gild': True,
   'collapsed': False,
   'collapsed_because_crowd_control': None,
   'collapsed_reason': None,
   'collapsed_reason_code': None,
   'comment_type': None,
   'controversiality': 0,
   'created_utc': 1648080074,
   'distinguished': None,
   'gilded': 0,
   'gildings': {},
   'id': 'i1v7uya',
   'is_submitter': False,
   'link_id': 't3_

In [3]:
bool(response["data"])

True

### Cleaning response data

#### Two distinct dictionaries in response variable - `data` and `metadata`

In [4]:
import pandas as pd

comments = pd.DataFrame(response["data"])
metadata = response["metadata"]

#### Inspecting `metadata`

In [5]:
import pprint

pp = pprint.PrettyPrinter()
pp.pprint(metadata)

{'after': 1648080000,
 'agg_size': 100,
 'api_version': '3.0',
 'before': None,
 'es_query': {'query': {'bool': {'filter': {'bool': {'must': [{'terms': {'subreddit': ['europe']}},
                                                              {'range': {'created_utc': {'gt': 1648080000}}},
                                                              {'simple_query_string': {'default_operator': 'and',
                                                                                       'fields': ['body'],
                                                                                       'query': 'russia*|ruzzia*|kremlin|putin*|putler*'}}],
                                                     'should': []}},
                                 'must_not': []}},
              'size': 250,
              'sort': {'created_utc': 'asc'}},
 'execution_time_milliseconds': 3361.58,
 'index': 'rc_delta3',
 'metadata': 'True',
 'q': 'russia*|ruzzia*|kremlin|putin*|putler*',
 'ranges': [{'range':

#### Inspecting `data`

In [6]:
comments.dtypes

all_awardings                      object
archived                             bool
associated_award                   object
author                             object
author_flair_background_color      object
author_flair_css_class             object
author_flair_richtext              object
author_flair_template_id           object
author_flair_text                  object
author_flair_text_color            object
author_flair_type                  object
author_fullname                    object
author_patreon_flair                 bool
author_premium                       bool
body                               object
body_sha1                          object
can_gild                             bool
collapsed                            bool
collapsed_because_crowd_control    object
collapsed_reason                   object
collapsed_reason_code              object
comment_type                       object
controversiality                    int64
created_utc                       

#### Sample comments

In [7]:
from IPython.display import display

def timestamp_to_ISO8601(timestamp):
    dt_obj = datetime.fromtimestamp(timestamp)
    return dt_obj.astimezone(tz=timezone.utc).isoformat()

comments["created_utc_str"] = comments.created_utc.apply(timestamp_to_ISO8601)

#rel_columns = ["created_utc", "author_flair_css_class", "author_flair_text", "body"]
rel_columns = ["created_utc", "created_utc_str", "author_flair_css_class", "author_flair_text", "body"]

with pd.option_context("display.max_colwidth", None):
    display(comments[rel_columns])

Unnamed: 0,created_utc,created_utc_str,author_flair_css_class,author_flair_text,body
0,1648080074,2022-03-24T00:01:14+00:00,ROMA,Crisana(Romania),Pro-Kremlin website.
1,1648080170,2022-03-24T00:02:50+00:00,SWED,Sweden,Nah. Russia still needs to withdraw from the treaty.
2,1648080280,2022-03-24T00:04:40+00:00,,,"Decathlon has 60 stores, less than Metro (93 stores) and Spar (68 stores) while M&amp;S has 40 stores, not many less. Spar actually controls some of their stores directly contrary to what you said. Do you really think that M&amp;S can't stop supplying their Russian stores? Or that Burger King has to yield to the power of their individual franchisees? McDonald's closed, despite having a franchise model.Your rational is flawed."
3,1648080283,2022-03-24T00:04:43+00:00,GREE,Greece,"And because of Germany the EU was stagnant for years, even though France pushed for reforms. With COVID pushing the German economy, they saddened loosened up their stance.\n\nAnd now that Russia is invading a country pretty close to Germany, it’s stance changed, again. It would be great if we were thinking ahead instead of reacting, but I’ll get any progress."
4,1648080390,2022-03-24T00:06:30+00:00,ROMA,Romania,Putin: I see this as an absolute win.
...,...,...,...,...,...
244,1648098299,2022-03-24T05:04:59+00:00,,,"Well considering Russia's losses and stagnation in Ukraine... Also, they allegedly used a hypersonic missile."
245,1648098320,2022-03-24T05:05:20+00:00,,,And yet europe has the audacity to demand India to denounce russia.....
246,1648098360,2022-03-24T05:06:00+00:00,CZEC,Czech Republic,"Would be probably true in long-term...\nIf US became something like EU, it would probably solve some of their issues as well (like the two party system)\n\nBut right now Russia and China are far, far bigger problem... and sadly, without US they would probably get much worse..."
247,1648098440,2022-03-24T05:07:20+00:00,HUNG,Hungary,"Yeah man, Russia is a joke ;-) Don't worry about a thing. It'll be easy. Certainly no consequences for affluent liberals in faraway countries that would never go anywhere near a conflict, but have no problem risking other people's lives."


### Prototyping  reusable functions

In [8]:
import time
from datetime import timedelta
from functools import partial, wraps


def timeit(f):
    @wraps(f)
    def wrap(*args, **kwargs):
        t0 = time.time()
        result = f(*args, **kwargs)
        t1 = time.time()
        print(("Function {f_name} args:[{args}, {kwargs}] took: {time:.2f} secs."
                   .format(f_name=f.__name__, args=args, kwargs=kwargs, time=t1-t0)))
        return result
    return wrap


def format_query_params(after, before=None, include_metadata=False,
                        query_str="russia*|ruzzia*|kremlin|putin*|putler*",
                        default_params=dict(subreddit="europe", size=300)):
                        
    params = {**default_params, "q": query_str, "metadata": include_metadata, 
              "after": after}
    if before is not None:
        params["before"] = before
    return params


def send_request(parameters, timeout=10, endpoint="https://api.pushshift.io/reddit/search/comment/"):
    response = requests.get(endpoint, params=parameters)
    
    if response.status_code == 200:
        response = response.json()
        metadata = response["metadata"] if "metadata" in response.keys() else None
        data = response["data"]
        return (metadata, data)
    
    elif response.status_code == 429:
        print(f"Rate limit reached, sleeping for {timeout} secs.")
        time.sleep(timeout)
        return send_request(parameters)
    
    elif response.status_code >= 500:
        print(f"Server error (HTTP {response.status_code}), sleeping for {timeout} secs.")
        time.sleep(timeout)
        return send_request(parameters)
    
    else:
        raise NotImplementedError("HTTP status code {}".format(response.status_code))
    
@timeit   
def get_daily_comments(date_str):
    after_datetime = datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc)
    before_datetime = after_datetime + timedelta(days=1)
    format_subseq_query_params = partial(format_query_params, 
                                         before=int(before_datetime.timestamp()))
        
    params = format_query_params(after=int(after_datetime.timestamp()),
                                 before=int(before_datetime.timestamp()),
                                 include_metadata=True)
    
    
    metadata, comments = send_request(params)
    iteration = 0
    print(f"#{iteration+1}: {len(comments)} ({len(comments)}/{metadata['total_results']})")

    while True:
        params = format_subseq_query_params(after=int(comments[-1]["created_utc"]))
        _, data = send_request(params)
        
        if bool(data):
            comments += list(data)
            iteration += 1
            print(f"#{iteration+1}: {len(data)} ({len(comments)}/{metadata['total_results']})")
        else:
            break
    
    return metadata, comments
    

In [9]:
metadata, comments = get_daily_comments("2022-03-24")

#1: 249 (249/2139)
#2: 250 (499/2139)
#3: 250 (749/2139)
#4: 249 (998/2139)
#5: 248 (1246/2139)
#6: 250 (1496/2139)
#7: 250 (1746/2139)
#8: 250 (1996/2139)
#9: 139 (2135/2139)
Function get_daily_comments args:[('2022-03-24',), {}] took: 75.85 secs.


In [10]:
pp.pprint(metadata)

{'after': 1648080000,
 'agg_size': 100,
 'api_version': '3.0',
 'before': 1648166400,
 'es_query': {'query': {'bool': {'filter': {'bool': {'must': [{'terms': {'subreddit': ['europe']}},
                                                              {'range': {'created_utc': {'gt': 1648080000}}},
                                                              {'range': {'created_utc': {'lt': 1648166400}}},
                                                              {'simple_query_string': {'default_operator': 'and',
                                                                                       'fields': ['body'],
                                                                                       'query': 'russia*|ruzzia*|kremlin|putin*|putler*'}}],
                                                     'should': []}},
                                 'must_not': []}},
              'size': 250,
              'sort': {'created_utc': 'asc'}},
 'execution_time_milliseconds': 1780.5

#### Inspecting results

In [11]:
comments = pd.DataFrame(comments)

comments["created_utc_str"] = comments.created_utc.apply(timestamp_to_ISO8601)
rel_columns = ["created_utc", "created_utc_str", "author_flair_css_class", "author_flair_text", "body"]
with pd.option_context("display.max_colwidth", None):
    display(comments[rel_columns])

Unnamed: 0,created_utc,created_utc_str,author_flair_css_class,author_flair_text,body
0,1648080074,2022-03-24T00:01:14+00:00,ROMA,Crisana(Romania),Pro-Kremlin website.
1,1648080170,2022-03-24T00:02:50+00:00,SWED,Sweden,Nah. Russia still needs to withdraw from the treaty.
2,1648080280,2022-03-24T00:04:40+00:00,,,"Decathlon has 60 stores, less than Metro (93 stores) and Spar (68 stores) while M&amp;S has 40 stores, not many less. Spar actually controls some of their stores directly contrary to what you said. Do you really think that M&amp;S can't stop supplying their Russian stores? Or that Burger King has to yield to the power of their individual franchisees? McDonald's closed, despite having a franchise model.Your rational is flawed."
3,1648080283,2022-03-24T00:04:43+00:00,GREE,Greece,"And because of Germany the EU was stagnant for years, even though France pushed for reforms. With COVID pushing the German economy, they saddened loosened up their stance.\n\nAnd now that Russia is invading a country pretty close to Germany, it’s stance changed, again. It would be great if we were thinking ahead instead of reacting, but I’ll get any progress."
4,1648080390,2022-03-24T00:06:30+00:00,ROMA,Romania,Putin: I see this as an absolute win.
...,...,...,...,...,...
2130,1648166191,2022-03-24T23:56:31+00:00,,,Rise Against the Russians?
2131,1648166199,2022-03-24T23:56:39+00:00,,,Rise Against the Russians?
2132,1648166338,2022-03-24T23:58:58+00:00,,,"Idk what Ukraine would need to do this, but someone needs to provide them the means to destroy the Kerch Bridge. Clear the coast of Russian ships, destroy the bridge and besiege Russian forces in Crimea."
2133,1648166351,2022-03-24T23:59:11+00:00,,,"You can accuse Russia of many things, but being self-aware is not one of them."


In [12]:
with pd.option_context("display.max_colwidth", None):
    display(comments.loc[comments.author_flair_css_class.notna(), rel_columns])

Unnamed: 0,created_utc,created_utc_str,author_flair_css_class,author_flair_text,body
0,1648080074,2022-03-24T00:01:14+00:00,ROMA,Crisana(Romania),Pro-Kremlin website.
1,1648080170,2022-03-24T00:02:50+00:00,SWED,Sweden,Nah. Russia still needs to withdraw from the treaty.
3,1648080283,2022-03-24T00:04:43+00:00,GREE,Greece,"And because of Germany the EU was stagnant for years, even though France pushed for reforms. With COVID pushing the German economy, they saddened loosened up their stance.\n\nAnd now that Russia is invading a country pretty close to Germany, it’s stance changed, again. It would be great if we were thinking ahead instead of reacting, but I’ll get any progress."
4,1648080390,2022-03-24T00:06:30+00:00,ROMA,Romania,Putin: I see this as an absolute win.
7,1648080458,2022-03-24T00:07:38+00:00,FR-LORR,Lorraine (France),France doesn’t sell weapons to Russia. France doesn’t finance the Russian war like Germany still does when it but gas from Russia.
...,...,...,...,...,...
2117,1648165198,2022-03-24T23:39:58+00:00,ISRA,Israel,"what do Poles view as Russians fighting Germans in Poland (ie liberation)?\n\nnot trying to accuse just trying to understand, as the alternative to Soviet invasion was German murderous rule"
2119,1648165695,2022-03-24T23:48:15+00:00,DE-BY,Bavaria (Germany),Kozyrev was right\n\nRussian military budget paid for some nice yachts in Cyprus
2120,1648165699,2022-03-24T23:48:19+00:00,NORW,Norway,I'd been wondering why this wasn't already happening. A few dozen infantry carried hellfire missiles could work wonders along the coast. Possibly Nato thinks it's too risky to be seen to kill Russian ships by proxy? Maybe this is an escalation option in case of chem/bio weapons use by Russia?
2121,1648165703,2022-03-24T23:48:23+00:00,POLA,Poland,&gt;what do Poles view as Russians fighting Germans in Poland\n\nIn the end? Occupation.


#### Inspecting user flairs

In [13]:
has_flair = comments.author_flair_css_class.notna()

with pd.option_context("display.max_rows", None):
    display(comments[has_flair].groupby("author_flair_css_class").size())

author_flair_css_class
             4
AMST         2
ASTR        11
AT-2         2
AUST         4
BELA         7
BELG        12
BOSN         1
BRAZ         9
BRUX         1
BULG         9
CANA        17
CH-GE        1
CHIL         1
CROA        24
CYPR         1
CZ-10        3
CZEC         9
DE-BB        1
DE-BE        5
DE-BW        1
DE-BY        7
DE-HH        9
DE-NW        7
DE-SH        4
DENK        38
EART        19
EMRM         2
ENGL        10
ES-AN        1
ES-CT        1
ES-GA        3
ES-MD        1
ESPA        20
ESTO         2
EURO       153
FINL        31
FLAN         1
FR-AQUI      1
FR-IDFR      2
FR-LORR      7
FR-MPYR      4
FR-PACA      3
FRAN        17
FRNK         1
GEOR         7
GERM        35
GIBR         1
GREE         6
HUNG         9
ICEL         8
IREL         5
ISRA         5
ITAL        24
JAPA         2
LATV         1
LAZI         2
LITH        12
LOMB         2
LUXE         2
MOLD         1
MONT         7
MORO         1
NETH        40
NEWZ         2
NL

In [14]:
with pd.option_context("display.max_rows", None):
    display((comments[has_flair].groupby("author_flair_css_class")["author_flair_text"].agg(pd.Series.mode)))

author_flair_css_class
                                                            
AMST                                               Amsterdam
ASTR                                                 Austria
AT-2                                     Carinthia (Austria)
AUST                                               Australia
BELA                                                 Belarus
BELG                                                 Belgium
BOSN                                                 Bosnia 
BRAZ       Brazil "What is a Brazilian doing modding r/eu...
BRUX                                      Brussels (Belgium)
BULG                                             Bulgaria/US
CANA                                                  Canada
CH-GE            Republic and Canton of Geneva (Switzerland)
CHIL                   Poland if it was colonized by Somalia
CROA                                                 Croatia
CYPR                                                  Cyprus
C