# Python: Reddit API authentication and scraping

**Goal**: connect to the **Reddit API** and extract valuable data from it!

## Authenticating with the Reddit API

In [1]:
# We import the standard-library and requests modules
import os

import requests
import requests.auth as auth

In [2]:
# Credentials are read from environment variables so that no secret is stored in the notebook.
# Required variables: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD.
# A missing variable raises a KeyError that names it.
# NOTE: the values that used to be hard-coded here should be treated as leaked and rotated.

# We use the HTTPBasicAuth() method to send the application's client id and secret
client_auth = auth.HTTPBasicAuth(os.environ["REDDIT_CLIENT_ID"], os.environ["REDDIT_CLIENT_SECRET"])
# We add the username and password of our reddit account
credentials_data = {
    "grant_type" : "password",
    "username" : os.environ["REDDIT_USERNAME"],
    "password" : os.environ["REDDIT_PASSWORD"]
}
# We add a name for identification
headers = {"User-agent" : "Authentication Reddit API"}

In [3]:
# We generate a token with all the previous information.
# timeout: requests waits indefinitely by default, so a stalled connection would hang the notebook.
response = requests.post("https://www.reddit.com/api/v1/access_token",
                        auth=client_auth,
                        data=credentials_data,
                        headers=headers,
                        timeout=10)
# Fail in this cell on an HTTP error status instead of later, when the body is parsed
response.raise_for_status()

In [4]:
response_json = response.json()
# The token endpoint may answer with a payload that carries no access token;
# stop here with a clear message rather than with a KeyError in a later cell.
if "access_token" not in response_json:
    raise RuntimeError(f"Reddit did not return an access token: {response_json}")
# Display the payload with the bearer token masked so it is never saved in the notebook output
{**response_json, "access_token": "<redacted>"}

{'access_token': '<redacted>',
 'token_type': 'bearer',
 'expires_in': 3600,
 'scope': '*'}

In [5]:
# Headers for authenticated calls: the bearer token from the token response plus our user agent
headers = {
    "authorization": "bearer {}".format(response_json['access_token']),
    "User-agent": "Authentication Reddit API",
}

# Query parameter restricting the listing to the last day
params = dict(t="day")

Let's send a **GET request** with the `headers` and `params` arguments in order to **retrieve** the day's **most popular posts** from the Lex Fridman subreddit (r/lexfridman).

In [6]:
# timeout avoids hanging forever on a stalled connection; raise_for_status() surfaces an HTTP
# error status in this cell rather than as a confusing KeyError when the body is read later.
lex_response = requests.get("https://oauth.reddit.com/r/lexfridman/top",
                            headers=headers,
                            params=params,
                            timeout=10)
lex_response.raise_for_status()

In [7]:
# Decode the JSON body of the listing response into Python objects
lex_top_posts = lex_response.json()
# Last expression of the cell: shows the decoded listing
lex_top_posts

{'kind': 'Listing',
 'data': {'after': None,
  'dist': 1,
  'modhash': None,
  'geo_filter': '',
  'children': [{'kind': 't3',
    'data': {'approved_at_utc': None,
     'subreddit': 'lexfridman',
     'selftext': '',
     'author_fullname': 't2_75fskj10',
     'saved': False,
     'mod_reason_title': None,
     'gilded': 0,
     'clicked': False,
     'title': 'Georges St-Pierre, John Danaher &amp; Gordon Ryan: The Greatest of All Time | Lex Fridman Podcast #260',
     'link_flair_richtext': [],
     'subreddit_name_prefixed': 'r/lexfridman',
     'hidden': False,
     'pwls': 6,
     'link_flair_css_class': None,
     'downs': 0,
     'thumbnail_height': 105,
     'top_awarded_type': None,
     'hide_score': False,
     'name': 't3_sgio9h',
     'quarantine': False,
     'link_flair_text_color': 'dark',
     'upvote_ratio': 0.98,
     'author_flair_background_color': None,
     'subreddit_type': 'public',
     'ups': 41,
     'total_awards_received': 0,
     'media_embed': {'content'

## Extract the most popular post

In [8]:
# The posts are the 'children' of the listing's 'data'; each child is a dict
# with a 'kind' and a nested 'data' dict holding the post fields.
lex_top_posts_data = lex_top_posts['data']['children']
# Last expression of the cell: shows the list of posts
lex_top_posts_data

[{'kind': 't3',
  'data': {'approved_at_utc': None,
   'subreddit': 'lexfridman',
   'selftext': '',
   'author_fullname': 't2_75fskj10',
   'saved': False,
   'mod_reason_title': None,
   'gilded': 0,
   'clicked': False,
   'title': 'Georges St-Pierre, John Danaher &amp; Gordon Ryan: The Greatest of All Time | Lex Fridman Podcast #260',
   'link_flair_richtext': [],
   'subreddit_name_prefixed': 'r/lexfridman',
   'hidden': False,
   'pwls': 6,
   'link_flair_css_class': None,
   'downs': 0,
   'thumbnail_height': 105,
   'top_awarded_type': None,
   'hide_score': False,
   'name': 't3_sgio9h',
   'quarantine': False,
   'link_flair_text_color': 'dark',
   'upvote_ratio': 0.98,
   'author_flair_background_color': None,
   'subreddit_type': 'public',
   'ups': 41,
   'total_awards_received': 0,
   'media_embed': {'content': '&lt;iframe width="356" height="200" src="https://www.youtube.com/embed/KdmDtqB46Jc?feature=oembed&amp;enablejsapi=1" frameborder="0" allow="accelerometer; autopla

In [9]:
# Start from "no post found"; a post must reach at least 0 upvotes to be selected
most_upvotes_id, most_upvotes_score = "", 0

for entry in lex_top_posts_data:
    fields = entry['data']
    # Skip posts that are strictly below the best score seen so far
    if fields['ups'] < most_upvotes_score:
        continue
    # Equal scores also land here, so among ties the later post wins
    most_upvotes_id, most_upvotes_score = fields['id'], fields['ups']

In [10]:
# Display the id and the upvote count of the most upvoted post as a tuple
most_upvotes_id, most_upvotes_score

('sgio9h', 41)

## Extract all comments from the post

In [11]:
# Build the URL from most_upvotes_id instead of a hard-coded post id, so this cell keeps
# working when a different post is the most upvoted one.
if not most_upvotes_id:
    raise ValueError("No post id available: the listing of top posts was empty")

# timeout avoids hanging forever on a stalled connection; raise_for_status() surfaces an HTTP
# error status in this cell rather than when the body is read later.
lex_comments_response = requests.get(f"https://oauth.reddit.com/r/lexfridman/comments/{most_upvotes_id}",
                                     headers=headers,
                                     params=params,
                                     timeout=10)
lex_comments_response.raise_for_status()

In [12]:
# Decode the JSON body of the comments response; as the output below shows,
# the result is a list of listings rather than a single listing.
comments_lex_top_posts = lex_comments_response.json()
# Last expression of the cell: shows the decoded response
comments_lex_top_posts

[{'kind': 'Listing',
  'data': {'after': None,
   'dist': 1,
   'modhash': None,
   'geo_filter': '',
   'children': [{'kind': 't3',
     'data': {'approved_at_utc': None,
      'subreddit': 'lexfridman',
      'selftext': '',
      'user_reports': [],
      'saved': False,
      'mod_reason_title': None,
      'gilded': 0,
      'clicked': False,
      'title': 'Georges St-Pierre, John Danaher &amp; Gordon Ryan: The Greatest of All Time | Lex Fridman Podcast #260',
      'link_flair_richtext': [],
      'subreddit_name_prefixed': 'r/lexfridman',
      'hidden': False,
      'pwls': 6,
      'link_flair_css_class': None,
      'downs': 0,
      'thumbnail_height': 105,
      'top_awarded_type': None,
      'parent_whitelist_status': 'all_ads',
      'hide_score': False,
      'name': 't3_sgio9h',
      'quarantine': False,
      'link_flair_text_color': 'dark',
      'upvote_ratio': 0.98,
      'author_flair_background_color': None,
      'subreddit_type': 'public',
      'ups': 43,
  

## Extract the most popular comment

In [13]:
# Element [0] of the response is the listing holding the post itself (kind 't3');
# element [1] is the listing whose children are the comments (kind 't1').
lex_top_comments_data = comments_lex_top_posts[1]['data']['children']
# Last expression of the cell: shows the list of comments
lex_top_comments_data

[{'kind': 't1',
  'data': {'subreddit_id': 't5_2777mj',
   'approved_at_utc': None,
   'author_is_blocked': False,
   'comment_type': None,
   'awarders': [],
   'mod_reason_by': None,
   'banned_by': None,
   'author_flair_type': 'text',
   'total_awards_received': 0,
   'subreddit': 'lexfridman',
   'author_flair_template_id': None,
   'likes': None,
   'replies': '',
   'user_reports': [],
   'saved': False,
   'id': 'huwshv9',
   'banned_at_utc': None,
   'mod_reason_title': None,
   'gilded': 0,
   'archived': False,
   'collapsed_reason_code': None,
   'no_follow': False,
   'author': 'BruhThatIsCrazy',
   'can_mod_post': False,
   'created_utc': 1643580749.0,
   'send_replies': True,
   'parent_id': 't3_sgio9h',
   'score': 11,
   'author_fullname': 't2_5t56gef9',
   'approved_by': None,
   'mod_note': None,
   'all_awardings': [],
   'collapsed': False,
   'body': 'absolute banger of a podcast lineup',
   'edited': False,
   'top_awarded_type': None,
   'author_flair_css_class'

In [14]:
# Keep only real comments (kind 't1'). A comment listing can also contain other children,
# such as 'more' placeholders, whose data has no 'ups' field and would raise a KeyError.
comments_only = [child['data'] for child in lex_top_comments_data if child.get('kind') == 't1']

if comments_only:
    # max() works even when every comment has a negative score.
    # reversed() keeps the previous tie-break: among equal scores the later comment wins.
    best_comment = max(reversed(comments_only), key=lambda data: data['ups'])
    most_upvotes_comments_id = best_comment['id']
    most_upvotes_comments_score = best_comment['ups']
else:
    # No comment at all: keep the "nothing found" values used before
    most_upvotes_comments_id = ""
    most_upvotes_comments_score = 0

In [15]:
# Display the id and the upvote count of the most upvoted comment as a tuple
most_upvotes_comments_id, most_upvotes_comments_score

('huwshv9', 11)