## Stack exchange questions (with Paging)

### 1. Import the Libraries

In [1]:
# Import Pandas, Requests, JSON and Time library
import pandas as pd
import requests
import json
import time

### 2. Generate URL request, access its status code attribute and access data

In [2]:
# Response object returned from the API
response_API = requests.get("https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow")

In [3]:
# Print out the status code value
print(response_API.status_code)

200


### 3. Convert JSON string into Python object (Dictionary)

In [4]:
# JSON response of the JSON object of the result
response_json = response_API.json()

# Show the JSON response
response_json

{'items': [{'tags': ['reactjs', 'redux'],
   'owner': {'account_id': 21317036,
    'reputation': 29,
    'user_id': 15690730,
    'user_type': 'registered',
    'profile_image': 'https://lh3.googleusercontent.com/a-/AOh14GhFpV6WzoCMNu5yZwTqfxzXibXYogD-yWPutF0-dQ=k-s128',
    'display_name': 'Tim Taylor',
    'link': 'https://stackoverflow.com/users/15690730/tim-taylor'},
   'is_answered': False,
   'view_count': 12,
   'answer_count': 0,
   'score': 0,
   'last_activity_date': 1633126882,
   'creation_date': 1633126243,
   'last_edit_date': 1633126882,
   'question_id': 69412410,
   'content_license': 'CC BY-SA 4.0',
   'link': 'https://stackoverflow.com/questions/69412410/why-am-i-receiving-invalid-hook-call-error',
   'title': 'Why am I receiving &#39;Invalid hook call&#39; error?'},
  {'tags': ['javascript', 'reactjs', 'api', 'fetch'],
   'owner': {'account_id': 20580108,
    'reputation': 53,
    'user_id': 15106423,
    'user_type': 'registered',
    'profile_image': 'https://i.st

In [5]:
# Show the Keys of the Dictionary as a List
response_json.keys()

dict_keys(['items', 'has_more', 'quota_max', 'quota_remaining'])

In [6]:
# Show the Value of the Key ('items') of the Dictionary as a List
response_json['items']

[{'tags': ['reactjs', 'redux'],
  'owner': {'account_id': 21317036,
   'reputation': 29,
   'user_id': 15690730,
   'user_type': 'registered',
   'profile_image': 'https://lh3.googleusercontent.com/a-/AOh14GhFpV6WzoCMNu5yZwTqfxzXibXYogD-yWPutF0-dQ=k-s128',
   'display_name': 'Tim Taylor',
   'link': 'https://stackoverflow.com/users/15690730/tim-taylor'},
  'is_answered': False,
  'view_count': 12,
  'answer_count': 0,
  'score': 0,
  'last_activity_date': 1633126882,
  'creation_date': 1633126243,
  'last_edit_date': 1633126882,
  'question_id': 69412410,
  'content_license': 'CC BY-SA 4.0',
  'link': 'https://stackoverflow.com/questions/69412410/why-am-i-receiving-invalid-hook-call-error',
  'title': 'Why am I receiving &#39;Invalid hook call&#39; error?'},
 {'tags': ['javascript', 'reactjs', 'api', 'fetch'],
  'owner': {'account_id': 20580108,
   'reputation': 53,
   'user_id': 15106423,
   'user_type': 'registered',
   'profile_image': 'https://i.stack.imgur.com/BHdqr.jpg?s=128&g=1'

In [7]:
# Number of elements in a List
print("Number of elements in the 'items' list is", len(response_json['items']))

Number of elements in the 'items' list is 30


### 4. Paging

In [8]:
# Loop through pages in API

# Define page, pagesize and fromdate parameters for fetching pages of results from the API (according to StackExchange documentation)
page = 1
pagesize = 100
fromdate = 1633046400 # 01.10.2021

has_more = True

# Create empty List
total_results = []

while has_more:   
    # Define the url with page, pagesize and fromdate parameters
    url = f"https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page={page}&pagesize={pagesize}&fromdate={fromdate}"
    print("Requesting:", url)
    
    # Response object returned from the API
    response = requests.get(url)
    
    # JSON response of the JSON object of the result
    response_data = response.json()
    
    # Check if data in 'items' is present
    if len(response_data['items']) == 0:
        # If not, exit the loop
        break
    # If yes, then add the specified list elements (or any iterable) to the end of the current list in order to collect all API results into a List
    total_results.extend(response_data['items'])
    
    # Increaae the page number by 1
    page += 1
    
    # Halt the code execution (timeout for 3 seconds added to prevent rate-limiting)
    time.sleep(3)

Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=1&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=2&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=3&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=4&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=5&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=6&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=7&pagesize=100&fromdate=1633046400
Requesting: https://

Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=60&pagesize=100&fromdate=1633046400
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=61&pagesize=100&fromdate=1633046400


In [9]:
# Number of elements in a List
print(len(total_results))

5974


### 5. Add Data into Pandas DataFrame

#### using .DataFrame()

In [10]:
# Add Data into Pandas DataFrame
df = pd.DataFrame(total_results)

In [11]:
# Print out the Pandas DataFrame
print(df)

                                                   tags  \
0                                      [reactjs, redux]   
1                     [javascript, reactjs, api, fetch]   
2                                                 [css]   
3            [python, sql, python-3.x, sqlite, tkinter]   
4                         [postgresql, database-design]   
...                                                 ...   
5969                         [r, r-markdown, flextable]   
5970                     [python, mysql, excel, pandas]   
5971  [android, flutter, dart, push-notification, fi...   
5972                           [java, logic, math.sqrt]   
5973                                       [java, json]   

                                                  owner  is_answered  \
0     {'account_id': 21317036, 'reputation': 29, 'us...        False   
1     {'account_id': 20580108, 'reputation': 53, 'us...         True   
2     {'account_id': 5157897, 'reputation': 217, 'us...        False   
3  

In [12]:
# Display the few top rows of Pandas DataFrame
df.head()

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,last_edit_date,question_id,content_license,link,title,accepted_answer_id,closed_date,closed_reason,migrated_to,locked_date,migrated_from
0,"[reactjs, redux]","{'account_id': 21317036, 'reputation': 29, 'us...",False,12,0,0,1633126882,1633126243,1633127000.0,69412410,CC BY-SA 4.0,https://stackoverflow.com/questions/69412410/w...,Why am I receiving &#39;Invalid hook call&#39;...,,,,,,
1,"[javascript, reactjs, api, fetch]","{'account_id': 20580108, 'reputation': 53, 'us...",True,41,4,1,1633126878,1633119975,,69411652,CC BY-SA 4.0,https://stackoverflow.com/questions/69411652/h...,How to use two map function with two fetching url,69412270.0,,,,,
2,[css],"{'account_id': 5157897, 'reputation': 217, 'us...",False,8,1,0,1633126877,1633126410,,69412432,CC BY-SA 4.0,https://stackoverflow.com/questions/69412432/w...,Why isn&#39;t the background color in my nav r...,,,,,,
3,"[python, sql, python-3.x, sqlite, tkinter]","{'account_id': 22753997, 'reputation': 25, 'us...",False,17,0,0,1633126873,1633123896,1633127000.0,69412157,CC BY-SA 4.0,https://stackoverflow.com/questions/69412157/p...,Problem saving field in database: insert 2 fie...,,,,,,
4,"[postgresql, database-design]","{'account_id': 6515552, 'reputation': 1, 'user...",False,2,0,0,1633126872,1633126872,,69412480,CC BY-SA 4.0,https://stackoverflow.com/questions/69412480/i...,Is it possible to build a table of device inte...,,,,,,


In [13]:
# Display the few last rows of Pandas DataFrame
df.tail()

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,last_edit_date,question_id,content_license,link,title,accepted_answer_id,closed_date,closed_reason,migrated_to,locked_date,migrated_from
5969,"[r, r-markdown, flextable]","{'account_id': 18059282, 'reputation': 135, 'u...",False,13,0,0,1633046660,1633046660,,69399943,CC BY-SA 4.0,https://stackoverflow.com/questions/69399943/i...,Is there a way to automatically position and f...,,,,,,
5970,"[python, mysql, excel, pandas]","{'account_id': 16609211, 'reputation': 43, 'us...",False,9,0,0,1633046646,1633046646,,69399939,CC BY-SA 4.0,https://stackoverflow.com/questions/69399939/h...,How to delete duplicate rows from table in SQL...,,,,,,
5971,"[android, flutter, dart, push-notification, fi...","{'account_id': 20905383, 'reputation': 1, 'use...",False,11,0,0,1633046546,1633046546,,69399931,CC BY-SA 4.0,https://stackoverflow.com/questions/69399931/t...,Trying to run periodic code in background of f...,,,,,,
5972,"[java, logic, math.sqrt]","{'account_id': 22458868, 'reputation': 5, 'use...",False,27,0,0,1633046495,1633046495,,69399928,CC BY-SA 4.0,https://stackoverflow.com/questions/69399928/t...,The logic sqrt method,,,,,,
5973,"[java, json]","{'account_id': 19782045, 'reputation': 347, 'u...",False,24,0,0,1633046432,1633046432,,69399926,CC BY-SA 4.0,https://stackoverflow.com/questions/69399926/d...,Deserializing a Json with lists of objects usi...,,,,,,


### 6. Pandas DataFrame to .csv file

In [14]:
# Write Pandas DataFrame to .csv file
df.to_csv('C:/Users/Administrator/Desktop/Data Engineer/stack_exchange_questions.csv', index = False)