## Stack Exchange questions (with Paging)

### 1. Import the Libraries

In [1]:
# Import Pandas, Requests, JSON and Time library
import pandas as pd
import requests
import json
import time

### 2. Generate URL request, access its status code attribute and access data

In [2]:
# Request the most recently active Stack Overflow questions (no paging parameters are passed here).
# A timeout is set so the cell fails fast instead of waiting indefinitely if the API does not answer.
response_API = requests.get("https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow", timeout=30)

In [3]:
# Print the HTTP status code of the response (200 means the request succeeded)
print(response_API.status_code)

200


### 3. Convert JSON string into Python object (Dictionary)

In [4]:
# Parse the JSON body of the response into a Python dictionary
response_json = response_API.json()

# Display the parsed response
response_json

{'items': [{'tags': ['node.js', 'typescript', 'testing', 'jestjs'],
   'owner': {'account_id': 19362206,
    'reputation': 21,
    'user_id': 14157249,
    'user_type': 'registered',
    'profile_image': 'https://www.gravatar.com/avatar/8cb46049f5947f1a51193529697693ba?s=128&d=identicon&r=PG&f=1',
    'display_name': 'Steve',
    'link': 'https://stackoverflow.com/users/14157249/steve'},
   'is_answered': False,
   'view_count': 2,
   'answer_count': 0,
   'score': 0,
   'last_activity_date': 1633155614,
   'creation_date': 1633155614,
   'question_id': 69414512,
   'content_license': 'CC BY-SA 4.0',
   'link': 'https://stackoverflow.com/questions/69414512/how-can-i-handle-jest-has-detected-the-following-open-handles-potentially-keepin',
   'title': 'How can i handle jest has detected the following open handles potentially keeping Jest from exiting when using node-ftech'},
  {'tags': ['node.js', 'reactjs', 'npm', 'publish', 'lint'],
   'owner': {'account_id': 16270137,
    'reputation'

In [5]:
# Show the top-level keys of the response dictionary (returned as a dict_keys view, not a list)
response_json.keys()

dict_keys(['items', 'has_more', 'quota_max', 'quota_remaining'])

In [6]:
# Show the value stored under the 'items' key: a list containing one dictionary per question
response_json['items']

[{'tags': ['node.js', 'typescript', 'testing', 'jestjs'],
  'owner': {'account_id': 19362206,
   'reputation': 21,
   'user_id': 14157249,
   'user_type': 'registered',
   'profile_image': 'https://www.gravatar.com/avatar/8cb46049f5947f1a51193529697693ba?s=128&d=identicon&r=PG&f=1',
   'display_name': 'Steve',
   'link': 'https://stackoverflow.com/users/14157249/steve'},
  'is_answered': False,
  'view_count': 2,
  'answer_count': 0,
  'score': 0,
  'last_activity_date': 1633155614,
  'creation_date': 1633155614,
  'question_id': 69414512,
  'content_license': 'CC BY-SA 4.0',
  'link': 'https://stackoverflow.com/questions/69414512/how-can-i-handle-jest-has-detected-the-following-open-handles-potentially-keepin',
  'title': 'How can i handle jest has detected the following open handles potentially keeping Jest from exiting when using node-ftech'},
 {'tags': ['node.js', 'reactjs', 'npm', 'publish', 'lint'],
  'owner': {'account_id': 16270137,
   'reputation': 1167,
   'user_id': 11749004

In [7]:
# Count how many questions the single request above returned
print("Number of elements in the 'items' list is", len(response_json['items']))

Number of elements in the 'items' list is 30


### 4. Paging

In [8]:
# Loop through the pages of the API and collect every question into one list

# Paging parameters for the request (see the StackExchange API documentation):
# page is the 1-based page number, pagesize the number of items per page, and
# fromdate a Unix timestamp passed as the API's 'fromdate' filter
page = 1
pagesize = 100
fromdate = 1633132800 # 02.10.2021

# Minimum pause between two requests, in seconds (to prevent rate-limiting)
min_delay = 3

# Becomes False as soon as the API reports that no further pages exist
has_more = True

# Accumulates the 'items' of every page
total_results = []

while has_more:
    # Define the url with page, pagesize and fromdate parameters
    url = f"https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page={page}&pagesize={pagesize}&fromdate={fromdate}"
    print("Requesting:", url)

    # Response object returned from the API; the timeout keeps the loop from hanging forever
    response = requests.get(url, timeout=30)

    # Fail with a clear HTTP error (e.g. when throttled) instead of a confusing KeyError on 'items' below
    response.raise_for_status()

    # Parse the JSON body of the response into a dictionary
    response_data = response.json()

    # Stop if the page contains no data
    page_items = response_data['items']
    if len(page_items) == 0:
        break

    # Append the questions of this page to the overall result list
    total_results.extend(page_items)

    # The response wrapper reports whether more pages exist; stop here if not,
    # which saves the extra request (and the pause) that would only return an empty page
    has_more = response_data.get('has_more', False)
    if not has_more:
        break

    # Increase the page number by 1
    page += 1

    # Pause before the next request; if the API asks for a longer 'backoff' (in seconds), honor it
    time.sleep(max(min_delay, response_data.get('backoff', 0)))

Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=1&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=2&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=3&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=4&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=5&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=6&pagesize=100&fromdate=1633132800
Requesting: https://api.stackexchange.com/2.3/questions?order=desc&sort=activity&site=stackoverflow&page=7&pagesize=100&fromdate=1633132800
Requesting: https://

In [9]:
# Total number of questions collected across all requested pages
print(len(total_results))

753


### 5. Add Data into Pandas DataFrame

#### using .DataFrame()

In [10]:
# Build a DataFrame from the collected questions: one row per question, one column per top-level key
df = pd.DataFrame(total_results)

In [11]:
# Print the plain-text representation of the DataFrame (pandas abbreviates the middle rows with '..')
print(df)

                                             tags  \
0          [node.js, typescript, testing, jestjs]   
1                  [javascript, node.js, cheerio]   
2                                        [python]   
3                                          [java]   
4    [java, eclipse, while-loop, syntax, integer]   
..                                            ...   
748    [splunk, splunk-query, splunk-calculation]   
749     [gradle, build.gradle, gradle-kotlin-dsl]   
750             [mysql, docker, pyspark, airflow]   
751                              [c++, rapidjson]   
752                                [r, stargazer]   

                                                 owner  is_answered  \
0    {'account_id': 19362206, 'reputation': 21, 'us...        False   
1    {'account_id': 13391178, 'reputation': 2691, '...        False   
2    {'account_id': 22248294, 'reputation': 1, 'use...        False   
3    {'account_id': 5672211, 'reputation': 4058, 'u...        False   
4    {'a

In [12]:
# Display the first five rows of the DataFrame
df.head()

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,question_id,content_license,link,title,last_edit_date,accepted_answer_id,closed_date,closed_reason
0,"[node.js, typescript, testing, jestjs]","{'account_id': 19362206, 'reputation': 21, 'us...",False,2,0,0,1633155614,1633155614,69414512,CC BY-SA 4.0,https://stackoverflow.com/questions/69414512/h...,How can i handle jest has detected the followi...,,,,
1,"[javascript, node.js, cheerio]","{'account_id': 13391178, 'reputation': 2691, '...",False,11,0,0,1633155582,1633152100,69414213,CC BY-SA 4.0,https://stackoverflow.com/questions/69414213/t...,TypeError: dom.getElementsByTagName is not a f...,1633156000.0,,,
2,[python],"{'account_id': 22248294, 'reputation': 1, 'use...",False,4,0,0,1633155563,1633155563,69414509,CC BY-SA 4.0,https://stackoverflow.com/questions/69414509/h...,Having a problem in python: Showing error can ...,,,,
3,[java],"{'account_id': 5672211, 'reputation': 4058, 'u...",False,35,2,0,1633155554,1633151930,69414200,CC BY-SA 4.0,https://stackoverflow.com/questions/69414200/r...,remove data from one list with another list da...,,,,
4,"[java, eclipse, while-loop, syntax, integer]","{'account_id': 22920572, 'reputation': 1, 'use...",False,4,0,0,1633155545,1633155545,69414508,CC BY-SA 4.0,https://stackoverflow.com/questions/69414508/w...,why it is showing syntax error like &quot;Synt...,,,,


In [13]:
# Display the last five rows of the DataFrame
df.tail()

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,question_id,content_license,link,title,last_edit_date,accepted_answer_id,closed_date,closed_reason
748,"[splunk, splunk-query, splunk-calculation]","{'account_id': 3106206, 'reputation': 7429, 'u...",False,7,0,0,1633133000,1633133000,69412986,CC BY-SA 4.0,https://stackoverflow.com/questions/69412986/s...,Splunk Concurrency Calculation,,,,
749,"[gradle, build.gradle, gradle-kotlin-dsl]","{'account_id': 15084680, 'reputation': 43, 'us...",False,5,0,0,1633132999,1633132999,69412984,CC BY-SA 4.0,https://stackoverflow.com/questions/69412984/g...,gradlew kts multi project outrput jar,,,,
750,"[mysql, docker, pyspark, airflow]","{'account_id': 20840070, 'reputation': 1, 'use...",False,13,0,0,1633132936,1633132936,69412978,CC BY-SA 4.0,https://stackoverflow.com/questions/69412978/h...,How to configure JAVA_HOME setup in airflow,,,,
751,"[c++, rapidjson]","{'account_id': 6706810, 'reputation': 854, 'us...",False,14,0,0,1633132847,1633132847,69412970,CC BY-SA 4.0,https://stackoverflow.com/questions/69412970/a...,arrays of arrays writing to a file using rapid...,,,,
752,"[r, stargazer]","{'account_id': 18443959, 'reputation': 29, 'us...",False,7,0,0,1633132819,1633132819,69412966,CC BY-SA 4.0,https://stackoverflow.com/questions/69412966/s...,Stargazer regression table not displaying basi...,,,,


### 6. Pandas DataFrame to .csv file

In [14]:
# Write the DataFrame to a .csv file; index = False leaves the row index out of the file
# NOTE(review): the destination is a hard-coded absolute Windows path, so this cell will presumably
# fail on any machine where that directory does not exist — consider a configurable relative path.
df.to_csv('C:/Users/Administrator/Desktop/Data Engineer/stack_exchange_questions.csv', index = False)