# QiitaのAPIからデータを取得する

In [22]:
import getpass
import os

import pandas as pd
import requests

In [2]:
# Never store the token in the notebook: use the value exported in the
# environment, and only prompt (without echoing) when it is missing.
# NOTE(review): the token that used to be hardcoded here has been exposed and
# should be revoked and reissued.
if not os.environ.get('QIITA_API_KEY'):
    os.environ['QIITA_API_KEY'] = getpass.getpass('Qiita API token: ')

In [3]:
# Confirm the token is configured without echoing the secret into the saved output.
bool(os.getenv('QIITA_API_KEY'))

True

## 関数定義

In [14]:
def request_qiita_data(url, timeout=30):
    """
    Send a GET request to a Qiita API URL and return the raw response.

    The access token is read from the ``QIITA_API_KEY`` environment variable
    and sent as a Bearer token. When the variable is unset or empty, the
    ``Authorization`` header is omitted rather than sending the literal
    string ``Bearer None``.

    Parameters
    ----------
    url : str
        Fully-formed request URL, including any query string.
    timeout : float or tuple, optional
        Passed straight to ``requests.get`` so that a stalled connection
        cannot block the notebook forever. Defaults to 30 seconds.

    Returns
    -------
    requests.Response
        The response object, returned as-is. The status code is printed but
        not checked here; callers decide how to treat non-200 responses.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection errors, timeouts, ...).
    """
    headers = {
        'Content-Type': 'application/json',
        'Charset': 'utf-8',
    }
    token = os.getenv('QIITA_API_KEY')
    if token:
        headers['Authorization'] = 'Bearer {}'.format(token)
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Log status and final URL so paging progress is visible in the cell output.
    print('{}, {}'.format(response.status_code, response.url))
    return response

In [44]:
def get_qiita_item(max_pages=100, per_page=100):
    """
    Collect items from the Qiita ``/api/v2/items`` endpoint into a DataFrame.

    Pages are requested one after another, starting at page 1. With the
    defaults at most 10,000 items are retrieved, because the Qiita API
    returns at most 100 items per response and page numbers go up to 100.

    Parameters
    ----------
    max_pages : int, optional
        Highest page number to request. Defaults to 100.
    per_page : int, optional
        Number of items requested per page. Defaults to 100.

    Returns
    -------
    pandas.DataFrame
        One row per item, built from the decoded JSON records. Empty when
        the first page has no items.

    Raises
    ------
    RuntimeError
        If any page answers with a status other than 200. An error payload
        decodes to a dict, and extending the item list with it would
        silently add its keys as bogus rows.
    """
    item_list = []
    for page in range(1, max_pages + 1):
        url = 'https://qiita.com/api/v2/items?&per_page={per_page}&page={page}'.format(
            per_page=per_page, page=page)
        response = request_qiita_data(url)
        if response.status_code != 200:
            raise RuntimeError(
                'Qiita API returned HTTP {} for page {} ({})'.format(
                    response.status_code, page, url))
        result_list = response.json()
        if not result_list:
            # An empty page means there is nothing further to fetch.
            break
        item_list.extend(result_list)
    return pd.DataFrame.from_records(item_list)

## 呼び出し

In [45]:
# Runs the full paged fetch (pages 1-100, one request each); every request
# prints its status code and URL below.
result_item_df = get_qiita_item()

200, https://qiita.com/api/v2/items?&per_page=100&page=1
200, https://qiita.com/api/v2/items?&per_page=100&page=2
200, https://qiita.com/api/v2/items?&per_page=100&page=3
200, https://qiita.com/api/v2/items?&per_page=100&page=4
200, https://qiita.com/api/v2/items?&per_page=100&page=5
200, https://qiita.com/api/v2/items?&per_page=100&page=6
200, https://qiita.com/api/v2/items?&per_page=100&page=7
200, https://qiita.com/api/v2/items?&per_page=100&page=8
200, https://qiita.com/api/v2/items?&per_page=100&page=9
200, https://qiita.com/api/v2/items?&per_page=100&page=10
200, https://qiita.com/api/v2/items?&per_page=100&page=11
200, https://qiita.com/api/v2/items?&per_page=100&page=12
200, https://qiita.com/api/v2/items?&per_page=100&page=13
200, https://qiita.com/api/v2/items?&per_page=100&page=14
200, https://qiita.com/api/v2/items?&per_page=100&page=15
200, https://qiita.com/api/v2/items?&per_page=100&page=16
200, https://qiita.com/api/v2/items?&per_page=100&page=17
200, https://qiita.com/

In [46]:
# Sanity check: 100 pages x 100 items gives 10,000 rows when every page is full.
result_item_df.shape

(10000, 17)

In [47]:
# Inspect a single record to see which columns the API response produced.
result_item_df.head(1)

Unnamed: 0,rendered_body,body,coediting,comments_count,created_at,group,id,likes_count,private,reactions_count,tags,title,updated_at,url,user,page_views_count,team_membership
0,<p>▪️ 違うサイトの表示を自身のサイト内に貼り付けるには　？</p>\n\n<p>一見難...,▪️ 違うサイトの表示を自身のサイト内に貼り付けるには　？\n\n一見難しく考えるかもしれま...,False,0,2021-07-23T08:40:10+09:00,,0425cf1191d5e9ffa627,0,False,0,"[{'name': 'Ruby', 'versions': []}, {'name': 'H...",サイト内に違うサイトを表示,2021-07-23T08:40:59+09:00,https://qiita.com/wagadayon/items/0425cf1191d5...,{'description': 'I'm univercity student. My n...,,


In [49]:
# Dimensions once the nested 'tags' and 'user' columns are left out.
result_item_df.drop(columns=['tags', 'user']).shape

(10000, 15)

In [48]:
# First record once the nested 'tags' and 'user' columns are left out.
result_item_df.drop(columns=['tags', 'user']).head(1)

Unnamed: 0,rendered_body,body,coediting,comments_count,created_at,group,id,likes_count,private,reactions_count,title,updated_at,url,page_views_count,team_membership
0,<p>▪️ 違うサイトの表示を自身のサイト内に貼り付けるには　？</p>\n\n<p>一見難...,▪️ 違うサイトの表示を自身のサイト内に貼り付けるには　？\n\n一見難しく考えるかもしれま...,False,0,2021-07-23T08:40:10+09:00,,0425cf1191d5e9ffa627,0,False,0,サイト内に違うサイトを表示,2021-07-23T08:40:59+09:00,https://qiita.com/wagadayon/items/0425cf1191d5...,,


In [50]:
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('result', exist_ok=True)
# Write the items without the nested 'tags'/'user' columns, quoting every
# field (quoting=1 is csv.QUOTE_ALL) and using CRLF line endings.
# The keyword is spelled `lineterminator` since pandas 1.5; the former
# `line_terminator` spelling was removed in pandas 2.0.
result_item_df.drop(columns=['tags', 'user']).to_csv(
    'result/qiita_item.csv', index=False, quoting=1, lineterminator='\r\n')

## 実験

In [32]:
# Single-page URL (page 2, 100 items per page) for inspecting one response by hand.
url = 'https://qiita.com/api/v2/items?&per_page=100&page=2'

In [33]:
# Go through the shared helper so this request carries the same headers as the bulk fetch.
response = request_qiita_data(url)

200, https://qiita.com/api/v2/items?&per_page=100&page=2


In [34]:
# Decode the JSON body of the response into Python objects.
result_list = response.json()

In [35]:
# Number of items on this page (the URL asked for per_page=100).
len(result_list)

100

In [36]:
# Check the container type; get_qiita_item relies on it being a list when it calls extend().
type(result_list)

list

### APIレスポンスデータをpandas化してみる

In [37]:
# Build a DataFrame from the decoded records, one row per item.
result_df = pd.DataFrame.from_records(result_list)

In [27]:
# Rows = items on the page, columns = fields in each record.
result_df.shape

(100, 17)

In [30]:
# Look at the first record of the page.
result_df.head(1)

Unnamed: 0,rendered_body,body,coediting,comments_count,created_at,group,id,likes_count,private,reactions_count,tags,title,updated_at,url,user,page_views_count,team_membership
0,"\n<h1>\n<span id=""はじめに"" class=""fragment""></spa...",# はじめに\n\nAxrossを運営している藤原です。\n\n**[Axross](htt...,False,0,2021-07-23T08:21:54+09:00,,e3a97eef46cbef67eb7a,1,False,0,"[{'name': 'Python', 'versions': []}, {'name': ...",自然言語処理技術を活用したレシピの紹介,2021-07-23T08:21:54+09:00,https://qiita.com/Axross_SBiv/items/e3a97eef46...,"{'description': '現役エンジニアによる様々な業務で活かせる実践教材""レシピ""...",,


In [38]:
# Same inspection as the previous cell, run at a later execution count;
# presumably result_df was rebuilt from another response in between, which
# would explain the different record shown -- TODO confirm.
result_df.head(1)

Unnamed: 0,rendered_body,body,coediting,comments_count,created_at,group,id,likes_count,private,reactions_count,tags,title,updated_at,url,user,page_views_count,team_membership
0,"\n<h1>\n<span id=""mypyとは"" class=""fragment""></s...",# mypyとは\n\nhttps://github.com/python/mypy\n\n...,False,0,2021-07-22T18:56:59+09:00,,7710e028ef33ca338a4b,0,False,0,"[{'name': 'Python', 'versions': []}, {'name': ...",PythonのmypyでUnionを使うとき,2021-07-22T18:56:59+09:00,https://qiita.com/asuzuki2008/items/7710e028ef...,{'description': 'ここ数年Pythonでサーバーサイド案件がしかやってないW...,,


In [31]:
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('result', exist_ok=True)
# Write the single-page frame, quoting every field (quoting=1 is
# csv.QUOTE_ALL) and using CRLF line endings. The keyword is spelled
# `lineterminator` since pandas 1.5; the former `line_terminator` spelling
# was removed in pandas 2.0.
result_df.to_csv('result/result.csv', index=False, quoting=1, lineterminator='\r\n')

In [1]:
# for i in range(1, 101):
#     print(i)