In [1]:
import datetime
import gzip
# mecab-python3 installed from pip
import MeCab
import polars as pl
import urllib.request

In [2]:
# なろうAPIを叩く
# APIDOC: https://dev.syosetu.com/man/api/
url = "https://api.syosetu.com/novelapi/api/?biggenre=2&out=json"

query_params = {
    # 大ジャンル指定
    "biggenre": 2,
    # gzipレベル
    "gzip": 5,
    # 出力形式
    "out": "json",
    # 過去7日間の更新のもののみ
    "lastupdate": "thisweek",
    # ピックアップ作品のみ
    "ispickup": 1,
    # R15作品は除外
    "notr15": 1,
    # 評価が高い順
    "order": "hyoka",
    # 50件
    "lim": 50
}

req = urllib.request.Request(
    "{}?{}".format(
        url,
        urllib.parse.urlencode(query_params),
    )
)

In [3]:
# API Request
with urllib.request.urlopen(req) as res:
    decoded_res = gzip.GzipFile(fileobj=res)
    body = decoded_res.read()

In [4]:
# save json to body_responces
body_json = body.decode("utf-8")
now = datetime.datetime.now()

path = "body_responses/body_json_{}.json".format(now.strftime("%Y%m%d%H%M%S"))
with open(path, mode="w") as file:
    file.write(body_json)

In [7]:
# Read JSON as polar dataframe
# 1行目は件数なので除外
body_df = pl.read_json(path).with_row_count("row_number").filter(pl.col("row_number") > 1).sort("global_point", reverse=True)
body_df.head()

row_number,allcount,title,ncode,userid,writer,story,biggenre,genre,gensaku,keyword,general_firstup,general_lastup,novel_type,end,general_all_no,length,time,isstop,isr15,isbl,isgl,iszankoku,istensei,istenni,pc_or_k,global_point,daily_point,weekly_point,monthly_point,quarter_point,yearly_point,fav_novel_cnt,impression_cnt,review_cnt,all_point,all_hyoka_cnt,sasie_cnt,kaiwaritu,novelupdated_at,updated_at
u32,i64,str,str,i64,str,str,i64,i64,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,str
2,,"""弓と剣""","""N3017BO""",317870,"""淳A""","""ヴィジャヤン伯爵家三男サダは...",2,201,"""""","""""","""2013-03-14 11:...","""2023-02-12 01:...",1,1,474,2180275,4361,0,0,0,0,0,0,0,2,64930,42,88,188,1894,3456,18898,2364,10,27134,2948,0,32,"""2023-02-12 01:...","""2023-02-12 18:..."
3,,"""英雄魔術師はのんびり暮らした...","""N6394EI""",742785,"""柊遊馬""","""【書籍化：書籍版・コミック発...",2,201,"""""","""残酷な描写あり 異世界転生 ...","""2017-10-26 11:...","""2023-02-12 08:...",1,1,1388,3838362,7677,0,0,0,0,1,1,0,2,55523,16,68,350,1172,7158,18122,700,5,19279,2149,0,43,"""2023-02-12 08:...","""2023-02-12 20:..."
4,,"""優しい家族と、たくさんのもふ...","""N3782FU""",1749196,"""ありぽん""","""　＊ツギクルブックス様より、...",2,201,"""""","""異世界転生 ファンタジー 異...","""2019-10-07 20:...","""2023-02-12 12:...",1,1,635,1812704,3626,0,0,0,0,0,1,0,2,41291,12,102,342,1242,5066,12772,1240,1,15747,1727,0,48,"""2023-02-12 19:...","""2023-02-12 20:..."
5,,"""シーフな魔術師""","""N5035L""",81670,"""極楽とんぼ""","""盗賊《シーフ》ギルドで働いて...",2,201,"""""","""魔術師 シーフ ファンタジー...","""2010-05-19 21:...","""2023-02-12 12:...",1,1,834,1673193,3347,0,0,0,0,0,0,0,2,21384,0,48,150,484,2762,6346,1536,1,8692,984,0,20,"""2023-02-12 23:...","""2023-02-12 23:..."
6,,"""聖剣が最強の世界で、少年は弓...","""N3377HV""",1033248,"""さとう""","""　かつて、四人の魔王が率いる...",2,201,"""""","""残酷な描写あり 魔王 人間 ...","""2022-09-10 17:...","""2023-02-12 07:...",1,1,173,540420,1081,0,0,0,0,1,0,0,2,20980,4,106,392,1700,20932,5691,199,0,9598,1043,0,51,"""2023-02-12 07:...","""2023-02-12 21:..."


In [25]:
title = body_df.select("title").head(2)["title"][1]
print(title)

英雄魔術師はのんびり暮らしたい 　のんびりできない異世界生活


In [35]:
# titleを形態素解析する
tagger = MeCab.Tagger("-Owakati")
node = tagger.parse(title).split()
print(node)

['英雄', '魔術', '師', 'は', 'のんびり', '暮らし', 'たい', 'のんびり', 'でき', 'ない', '異', '世界', '生活']
