# 各种榜单
1. 总累计人气榜：Top20作品排名
2. 总阅读榜：Top20作品排名
3. 各类型累计人气榜：各类型Top20作品排名
4. 各类型在线阅读榜：各类型Top20作品排名
5. 作者生产力：最多产作者排行榜Top20

In [1]:
from pymongo import MongoClient
# Connect to the MongoDB server listening on localhost:27017.
client = MongoClient("mongodb://localhost:27017/")
# Select the `learn` database (attribute access is equivalent to client['learn']).
db = client.learn
# Select the `seven_cats` collection that the rankings below are read from.
books = db.seven_cats

In [2]:
import pandas as pd
# Load every document of the collection into a pandas DataFrame.
# The projection drops MongoDB's `_id` and keeps only the listed fields.
add = pd.DataFrame(
    list(
        books.find(
            filter={},
            projection={
                '_id': 0,
                'title': 1,
                'type': 1,
                'serialised': 1,
                'popularity': 1,
                'readings': 1,
                'writer': 1,
                'writer_word': 1,
            },
        )
    )
)
# Bare expression so the notebook displays the frame as the cell output.
add

Unnamed: 0,title,serialised,type,readings,popularity,writer,writer_word
0,重生2000：从追求青涩校花同桌开始,连载中,都市,35.5,962.0,痞子老妖,480.48
1,长生，苟在驿站，万年后我无敌,连载中,武侠仙侠,4.2,19.8,蚊香头家的小弟,38.60
2,我来自上界帝族，成婚当天媳妇跟人跑,连载中,玄幻奇幻,10.8,130.4,社恐啊社恐,61.96
3,九天斩神诀,连载中,玄幻奇幻,16.4,1055.6,小知了,686.74
4,鸿蒙霸体诀,连载中,玄幻奇幻,45.2,660.5,鱼初见,552.87
...,...,...,...,...,...,...,...
7713,重生之超级战舰,完结,科幻,0.3,14.6,彩虹之门,636.23
7714,全球进入大洪水时代,完结,科幻,0.2,25.9,死神钓者,424.60
7715,分手后，我成了天眼神医,连载中,都市,0.8,16.8,风狂笑,3370.91
7716,红楼败家子：我贾宝玉，又想纳妾了,完结,N次元,0.1,26.9,吃饭的萝卜,444.69


In [3]:
# 导入可视化工具
from pyecharts import options as opts

## 一、二、总累计人气榜与总阅读榜对比：Top20作品排名

In [4]:
# Build the two overall Top-20 tables: one ranked by popularity, one by readings.
# Pipeline for each: pick (title, metric) -> sort descending -> keep the first 20
# rows -> re-sort ascending (presumably so the largest bar ends up on top once
# the bar charts below are flipped horizontally).
agg1 = add[['title', 'popularity']]
agg2 = add[['title', 'readings']]
sort1 = agg1.sort_values(by='popularity', ascending=False)
sort2 = agg2.sort_values(by='readings', ascending=False)
sorted1 = sort1.head(20).sort_values(by='popularity')
sorted2 = sort2.head(20).sort_values(by='readings')
# Bare expression so the notebook displays the popularity Top 20.
sorted1

Unnamed: 0,title,popularity
5591,捡漏,2430.8
6212,我的七个姐姐绝色倾城,2453.1
56,民间诡闻实录,2517.5
4704,元尊,2604.1
4607,史上最强炼气期,2606.8
7043,都市潜龙,2884.1
51,寒门枭士,3089.4
4557,剑道第一仙,3279.3
4815,龙王令,3545.2
134,绝世强龙,3595.8


In [5]:
# Overlap rate: number of titles present in both Top-20 lists, divided by 20.
repeat = len(set(sorted1['title']).intersection(sorted2['title'])) / 20
# Bare expression so the notebook displays the rate.
repeat

0.4

In [6]:
from pyecharts.charts import Bar, Liquid, Grid

In [7]:
# Overall popularity Top 20 as a horizontal bar chart (value labels on the right).
bar1 = (
    Bar()
    .add_xaxis(sorted1['title'].tolist())
    .add_yaxis(
        '',
        sorted1['popularity'].tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='right'),
    )
    .reversal_axis()
)
bar1.set_global_opts(
    # splitline_opts(is_show=False) turns the grid lines off on both axes.
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False)),
    # axislabel_opts(is_show=False) additionally hides the x-axis value labels.
    xaxis_opts=opts.AxisOpts(
        axislabel_opts=opts.LabelOpts(is_show=False),
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    title_opts=opts.TitleOpts(title="总累计人气榜Top20", pos_left="25%", pos_top="2%"),
)

<pyecharts.charts.basic_charts.bar.Bar at 0x260a40c6b10>

In [8]:
# Overall readings Top 20 as a horizontal bar chart (value labels on the left).
bar2 = (
    Bar()
    .add_xaxis(sorted2['title'].tolist())
    .add_yaxis(
        '',
        sorted2['readings'].tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='left'),
    )
    .reversal_axis()
)
bar2.set_global_opts(
    # position="right" moves the y axis from its default left side to the right.
    yaxis_opts=opts.AxisOpts(position="right", splitline_opts=opts.SplitLineOpts(is_show=False)),
    # is_inverse=True reverses the direction in which the axis is drawn.
    xaxis_opts=opts.AxisOpts(
        axislabel_opts=opts.LabelOpts(is_show=False),
        is_inverse=True,
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    title_opts=opts.TitleOpts(title="总阅读频次榜Top20", pos_right="25%", pos_top="2%"),
)

<pyecharts.charts.basic_charts.bar.Bar at 0x260a40c5590>

In [9]:
# Liquid-fill gauge showing the overlap rate between the two Top-20 lists.
# The data list has one entry per wave (three here); per the original note the
# smallest value is the one displayed. is_outline_show=False hides the border.
liquid = Liquid().add("重复率", [repeat] * 3, is_outline_show=False)
liquid.set_global_opts(
    title_opts=opts.TitleOpts(
        title="人气榜与阅读榜的书籍重复率",
        pos_left="center",
        pos_bottom="15%",
    )
)

<pyecharts.charts.basic_charts.liquid.Liquid at 0x2608364f5d0>

In [10]:
# Combine the two bar charts and the liquid gauge on a single 1260x720 canvas.
grid = Grid(init_opts=opts.InitOpts(width="1260px", height="720px"))
# bar1 is confined to the left part of the canvas, bar2 to the right part.
grid.add(bar1, grid_opts=opts.GridOpts(pos_right="52%", pos_left="16%"))
grid.add(bar2, grid_opts=opts.GridOpts(pos_left="52%", pos_right="16%"))
grid.add(liquid, grid_opts=opts.GridOpts())
# Raw string: the Windows path contains backslashes (\s, \i, ...) that are not
# valid escape sequences and trigger a SyntaxWarning on recent Python versions.
# The resulting path is unchanged.
grid.render(r"D:\sevencats-novels-analysis\images\七猫人气and阅读Top20.html")

'D:\\sevencats-novels-analysis\\images\\七猫人气and阅读Top20.html'

## 三、各类型累计人气榜：各类型Top20作品排名

In [4]:
# Rank the novel types by their summed popularity, highest first.
agg = add[['title', 'type', 'popularity']]
grouped = agg.groupby('type')
# The first positional parameter of GroupBy.sum is `numeric_only`, not a column
# name; the previous sum('popularity') only worked because a non-empty string is
# truthy. Spell the intent out: sum numeric columns only (drops `title`).
types = grouped.sum(numeric_only=True).sort_values('popularity', ascending=False)
# Bare expression so the notebook displays the type order.
types.index

Index(['都市', '玄幻奇幻', '历史', '武侠仙侠', '科幻', 'N次元', '奇闻异事', '游戏', '体育', '军事',
       '现实题材'],
      dtype='object', name='type')

In [5]:
from pyecharts.charts import Bar, Timeline

In [7]:
# One horizontal Top-20 popularity bar chart per type, shown as the frames of a
# timeline on a 1260x720 canvas. Types are visited in the order of `types.index`.
tl = Timeline(init_opts=opts.InitOpts(width="1260px", height="720px"))
for t in types.index:
    # Top 20 of this type by popularity, re-sorted ascending for plotting.
    sort = grouped.get_group(t).sort_values('popularity', ascending=False)
    sort20 = sort[:20].sort_values('popularity')
    bar = Bar()
    bar.add_xaxis(sort20['title'].tolist())
    bar.add_yaxis('', sort20['popularity'].tolist(), label_opts=opts.LabelOpts(position="right"))
    bar.reversal_axis()
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title=f"{t}累计人气榜", pos_left="center", pos_top="2%"),
        xaxis_opts=opts.AxisOpts(name="累计人气值（万）"),
    )
    tl.add(bar, f"{t}")
# Playback settings belong to the timeline as a whole, so set them once after
# the loop instead of re-applying identical values on every iteration.
tl.add_schema(
    is_auto_play=True,      # start playing automatically
    play_interval=2000,     # time between frames, in milliseconds (2 s)
    is_loop_play=True,      # restart from the first frame after the last
    is_timeline_show=True,  # keep the timeline control visible
)
# Raw string: the backslashes in the Windows path are not valid escape sequences.
tl.render(r"D:\sevencats-novels-analysis\images\七猫各类型累计人气榜.html")

'D:\\sevencats-novels-analysis\\images\\七猫各类型累计人气榜.html'

## 四、各类型在线阅读榜：各类型Top20作品排名

In [8]:
# Rank the novel types by their summed readings, highest first.
agg = add[['title', 'type', 'readings']]
grouped = agg.groupby('type')
# The first positional parameter of GroupBy.sum is `numeric_only`, not a column
# name; the previous sum('readings') only worked because a non-empty string is
# truthy. Spell the intent out: sum numeric columns only (drops `title`).
types = grouped.sum(numeric_only=True).sort_values('readings', ascending=False)
# Bare expression so the notebook displays the type order.
types.index

Index(['都市', '玄幻奇幻', '历史', '武侠仙侠', '奇闻异事', '科幻', 'N次元', '游戏', '军事', '体育',
       '现实题材'],
      dtype='object', name='type')

In [9]:
from pyecharts.charts import Bar, Timeline

In [10]:
# One horizontal Top-20 readings bar chart per type, shown as the frames of a
# timeline on a 1260x720 canvas. Types are visited in the order of `types.index`.
tl = Timeline(init_opts=opts.InitOpts(width="1260px", height="720px"))
for t in types.index:
    # Top 20 of this type by readings, re-sorted ascending for plotting.
    sort = grouped.get_group(t).sort_values('readings', ascending=False)
    sort20 = sort[:20].sort_values('readings')
    bar = Bar()
    bar.add_xaxis(sort20['title'].tolist())
    bar.add_yaxis('', sort20['readings'].tolist(), label_opts=opts.LabelOpts(position="right"))
    bar.reversal_axis()
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title=f"{t}在线阅读榜", pos_left="center", pos_top="2%"),
        xaxis_opts=opts.AxisOpts(name="阅读频次（万）"),
    )
    tl.add(bar, f"{t}")
# Playback settings belong to the timeline as a whole, so set them once after
# the loop instead of re-applying identical values on every iteration.
tl.add_schema(
    is_auto_play=True,      # start playing automatically
    play_interval=2000,     # time between frames, in milliseconds (2 s)
    is_loop_play=True,      # restart from the first frame after the last
    is_timeline_show=True,  # keep the timeline control visible
)
# Raw string: the backslashes in the Windows path are not valid escape sequences.
tl.render(r"D:\sevencats-novels-analysis\images\七猫各类型在线阅读榜.html")

'D:\\sevencats-novels-analysis\\images\\七猫各类型在线阅读榜.html'

## 五、作者生产力：最多产作者排行榜Top20

In [4]:
# Author productivity: the 20 largest `writer_word` values, one row per distinct
# (writer, writer_word) pair, rounded to two decimals.
# NOTE(review): an author stored with two different writer_word values would
# appear twice after drop_duplicates - confirm the data never does that.
agg = add[['writer', 'writer_word']]
set_data = agg.drop_duplicates()
sort = (
    set_data
    .sort_values(by='writer_word', ascending=False)
    .head(20)
    .round(2)
)
# Bare expression so the notebook displays the ranking.
sort

Unnamed: 0,writer,writer_word
7344,明宇,5103.03
4862,风青阳,4394.41
5378,失落叶,4057.28
6593,莫若梦兮,4014.75
4785,鱼人二代,3986.31
6508,执笔天涯,3392.92
5434,半块铜板,3389.73
4998,风狂笑,3370.91
6308,新版红双喜,3257.69
4708,莫默,3223.34


In [5]:
from pyecharts.charts import Funnel

In [6]:
# Funnel chart of the 20 most prolific authors on a 720x720 canvas.
funnel = Funnel(init_opts=opts.InitOpts(width="720px", height="720px"))
# Each data item is a [writer, writer_word] pair; the label formatter renders
# "{name}：{value}万字" to the right of every segment.
funnel.add(
    "",
    [list(z) for z in zip(sort['writer'], sort['writer_word'])],
    label_opts=opts.LabelOpts(position="right", formatter="{b}：{c}万字"),
    min_=2500,
    max_=5200,
    funnel_align="left",
)
# legend_opts=opts.LegendOpts(is_show=False) hides the legend.
funnel.set_global_opts(
    legend_opts=opts.LegendOpts(is_show=False),
    title_opts=opts.TitleOpts(title="最多产作者排行榜Top20", pos_left="12%", pos_top="3%"),
)
# Raw string: the backslashes in the Windows path are not valid escape sequences
# and trigger a SyntaxWarning on recent Python versions. The path is unchanged.
funnel.render(r"D:\sevencats-novels-analysis\images\最多产作者排行榜.html")

'D:\\sevencats-novels-analysis\\images\\最多产作者排行榜.html'