Data Visualization Analysis - Feature Analysis of Works

In [1]:
import pandas as pd
import numpy as np

from pyecharts.charts import *
from pyecharts import options as opts

Chart helper functions

In [2]:
def line_chart(t, data):
    """Build a dark-themed, area-filled line chart from ``[x, y]`` records.

    Args:
        t: Text used as the chart title.
        data: Sequence of indexable records; element 0 of each record is
            used as an x-axis category and element 1 as the matching y value.

    Returns:
        The configured chart object; the caller decides how to render it.
    """
    x_values = [record[0] for record in data]
    y_values = [record[1] for record in data]

    # Option objects are built up front so the chart calls below stay short.
    canvas = opts.InitOpts(theme='dark', width='600px', height='450px')
    area_fill = opts.AreaStyleOpts(opacity=1, color="cyan")
    category_axis = opts.AxisOpts(type_="category", boundary_gap=True)
    value_axis = opts.AxisOpts(
        type_="value",
        axistick_opts=opts.AxisTickOpts(is_show=True),
        splitline_opts=opts.SplitLineOpts(is_show=True),
    )

    chart = Line(init_opts=canvas)
    chart = chart.add_xaxis(x_values)
    # Unnamed series, point markers hidden, area under the line filled.
    chart = chart.add_yaxis(
        '',
        y_values,
        is_symbol_show=False,
        areastyle_opts=area_fill,
    )
    chart = chart.set_global_opts(
        title_opts=opts.TitleOpts(title=t),
        xaxis_opts=category_axis,
        yaxis_opts=value_axis,
    )
    return chart

In [3]:
def pie_chart(t, data_pair):
    """Build a dark-themed ring (donut) pie chart.

    Args:
        t: Text used as the chart title.
        data_pair: Series data handed straight to ``Pie.add``; the callers in
            this notebook pass a list of ``[label, value]`` rows.

    Returns:
        The configured chart object; the caller decides how to render it.
    """
    canvas = opts.InitOpts(theme='dark', width='600px', height='400px')
    # Label template: {b} is the slice name, {d} its percentage share.
    slice_labels = opts.LabelOpts(formatter="{b}: {d}%")
    # Vertical legend pinned to the left edge, offset from the top.
    legend = opts.LegendOpts(pos_left="0%", pos_top="55", orient='vertical')

    chart = Pie(init_opts=canvas)
    # Inner and outer radius differ, which produces a ring instead of a full disc.
    chart = chart.add(
        '',
        data_pair,
        radius=["30%", "45%"],
        label_opts=slice_labels,
    )
    chart = chart.set_global_opts(
        title_opts=opts.TitleOpts(title=t),
        legend_opts=legend,
    )
    return chart

3.1 Data Fetch

In [4]:
# Main dataset; the 'Unnamed: 0' column is discarded right after loading.
df = pd.read_csv('douyin_dataset.csv').drop(columns='Unnamed: 0')
# Feature table of the works, previewed as this cell's output.
item_df = pd.read_csv('features_of_the_work.csv')
item_df.head()

Unnamed: 0,item_id,Page_view,Like_count,Publish_city,Background_music
0,0,24,0,24.0,220.0
1,1,1309,5,63.0,574.0
2,3,2,0,7.0,26289.0
3,4,613,3,146.0,162.0
4,7,2,0,33.0,540.0


3.2 Daily release status of works

In [5]:
#Count the number of works released on each day
data = df.groupby(['date']).agg({'item_id':pd.Series.nunique}).reset_index().values.tolist()
line_chart("Daily daily publication volume of works", data).render_notebook()

3.3 Page views of works

In [6]:
# Bucket works by page views. pd.cut uses right-closed intervals by default,
# hence the "(lo,hi]" labels; views outside (0, 2000] become NaN and are
# therefore absent from the grouped counts below.
bins = [0, 1, 2, 4, 2000]
item_df['Browse volume level'] = pd.cut(
    item_df['Page_view'],
    bins,
    labels=[f'({lo},{hi}]' for lo, hi in zip(bins, bins[1:])],
)
# The grouping key is categorical. Passing observed=False explicitly keeps
# every bucket in the result (including empty ones), exactly as before, and
# avoids the pandas FutureWarning about the changing default of `observed`.
data = (
    item_df.groupby('Browse volume level', observed=False)['Page_view']
    .count()
    .reset_index()
    .values.tolist()
)
pie_chart('Distribution of views on works', data).render_notebook()

  data = item_df.groupby('Browse volume level')['Page_view'].count().reset_index().values.tolist()


3.4 Like counts of works

In [7]:
# Bucket works by like count. right=False makes every interval left-closed,
# hence the "[lo,hi)" labels; like counts of 50 or more fall outside the last
# bin, become NaN, and are therefore absent from the grouped sums below.
bins = [-1, 0, 1, 3, 5, 10, 50]
item_df['Like level'] = pd.cut(
    item_df['Like_count'],
    bins,
    labels=[f'[{lo},{hi})' for lo, hi in zip(bins, bins[1:])],
    right=False,
)
# The grouping key is categorical. Passing observed=False explicitly keeps
# every bucket in the result (including empty ones), exactly as before, and
# avoids the pandas FutureWarning about the changing default of `observed`.
data = (
    item_df.groupby('Like level', observed=False)['Like_count']
    .sum()
    .reset_index()
    .values.tolist()
)
pie_chart('Distribution of likes', data).render_notebook()

  data = item_df.groupby('Like level')['Like_count'].sum().reset_index().values.tolist()
