In [1]:
import pandas as pd 
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
# Load the electronics sales dataset; the first CSV column becomes the DataFrame index.
data = pd.read_csv('电子产品销售分析.csv', index_col=0)
# Display floats with two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)
# Print column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 564169 entries, 0 to 2633520
Data columns (total 11 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   event_time     564169 non-null  object 
 1   order_id       564169 non-null  int64  
 2   product_id     564169 non-null  int64  
 3   category_id    564169 non-null  float64
 4   category_code  434799 non-null  object 
 5   brand          536945 non-null  object 
 6   price          564169 non-null  float64
 7   user_id        564169 non-null  float64
 8   age            564169 non-null  float64
 9   sex            564169 non-null  object 
 10  local          564169 non-null  object 
dtypes: float64(4), int64(2), object(5)
memory usage: 51.7+ MB


In [3]:
# Count the missing values in each column.
data.isna().sum()


event_time            0
order_id              0
product_id            0
category_id           0
category_code    129370
brand             27224
price                 0
user_id               0
age                   0
sex                   0
local                 0
dtype: int64

In [5]:
# These identifier/age columns were read as float64; store them as 64-bit integers.
int_columns = ['category_id', 'user_id', 'age']
data[int_columns] = data[int_columns].astype('int64')

In [6]:
# Parse the timestamps once, then derive the calendar month and the quarter period from them.
event_ts = pd.to_datetime(data['event_time'])
data['event_time'] = event_ts
data['month'] = event_ts.dt.month
data['quarter'] = event_ts.dt.to_period('Q')



In [7]:
# Remove rows with anomalous dates (timestamps that fall in 1970Q1).
# A boolean mask keeps exactly the non-matching rows; dropping by index label would
# also remove any other row that shares a label with an anomalous one.
# .copy() gives an independent frame so later column assignments do not warn.
data = data.loc[data['quarter'] != '1970Q1'].copy()

In [8]:
# Age bucket edges, paired one-to-one with the display labels below.
bins = [16, 20, 30, 40, 60]
# NOTE(review): the last edge pair is 40 and 60 while its label reads '40~50' — confirm the intended upper bound.
labels=['16~19','20~29','30~39','40~50']

In [13]:
# Assign each row to an age bucket; intervals are closed on the left and open on the right.
data['Age_group'] = pd.cut(
    data['age'],
    bins=bins,
    labels=labels,
    right=False,
)

In [15]:
# Re-check dtypes and non-null counts after the type conversions and derived columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 562862 entries, 0 to 2633520
Data columns (total 14 columns):
 #   Column         Non-Null Count   Dtype              
---  ------         --------------   -----              
 0   event_time     562862 non-null  datetime64[ns, UTC]
 1   order_id       562862 non-null  int64              
 2   product_id     562862 non-null  int64              
 3   category_id    562862 non-null  int64              
 4   category_code  433938 non-null  object             
 5   brand          535699 non-null  object             
 6   price          562862 non-null  float64            
 7   user_id        562862 non-null  int64              
 8   age            562862 non-null  int64              
 9   sex            562862 non-null  object             
 10  local          562862 non-null  object             
 11  month          562862 non-null  int64              
 12  quarter        562862 non-null  period[Q-DEC]      
 13  Age_group      562862 non-nu

### 查看整体销售情况

In [60]:
# Total sales amount per month, as a two-column frame (month, price).
Sales_overview = data.groupby('month', as_index=False)['price'].sum()

In [61]:
# Number of distinct customers per month; the positional index lines up with Sales_overview's rows.
Sales_overview['customer_count(unique)'] = (
    data.groupby('month')['user_id'].nunique().reset_index(drop=True)
)

In [62]:
# Number of non-null user_id entries per month, stored as the order count.
# NOTE(review): this counts rows rather than distinct order_id values — confirm each row is one order.
Sales_overview['Order_count'] = (
    data.groupby('month')['user_id'].count().reset_index(drop=True)
)

In [67]:
# Monthly sales divided by the number of unique customers, rounded to two decimals.
Sales_overview['per_customer_transation'] = (
    Sales_overview['price']
    .div(Sales_overview['customer_count(unique)'])
    .round(2)
)
# Mean age across the month's rows.
Sales_overview['Avg_age'] = data.groupby('month')['age'].mean().reset_index(drop=True)


In [68]:
# Give the summed price column a descriptive name.
Sales_overview = Sales_overview.rename(columns={'price': 'Total Sales'})

In [69]:
# Display the monthly summary table.
Sales_overview

Unnamed: 0,month,Total Sales,customer_count(unique),Order_count,per_customer_transation,Avg_age
0,1,1774466.94,1866,14270,950.95,31.92
1,2,2267809.88,2299,17995,986.43,34.02
2,3,2897486.26,2624,18688,1104.22,33.15
3,4,1704422.41,5519,11868,308.83,33.04
4,5,7768637.79,17474,40334,444.58,33.1
5,6,7691244.33,14150,41364,543.55,33.42
6,7,16354029.27,30308,76418,539.59,32.9
7,8,27982605.44,35611,100007,785.79,32.97
8,9,17152310.57,19886,70499,862.53,33.32
9,10,19765680.76,14592,104084,1354.56,32.89


In [80]:
# Shared line-style options handed to each series of the monthly trend chart.
line_style = {
    'normal': {
        'width': 4,  # line width
        # ECharts spells this key `shadowColor`; rgba() takes four comma-separated components.
        # NOTE(review): the previous value 'rgba(155.15,184, .3)' was malformed;
        # 155, 15, 184 is the presumed intent — confirm the colour.
        'shadowColor': 'rgba(155, 15, 184, .3)',
        'shadowBlur': 10,  # shadow blur size
        'shadowOffsetY': 10,  # shadow offset along the Y axis
        'shadowOffsetX': 10,  # shadow offset along the X axis
        # NOTE(review): `curve` does not appear among ECharts' documented lineStyle
        # keys — confirm it has any effect before relying on it.
        'curve': 0.5,
    }
}

In [81]:
# pyecharts serialises only native Python types, and a category axis expects string
# labels, so convert the month Series to a plain list of strings.
x_data = Sales_overview['month'].astype(str).tolist()

In [82]:
# Convert each metric to a plain Python list: pyecharts serialises only native
# Python types, so numpy scalars inside a pandas Series do not render reliably.
y_data_1 = Sales_overview['Total Sales'].tolist()
y_data_2 = Sales_overview['per_customer_transation'].tolist()
y_data_3 = Sales_overview['Order_count'].tolist()
y_data_4 = Sales_overview['Avg_age'].tolist()

In [83]:
# Options shared by every series: no point symbols, smoothed curve, common line style.
series_kwargs = dict(is_symbol_show=False, is_smooth=True, linestyle_opts=line_style)

line = Line(init_opts=opts.InitOpts(theme='light', width='1000px', height='600px'))
line.add_xaxis(x_data)
# Total sales is plotted against the default Y axis (index 0).
line.add_yaxis("总销售额", y_data_1, **series_kwargs)
# A second Y axis (index 1) carries the remaining three series.
line.extend_axis(yaxis=opts.AxisOpts())
for series_name, series_values in (
    ("客单价", y_data_2),
    ("订单数", y_data_3),
    ("平均购买年龄", y_data_4),
):
    line.add_yaxis(series_name, series_values, yaxis_index=1, **series_kwargs)

# Hide the per-point value labels on all series.
line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
line.set_global_opts(
    # Centered chart title.
    title_opts=opts.TitleOpts(title="2020每月销售额趋势", pos_left='center'),
    # Category axis without padding at either end.
    xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
    # Legend offset from the top edge.
    legend_opts=opts.LegendOpts(pos_top='5%'),
    # Axis-triggered tooltip with a line pointer.
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger='axis', axis_pointer_type='line'),
)

line.render_notebook()

### 接下来查看各个季度销售占比

In [84]:
# Total sales amount per quarter, as a two-column frame (quarter, price).
quarter = data.groupby('quarter', as_index=False)['price'].sum()

In [85]:
# Convert the period values to strings so they can be used as chart labels.
quarter = quarter.astype({'quarter': 'string'})

In [86]:
# (quarter label, total sales) pairs in row order.
data_pair = list(zip(quarter['quarter'].tolist(), quarter['price'].tolist()))

In [88]:
# Donut chart of each quarter's share of total sales.
chart = (
    Pie(init_opts=opts.InitOpts(theme='light', width='500px', height='500px'))
    .add(
        '',
        data_pair,
        radius=["30%", "45%"],  # inner and outer radius
        label_opts=opts.LabelOpts(formatter="{b}: {d}%"),  # {d} renders the percentage
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Total Sales by Quarter", pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=True, pos_left='center', pos_top='5%'),
    )
)

chart.render_notebook()

In [89]:
# Per-brand revenue and row count; keep the 50 brands with the highest revenue.
df_1 = (
    data.groupby(['brand'])
    .agg({'price': 'sum', 'order_id': 'count'})
    .reset_index()
    .sort_values('price', ascending=False)
    .head(50)
)
# Title-case the brand names for display.
df_1['brand'] = df_1['brand'].map(str.title)
# Re-sort ascending by revenue before plotting.
df_1 = df_1.sort_values('price')

bar_item_style = {
    'shadowBlur': 10,  # shadow blur size
    'shadowColor': 'rgba(0, 0, 0, 0.5)',  # shadow colour
    'shadowOffsetY': 5,
    'shadowOffsetX': 5,  # shadow offsets
    'barBorderRadius': [0, 10, 10, 0],  # rounded corners
}

chart = (
    Bar(init_opts=opts.InitOpts(theme='light', width='980px', height='800px'))
    # Brand names on the category axis.
    .add_xaxis(df_1['brand'].tolist())
    # Revenue per brand, rounded to two decimals.
    .add_yaxis(
        "",
        df_1['price'].round(2).tolist(),
        itemstyle_opts=bar_item_style,
        # Show each bar's value ({c}) to its right.
        label_opts=opts.LabelOpts(is_show=True, position='right', formatter='{c}'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="各品牌销售额排行", subtitle=''),
        # The x axis is hidden.
        xaxis_opts=opts.AxisOpts(is_show=False),
        # Hidden visual-map component that colours bars by data dimension 0.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            max_=40,
            min_=-50,
            dimension=0,
            range_color=['#ffFFff', '#00704a'],
        ),
        # Vertical zoom slider near the left edge.
        datazoom_opts=opts.DataZoomOpts(
            range_start=100, range_end=75, orient='vertical', pos_left='1%'
        ),
    )
    # Swap the x and y axes.
    .reversal_axis()
)
chart.render_notebook()

#### 顾客年龄分布

In [91]:

# Distinct customers in each age bucket.
Age_group = data.groupby(['Age_group']).agg({'user_id': pd.Series.nunique}).reset_index()
# (age bucket, customer count) pairs in row order.
data_pair = [(record['Age_group'], record['user_id']) for _, record in Age_group.iterrows()]

# Donut chart of the customer age distribution.
chart = (
    Pie(init_opts=opts.InitOpts(theme='light', width='500px', height='500px'))
    .add(
        '',
        data_pair,
        radius=["30%", "45%"],  # inner and outer radius
        label_opts=opts.LabelOpts(formatter="{b}: {d}%"),  # {d} renders the percentage
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="顾客年龄分布", pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=True, pos_left='center', pos_top='5%'),
    )
)

chart.render_notebook()

### 各个省份购买力对比

In [92]:
# Per-province total sales and number of distinct customers.
Sales = data.groupby(['local']).agg({'price': 'sum', 'user_id': pd.Series.nunique}).reset_index()

# PP = sales per distinct customer, computed before the totals are cast to integers;
# both columns are then cast to int and rows ordered by total sales, largest first.
Sales = (
    Sales
    .assign(PP=Sales['price'] / Sales['user_id'])
    .astype({'price': 'int', 'PP': 'int'})
    .sort_values(by='price', ascending=False)
)
Sales

Unnamed: 0,local,price,user_id,PP
4,广东,25999875,21636,1201
0,上海,19817768,16253,1219
1,北京,19066955,16121,1182
9,湖南,6981796,5387,1296
7,海南,6966574,5508,1264
2,四川,6769440,5518,1226
6,浙江,6519561,5428,1201
10,重庆,6463185,5396,1197
3,天津,6430335,5393,1192
5,江苏,6330488,5635,1123


In [93]:
x_data = Sales['local'].tolist()
y_data_1 = Sales['price'].tolist()
# Convert to a plain list like the two series above: pyecharts serialises only native
# Python types, so a pandas Series of numpy integers does not render reliably.
y_data_2 = Sales['PP'].tolist()

# Bar chart for total sales per province.
bar = Bar()
bar.add_xaxis(x_data)
# Add a second Y axis for the line series.
bar.extend_axis(yaxis=opts.AxisOpts())
# yaxis_index selects which Y axis a series uses.
bar.add_yaxis(
    '销售总额',   # series name
    y_data_1,   # series data
    yaxis_index=0,   # default axis; 0 could be omitted
    color='rgba(255, 0, 0, .5)'  # semi-transparent so the bars do not hide the line
)

# Line chart for sales per customer.
line = Line()

line.add_xaxis(x_data)
# Point the line series at the Y axis added above.
line.add_yaxis(
    '人均购买力',   # series name
    y_data_2,   # series data
    yaxis_index=1  # use the second Y axis
)

bar.set_global_opts(title_opts=opts.TitleOpts(title="各省销售总额与人均购买力对比", pos_left='center'),
                    legend_opts=opts.LegendOpts(
                    is_show=True,
                    pos_left='35%',
                    pos_top='6%',
                    orient='horizontal',  # horizontal legend layout
                    ),
)

# Overlay the line chart on the bar chart.
bar.overlap(line)
# Render the combined chart.
bar.render_notebook()