# 双十一美妆产品数据分析

In [1]:
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Map, Timeline, Bar, Line, Pie

## 双十一美妆产品数据清洗

In [11]:
# Read the raw Double-11 cosmetics CSV, switch the English column headers to
# the Chinese names that the rest of the notebook refers to, and discard the
# `id` column.
df = (
    pd.read_csv("datasets/双十一淘宝美妆数据.csv")
    .rename(columns={
        'update_time': '更新时间',
        'title': '商品',
        'price': '价格',
        'sale_count': '销售量',
        'comment_count': '评论量',
    })
    .drop(columns='id')
)

# Preview the first five rows.
df.head(5)

Unnamed: 0,更新时间,商品,价格,销售量,评论量,店名
0,2016/11/14,CHANDO/自然堂 雪域精粹纯粹滋润霜50g 补水保湿 滋润水润面霜,139.0,26719.0,2704.0,自然堂
1,2016/11/14,CHANDO/自然堂凝时鲜颜肌活乳液120ML 淡化细纹补水滋润专柜正品,194.0,8122.0,1492.0,自然堂
2,2016/11/14,CHANDO/自然堂活泉保湿修护精华水（滋润型135ml 补水控油爽肤水,99.0,12668.0,589.0,自然堂
3,2016/11/14,CHANDO/自然堂 男士劲爽控油洁面膏 100g 深层清洁 男士洗面奶,38.0,25805.0,4287.0,自然堂
4,2016/11/14,CHANDO/自然堂雪域精粹纯粹滋润霜（清爽型）50g补水保湿滋润霜,139.0,5196.0,618.0,自然堂


In [12]:
# Show every column's dtype, then the (rows, columns) shape on its own line.
print(df.dtypes, f"Dataframe shape: {df.shape}", sep="\n")

更新时间     object
商品       object
价格      float64
销售量     float64
评论量     float64
店名       object
dtype: object
Dataframe shape: (27598, 6)


In [13]:
# Report the data-quality problems before fixing them:
#   1. number of missing values per column;
#   2. non-null counts per column among rows that are exact duplicates of an
#      earlier row.
print(df.isna().sum())
print(df.loc[df.duplicated()].count())

# Keep the first occurrence of each duplicated row, renumber the index from 0,
# and replace missing sales / comment counts with 0.
df = (
    df.drop_duplicates()
    .reset_index(drop=True)
    .fillna({'销售量': 0, '评论量': 0})
)



更新时间       0
商品         0
价格         0
销售量     2354
评论量     2354
店名         0
dtype: int64
更新时间    101
商品      101
价格      101
销售量      82
评论量      82
店名      101
dtype: int64


In [14]:
# Normalise column types and derive revenue.
#
# Dates are parsed and re-rendered as ISO "YYYY-MM-DD" strings. The vectorised
# `.dt.strftime` accessor replaces a per-row `apply(lambda ...)`; the resulting
# strings sort chronologically, which the later cells rely on when they sort
# and compare `更新时间` values.
df['更新时间'] = pd.to_datetime(df['更新时间']).dt.strftime("%Y-%m-%d")

# Sales and comment counts are whole numbers. `astype(int)` raises on missing
# values, so this depends on the earlier cleaning cell having filled them.
df = df.astype({'销售量': int, '评论量': int})

# Revenue per listing = unit price * units sold.
df['营业额'] = df['价格'] * df['销售量']

# Fix the column order so that revenue sits next to the sales count.
df = df[['更新时间', '商品', '价格', '销售量', '营业额', '评论量', '店名']]
df.head(5)

Unnamed: 0,更新时间,商品,价格,销售量,营业额,评论量,店名
0,2016-11-14,CHANDO/自然堂 雪域精粹纯粹滋润霜50g 补水保湿 滋润水润面霜,139.0,26719,3713941.0,2704,自然堂
1,2016-11-14,CHANDO/自然堂凝时鲜颜肌活乳液120ML 淡化细纹补水滋润专柜正品,194.0,8122,1575668.0,1492,自然堂
2,2016-11-14,CHANDO/自然堂活泉保湿修护精华水（滋润型135ml 补水控油爽肤水,99.0,12668,1254132.0,589,自然堂
3,2016-11-14,CHANDO/自然堂 男士劲爽控油洁面膏 100g 深层清洁 男士洗面奶,38.0,25805,980590.0,4287,自然堂
4,2016-11-14,CHANDO/自然堂雪域精粹纯粹滋润霜（清爽型）50g补水保湿滋润霜,139.0,5196,722244.0,618,自然堂


## 双十一美妆数据可视化EDA

### 每日美妆总销量走势

In [41]:
# Total units sold per day, ordered by date.
temp = (
    df.groupby('更新时间')
    .agg({'销售量': 'sum'})
    .reset_index()
    .astype({'销售量': int})
    .sort_values(by='更新时间', ascending=True)
)

# Markers for the maximum, average and minimum of the series, each with its
# own colour.
mark_points = [
    opts.MarkPointItem(
        type_=kind,
        name=label,
        itemstyle_opts=opts.ItemStyleOpts(color=colour),
    )
    for kind, label, colour in (
        ('max', '最大销售量', "#36BA98"),
        ('average', '平均销售量', '#3FA2F6'),
        ('min', "最小销售量", '#E76F51'),
    )
]

# Build the line chart one step at a time; each call configures `plot1`.
plot1 = Line()
plot1.add_xaxis(temp['更新时间'].tolist())
plot1.add_yaxis("销售量", temp['销售量'].tolist())
plot1.set_series_opts(
    linestyle_opts=opts.LineStyleOpts(color='#5A72A0'),
    # Shade the area under the line.
    areastyle_opts=opts.AreaStyleOpts(opacity=0.6, color="#5A72A0"),
    # Hide per-point value labels; only the mark points are annotated.
    label_opts=opts.LabelOpts(is_show=False),
    markpoint_opts=opts.MarkPointOpts(data=mark_points),
)
plot1.set_global_opts(
    title_opts=opts.TitleOpts(title="每日总销售量走势"),
    # No grid lines on either axis.
    xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False)),
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False)),
)
plot1.render_notebook()

### 美妆产品累计销售量 Top10

In [99]:
# Animated Top-10 ranking: for every date in the data, rank shops by their
# cumulative units sold up to and including that date, and show cumulative
# units and cumulative revenue side by side.

# Display units used by the labels: 1 万 = 10,000 and 1 亿 = 100,000,000.
UNITS_PER_WAN = 10_000
YUAN_PER_YI = 100_000_000

# Distinct dates in ascending order. They are ISO "YYYY-MM-DD" strings, so
# plain string ordering and `<=` comparison are chronological.
date_points = sorted(df['更新时间'].unique())

timeline = Timeline()
timeline.add_schema(is_loop_play=False, play_interval=500)
for cutoff in date_points:
    # Cumulative totals per shop up to `cutoff`; keep the ten largest by units
    # sold, then re-sort ascending so the biggest bar is drawn at the top of
    # the horizontal chart.
    top10 = (
        df[df['更新时间'] <= cutoff]
        .groupby('店名')
        .agg({'销售量': 'sum', '营业额': 'sum'})
        .sort_values(by='销售量', ascending=False)
        .head(10)
        .sort_values(by="销售量")
    )
    # NOTE(review): the `{@[1]/}` label templates are kept exactly as written;
    # confirm in the rendered chart that they display the bar value.
    bar = (
        Bar()
        .add_xaxis(top10.index.tolist())
        .add_yaxis("销售量", [round(units / UNITS_PER_WAN, 2) for units in top10['销售量'].tolist()],
                   label_opts=opts.LabelOpts(position="right", formatter='{@[1]/} 万个'),
                   itemstyle_opts=opts.ItemStyleOpts(color="#36BA98"))
        # Revenue is labelled in 亿元, so divide by 100,000,000 (not 10,000,000,
        # which made every label ten times too large).
        .add_yaxis("销售额", [round(revenue / YUAN_PER_YI, 2) for revenue in top10['营业额'].tolist()],
                   label_opts=opts.LabelOpts(position="right", formatter='￥{@[1]/} 亿元'),
                   itemstyle_opts=opts.ItemStyleOpts(color="#3FA2F6"))
        .reversal_axis()
        .set_global_opts(
            title_opts=opts.TitleOpts(title="美妆产品累计销售量排行 TOP10"),
            xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False)),
            yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False))
        )
    )
    # One timeline frame per date.
    timeline.add(bar, cutoff)

timeline.render_notebook()

In [169]:
# Per-shop summary: unweighted mean of the listing prices and total units
# sold, ordered from the cheapest to the most expensive mean price. The result
# is a nested dict of the form {column: {shop: value}}.
temp3 = (
    df.groupby(['店名'])
    .agg({'价格': 'mean', '销售量': 'sum'})
    .sort_values('价格', ascending=True)
    .to_dict()
)
temp3

{'价格': {'美加净': 44.69461859356377,
  '妮维雅': 73.78905311778291,
  '悦诗风吟': 121.24594505130752,
  '相宜本草': 122.95844630616908,
  '蜜丝佛陀': 142.11889400921658,
  '美宝莲': 148.75757575757575,
  '欧莱雅': 167.2826982492276,
  '自然堂': 180.13021276595745,
  '雅漾': 212.6184012066365,
  '欧珀莱': 276.21854304635764,
  '薇姿': 281.0857908847185,
  '佰草集': 289.8231713780919,
  '植村秀': 311.80907877169557,
  '玉兰油': 330.6583706356312,
  '倩碧': 346.0921902524956,
  '兰芝': 356.6158088235294,
  '资生堂': 577.4384896467723,
  '兰蔻': 756.4007782101168,
  '雅诗兰黛': 872.4707182320442,
  '雪花秀': 901.0828729281768,
  'SKII': 1011.727078891258,
  '娇兰': 1361.0435875943},
 '销售量': {'美加净': 8825906,
  '妮维雅': 38254460,
  '悦诗风吟': 39070496,
  '相宜本草': 65462947,
  '蜜丝佛陀': 15391247,
  '美宝莲': 39358088,
  '欧莱雅': 33773155,
  '自然堂': 17837452,
  '雅漾': 6047851,
  '欧珀莱': 3950972,
  '薇姿': 880090,
  '佰草集': 14994464,
  '植村秀': 0,
  '玉兰油': 0,
  '倩碧': 7214560,
  '兰芝': 9130244,
  '资生堂': 351221,
  '兰蔻': 3107006,
  '雅诗兰黛': 5361138,
  '雪花秀': 703631,
  'SKII': 0,
 

In [170]:
# Horizontal bar chart of each shop's mean listing price next to its total
# units sold. Shops appear in the order of `temp3['价格']`, i.e. ascending mean
# price.

# The sales labels are expressed in 万个, and 1 万 = 10,000.
UNITS_PER_WAN = 10_000

barplot = (
    Bar()
        .add_xaxis(list(temp3['价格']))
        .add_yaxis("价格", [round(val, 2) for val in temp3['价格'].values()],
                label_opts=opts.LabelOpts(position="right", formatter='￥{@[1]/} 元'),
                itemstyle_opts=opts.ItemStyleOpts(color="#3FA2F6"))
        # Divide by 10,000 so the number matches the "万个" label. The previous
        # divisor of 1,000,000 made every label 100 times too small and
        # disagreed with the 万个 scaling used in the Top-10 timeline chart.
        .add_yaxis("销售量", [round(val / UNITS_PER_WAN, 2) for val in temp3['销售量'].values()],
                label_opts=opts.LabelOpts(position="right", formatter='{@[1]/} 万个'),
                itemstyle_opts=opts.ItemStyleOpts(color="#FF7777"))
        # Swap the axes so that shop names run down the vertical axis.
        .reversal_axis()
        .set_global_opts(
            title_opts=opts.TitleOpts(title="店家产品均价排行"),
            xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False),
                                     axislabel_opts=opts.LabelOpts(interval=50) ),
            yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=False))
    )
)
barplot.render_notebook()