# 导入库

In [1]:
from collections import Counter

import imageio
import jieba
import pandas as pd
import wordcloud
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# 导入数据，查看，缺失值，异常值处理

In [2]:
# Read the menu CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='../spider/kfc.csv')
data.head(n=5)

Unnamed: 0,name,foods,price,img_url
0,1元安心大油条,产品实付满49元（不含外送费），可1元换购安心大油条1根，每单限1份,1.0,https://img.4008823823.com.cn/kfcios/Version/7...
1,1元芝士鸡肉帕尼尼,产品实付满69元（不含外送费），可1元换购芝士鸡肉帕尼尼1份，每单限1份,1.0,https://img.4008823823.com.cn/kfcios/Version/7...
2,有鸡腿全餐双人餐,套餐包含：有鸡腿中式/西式全餐（可选），共2份。<br>有鸡腿中式全餐：新奥尔良烤鸡腿1份+...,0.0,https://img.4008823823.com.cn/kfcios/Version/7...
3,有鸡腿西式全餐(香草流心可颂),新奥尔良烤鸡腿1块+培根炒蛋1份+香脆薯饼1块+大溪地香草奶香流心可颂2只+咖啡饮品1杯（可...,29.0,https://img.4008823823.com.cn/kfcios/Version/7...
4,有鸡腿西式全餐(香草流心可颂),新奥尔良烤鸡腿1块+太阳蛋+香脆薯饼1块+大溪地香草奶香流心可颂2只+咖啡饮品1杯（可加价换...,29.0,https://img.4008823823.com.cn/kfcios/Version/7...


In [3]:
# Summary statistics; in the recorded output only `price` is summarised
# and its minimum is 0.0.
data.describe()

Unnamed: 0,price
count,90.0
mean,13.872222
std,7.257914
min,0.0
25%,9.0
50%,13.5
75%,17.0
max,36.0


In [4]:
# Column dtypes and non-null counts; in the recorded output `foods` has
# 89 non-null values out of 90 rows, i.e. one missing entry.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 4 columns):
name       90 non-null object
foods      89 non-null object
price      90 non-null float64
img_url    90 non-null object
dtypes: float64(1), object(3)
memory usage: 2.9+ KB


In [5]:
# Prices equal to 0.0 are replaced by the (rounded) average price.
def f(p, fill=14):
    """Return ``fill`` when the price ``p`` equals 0.0, otherwise ``p`` unchanged.

    Parameters
    ----------
    p : number
        A single price value.
    fill : number, optional
        Replacement used for a zero price. Defaults to 14, the value the
        notebook originally hard-coded (the recorded ``describe()`` output
        shows a mean price of about 13.87).

    Returns
    -------
    number
        ``fill`` if ``p == 0.0``, else ``p``.
    """
    return fill if p == 0.0 else p
# Run f over every price so zero prices are replaced column-wide.
data['price'] = data['price'].apply(f)

In [6]:
# Drop every row that contains at least one missing value.
data = data.dropna(axis=0, how='any')

# 词云

In [7]:
# Concatenate every meal name followed by every food description into one
# space-separated string.
foods = data['foods'].tolist()
names = ' '.join(data['name'].tolist() + foods)

# Segment the combined text with jieba and re-join the tokens with spaces.
ls = jieba.lcut(names)
txt = ' '.join(ls)

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\MUXIAT~1\AppData\Local\Temp\jieba.cache
Loading model cost 1.436 seconds.
Prefix dict has been built successfully.


In [8]:
# Remove words unrelated to food from the segmented text.
# Each entry is removed as a plain substring, one after another, in this order.
for noise in ('产品', '包装', '包装实物', 'br', '随心换', '实物',
              '主要', '原料', '指比菜', '单单', '加价', '换购',
              '总价', '金额', '为准', '早餐', '饮品'):
    txt = txt.replace(noise, '')

In [9]:
# Build the word cloud from the cleaned text and save it as a PNG.
mask = imageio.imread("foodName.jpg")  # image passed as the cloud's mask

# NOTE(review): the wordcloud docs say width/height are ignored when a mask is
# supplied (the mask's shape is used instead) — confirm against the installed version.
cloud_options = dict(
    background_color='white',
    width=1000,
    height=700,
    font_path='msyh.ttc',
    mask=mask,
)
w = wordcloud.WordCloud(**cloud_options)
w.generate(txt)
w.to_file('wordcloud.png')

<wordcloud.wordcloud.WordCloud at 0x1c978c40dd8>

# 主食营养分布

In [10]:
# Nutrition records for the staple foods.
# Keys: 热量 (calories), 脂肪 (fat), 碳水化合物 (carbohydrate), 蛋白质 (protein).
hamburger = dict(热量=250, 脂肪=10, 碳水化合物=20, 蛋白质=10)
rice_ball = dict(热量=200, 脂肪=11, 碳水化合物=21, 蛋白质=6)
porridge = dict(热量=50, 脂肪=1.5, 碳水化合物=8, 蛋白质=2.5)
chicken = dict(热量=255, 脂肪=15, 碳水化合物=10, 蛋白质=21)

In [11]:
# Stacked bar chart of the staple foods' nutrition values.
# The series are read from the nutrition dicts (hamburger, rice_ball, porridge,
# chicken) instead of being typed in a second time, so the chart always
# matches those records.
x = ['汉堡','饭团','粥','鸡肉']
staples = [hamburger, rice_ball, porridge, chicken]  # same order as x
y1 = [food['热量'] for food in staples]
y2 = [food['脂肪'] for food in staples]
y3 = [food['碳水化合物'] for food in staples]
y4 = [food['蛋白质'] for food in staples]

c = Bar()
c.add_xaxis(x)
# All four series share the stack name "stack1", so they pile up in one bar per food.
c.add_yaxis("热量", y1, stack="stack1")
c.add_yaxis("脂肪", y2, stack="stack1")
c.add_yaxis("碳水化合物", y3, stack="stack1")
c.add_yaxis("蛋白质", y4, stack="stack1")
c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c.set_global_opts(title_opts=opts.TitleOpts(title="主食营养分布"))
# (Alternative: c.render_notebook() to show the chart inline.)
# render() writes the HTML file; its returned path is the cell's output.
c.render('主食营养分布.html')

'D:\\python项目\\我的爬虫+数据分析\\KFC早餐\\analyze\\主食营养分布.html'

# 小吃营养分布

In [12]:
# Nutrition records for the snacks.
# Keys: 热量 (calories), 脂肪 (fat), 碳水化合物 (carbohydrate), 蛋白质 (protein).
youtiao = dict(热量=388, 脂肪=18, 碳水化合物=51, 蛋白质=7)
chayedan = dict(热量=151, 脂肪=6, 碳水化合物=12, 蛋白质=11)
shubin = dict(热量=327, 脂肪=22, 碳水化合物=32, 蛋白质=3)
danta = dict(热量=255, 脂肪=22, 碳水化合物=38, 蛋白质=7)

In [16]:
# Stacked bar chart of the snacks' nutrition values; each segment is labelled
# with its share of the snack's four recorded values.
snacks = [youtiao, chayedan, shubin, danta]  # same order as the x axis below


def _nutrient_series(nutrient):
    """Return one ``{"value", "percent"}`` item per snack for ``nutrient``.

    ``value`` is the snack's entry for ``nutrient``; ``percent`` is that value
    divided by the sum of all values recorded for the same snack. The label
    formatter below reads ``percent`` from each data item.
    """
    return [
        {"value": snack[nutrient], "percent": snack[nutrient] / sum(snack.values())}
        for snack in snacks
    ]


# Derived from the snack dicts rather than retyped, so values and denominators
# cannot fall out of sync with the records.
list2 = _nutrient_series('热量')
list3 = _nutrient_series('脂肪')
list4 = _nutrient_series('碳水化合物')
list5 = _nutrient_series('蛋白质')

c = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
c.add_xaxis(['油条','茶叶蛋','薯饼','蛋挞'])
c.add_yaxis("热量", list2, stack="stack1", category_gap="50%")
c.add_yaxis("脂肪", list3, stack="stack1", category_gap="50%")
c.add_yaxis("碳水化合物", list4, stack="stack1", category_gap="50%")
c.add_yaxis("蛋白质", list5, stack="stack1", category_gap="50%")
c.set_series_opts(
    label_opts=opts.LabelOpts(
        position="right",
        # JavaScript label formatter: shows the item's `percent` as a whole-number percentage.
        formatter=JsCode(
            "function(x){return Number(x.data.percent * 100).toFixed() + '%';}"
        ),
    )
)
# render() writes the HTML file; its returned path is the cell's output.
c.render('小吃营养分布.html')

'D:\\python项目\\我的爬虫+数据分析\\KFC早餐\\analyze\\小吃营养分布.html'

# 饮品

In [17]:
# Nutrition records for the drinks: soy milk (doujiang) and coffee (coffe).
# Keys: 热量 (calories), 脂肪 (fat), 碳水化合物 (carbohydrate), 蛋白质 (protein).
doujiang = dict(热量=31, 脂肪=2, 碳水化合物=1.5, 蛋白质=3)
coffe = dict(热量=100, 脂肪=4, 碳水化合物=12, 蛋白质=4)

In [30]:
# Two ring charts in one figure: soy milk (left) and coffee (right).
# Slice values are read from the doujiang / coffe dicts instead of being
# retyped; the key list fixes the slice order explicitly.
nutrients = ["热量", "脂肪", '碳水化合物', '蛋白质']

c = Pie()
c.add(
    "",
    [[name, doujiang[name]] for name in nutrients],
    center=["20%", "50%"],
    radius=[60, 80],
)
c.add(
    "",
    [[name, coffe[name]] for name in nutrients],
    center=["55%", "50%"],
    radius=[60, 80],
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="豆浆（左）咖啡（右）"),
    # NOTE(review): pos_top="200%" looks like it positions the legend outside
    # the chart area — confirm whether "20%" was intended. Left unchanged here.
    legend_opts=opts.LegendOpts(
        type_="scroll", pos_top="200%", pos_left="80%", orient="vertical"
    ),
)
# render() writes the HTML file; its returned path is the cell's output.
c.render('豆浆咖啡.html')

'D:\\python项目\\我的爬虫+数据分析\\KFC早餐\\analyze\\豆浆咖啡.html'