# 0. Preparation

In [None]:
# 导入常用模块
import itchat
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Log in to WeChat through itchat
itchat.auto_login()

In [None]:
# Send one test message to the "File Transfer" helper account ('filehelper')
itchat.send('Hello, world!', toUserName='filehelper')

# 1. Sex

In [None]:
# Fetch the friend list from WeChat.
# update=True presumably forces a refresh instead of reusing a cached list — confirm against the itchat docs.
friends = itchat.get_friends(update=True)

# Show only how many contacts were returned: echoing `friends` itself would dump
# every contact's full record (personal data) into the saved notebook output.
len(friends)

In [None]:
# Convert the dict-format friend records into a DataFrame
friends_df = pd.DataFrame(friends)

In [None]:
# Preview the first rows of the friends table
friends_df.head()

In [None]:
# Summarize the columns of the friends table (dtypes and non-null counts)
friends_df.info()

In [None]:
# Count how many friends fall into each value of the 'Sex' column.
# The tally is computed once and kept as a Series so the cell can display it
# together with its labels; sex_counts keeps the bare count array used for plotting.
sex_value_counts = friends_df['Sex'].value_counts()
sex_counts = sex_value_counts.values

# Last expression of the cell, so the labelled counts are actually shown
sex_value_counts

In [None]:
# Draw the sex distribution as a pie chart with matplotlib.
# `explode` needs exactly one entry per wedge, so build it from the data instead
# of assuming three categories; the second wedge is pulled out when it exists.
explode = [0.1 if idx == 1 else 0 for idx in range(len(sex_counts))]

# Trailing semicolon suppresses the repr of the returned patch/text lists
plt.pie(sex_counts, explode=explode, shadow=True, autopct='%1.1f%%', startangle=90);

# 2. Friends distribution

In [None]:
# Extract the 'Province' column of the friends table
provinces = friends_df['Province']

In [None]:
# Show how many friends there are for each province value
provinces.value_counts()

In [None]:
# Short Chinese names of China's province-level regions. The list is used to
# filter the province values found in the friend data before drawing the map.
china_provinces = ['北京','天津','上海','重庆','河北','山西','辽宁','吉林','黑龙江','江苏','浙江','安徽','福建','江西','山东',
                   '河南','湖北','湖南','广东','海南','四川','贵州','云南','陕西','甘肃','青海','台湾','内蒙古','广西','西藏',
                   '宁夏','新疆','香港','澳门']

In [None]:
# Tally friends per province once instead of once per list element
province_counts = provinces.value_counts()

# Provinces (in china_provinces order) in which at least one friend is located
attr = [name for name in china_provinces if name in province_counts.index]

# Number of friends in each of those provinces, aligned with attr.
# Converted to plain ints so the values are not numpy scalars when handed to the chart.
value = [int(province_counts[name]) for name in attr]

In [None]:
# Render the friend-distribution map.
# NOTE(review): a top-level `Map` import looks like the pre-1.0 pyecharts API — confirm the installed version.
from pyecharts import Map

map_wechat = Map("我的微信好友分布", width=1200, height=600)
# attr holds the province names and value the matching friend counts
map_wechat.add("", attr, value, maptype='china', is_visualmap=True, is_map_symbol_show=False)
# presumably writes the chart to an HTML file — TODO confirm the output location
map_wechat.render()

In [None]:
# Display the friend-distribution map as the cell output
map_wechat

# 3. Profile photo

In [None]:
# Get the friends' UserName values (these are not the nicknames)
user_names = friends_df['UserName']

In [None]:
# Total number of friends, i.e. the number of rows in the friends table
nb_friends = len(friends_df)

In [None]:
from tqdm import tqdm

In [None]:
import os

# Make sure the output folder exists; open() below raises FileNotFoundError otherwise
os.makedirs('profiles', exist_ok=True)

for i in tqdm(range(nb_friends)):

    # Fetch the friend's profile picture data (the original note calls it
    # base64-encoded; it is written to disk unchanged). Positional .iloc keeps
    # the lookup correct even if the Series index is not 0..n-1.
    img_data = itchat.get_head_img(userName=user_names.iloc[i])

    # Save it as profiles/<row number>.jpg
    with open('profiles/{}.jpg'.format(i), 'wb') as f:
        f.write(img_data)

In [None]:
import requests

In [None]:
import os

# API credentials for the Baidu AI open platform (ai.baidu.com).
# They are read from the environment so that no secret is stored in the notebook:
# set BAIDU_CLIENT_ID and BAIDU_CLIENT_SECRET before running this cell
# (a missing variable raises KeyError).
client_id = os.environ['BAIDU_CLIENT_ID']
client_secret = os.environ['BAIDU_CLIENT_SECRET']

# Request an access token using the client-credentials grant
token_r = requests.post('https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret={}'.format(client_id, client_secret))

In [None]:
# Inspect the JSON body returned by the token endpoint.
# NOTE(review): this response presumably contains the access token — clear the cell output before sharing the notebook.
token_r.json()

In [None]:
import base64

In [None]:
# Encode an image file's contents as base64
def jpg2base64(file_name):
    """Return the base64-encoded bytes of the file at ``file_name``.

    The file is opened in binary mode and read completely before encoding.
    """
    with open(file_name, 'rb') as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes)

In [None]:
# Image-recognition API endpoint.
# The access token is taken from the token response held in token_r rather than
# pasted into the notebook: a hard-coded token leaks a credential and stops
# working once it expires.
url = 'https://aip.baidubce.com/rest/2.0/image-classify/v2/general?access_token={}'.format(token_r.json()['access_token'])

In [None]:
# One slot per friend, pre-filled with 0 until a category is stored for that picture
categories = [0] * nb_friends

In [None]:
import time

for i in tqdm(range(nb_friends)):

    try:
        # Send the base64-encoded picture to the image-recognition endpoint
        response = requests.post(url, headers={'Content-Type': 'application/x-www-form-urlencoded'},
                                 data={'image': jpg2base64('profiles/{}.jpg'.format(i))},
                                 timeout=30)

        # Keep the keyword of the first entry in 'result'
        categories[i] = response.json()['result'][0]['keyword']
    except (KeyError, IndexError, ValueError, TimeoutError, requests.exceptions.RequestException):
        # Best effort: a response without a usable result (missing key, empty
        # list, non-JSON body) or a failed request leaves this slot at its
        # initial value and the loop moves on to the next picture.
        pass

    # Pause between requests; this now also happens after a failure so that
    # error responses do not turn into a burst of back-to-back calls.
    time.sleep(2)

In [None]:
# Convert the list of categories to a pandas.Series
categories = pd.Series(categories)

In [None]:
# Bar chart of the picture categories that occur more than 5 times
categories.value_counts().loc[lambda counts: counts > 5].plot(kind='bar')

In [None]:
# Total number of pictures that belong to a category occurring more than 5 times.
# value_counts() has to be called on the whole Series: categories[1] is a single
# element and has no value_counts() method.
categories.value_counts().loc[lambda counts: counts > 5].sum()

In [None]:
# Counts for the categories that probably show a photo of a real person.
# reindex() reports 0 for a label that never occurred, whereas selecting with a
# list of labels raises KeyError as soon as one of them is missing from the data.
categories.value_counts().reindex(['人物特写', '美女', '男人', '人脸', '中年男性', '女人', '罗大佑', '婚纱写真', '合照'], fill_value=0)

In [None]:
# Count the pictures whose category is one of the "real person photo" labels.
# isin() simply yields False for labels that never occur, so a category missing
# from the data cannot raise KeyError the way list-label selection does.
nb_photo = categories.isin(['人物特写', '美女', '男人', '人脸', '中年男性', '女人', '罗大佑', '婚纱写真', '合照']).sum()

In [None]:
# Show the number of "real person" photos
nb_photo

# 4. Friends' signature analysis

In [None]:
import jieba

In [None]:
# Collect the friends' signatures, leaving out the null entries
signatures = friends_df['Signature'].dropna()

In [None]:
# Merge all signatures into one string.
# A space is placed between signatures so the last word of one and the first
# word of the next are not fused into a single token before segmentation.
signature_text = ' '.join(signatures)

In [None]:
# Show the merged signature text (contains friends' personal text — review the output before sharing)
signature_text

In [None]:
# Segment the merged text with jieba (cut_all=True selects jieba's full mode)
word_list = jieba.cut(signature_text, cut_all=True)
# Join the resulting tokens with single spaces into one string
word_space_split = ' '.join(word_list)

In [None]:
# Show the space-separated tokens
word_space_split

In [None]:
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS

In [None]:
# Word-cloud inputs: the image "nmsl.jpg" loaded as a pixel array
coloring = np.array(Image.open("nmsl.jpg"))
# wordcloud's STOPWORDS plus three extra tokens
# (presumably leftovers of HTML emoji markup in the signatures — confirm)
stopwords = set(STOPWORDS).union({'span', 'class', 'emoji'})

In [None]:
# Build the word cloud from the space-separated tokens, using the image array as mask.
# NOTE(review): font_path is an absolute Windows path, so this cell only works on a machine where that font file exists.
my_wordcloud = WordCloud(background_color="white", max_words=500,
                         mask=coloring, max_font_size=200, random_state=42, stopwords=stopwords,
                         font_path="C:/Windows/Fonts/simhei.ttf").generate(word_space_split)

In [None]:
# Colour function derived from the mask image (built once; the cell used to build it twice)
image_colors = ImageColorGenerator(coloring)

# Draw on the explicitly created axes instead of leaving `ax` unused
fig, ax = plt.subplots(figsize=(10, 10))

# The recoloured cloud is drawn a single time; the cell used to follow this with
# a second imshow(my_wordcloud) that painted the same object again.
ax.imshow(my_wordcloud.recolor(color_func=image_colors))
ax.axis("off")
plt.show()