In [1]:
import pandas as pd

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import sys


from win32ui import types

# Report the interpreter and library versions used for this run, one per line.
print(
    *(
        f'{label} {version}'
        for label, version in (
            ('Python version:', sys.version),
            ('Pandas version:', pd.__version__),
            ('Numpy version:', np.__version__),
            ('Matplotlib version:', matplotlib.__version__),
        )
    ),
    sep='\n',
)

Python version: 3.10.9 (tags/v3.10.9:1dd9be6, Dec  6 2022, 20:01:21) [MSC v.1934 64 bit (AMD64)]
Pandas version: 2.2.2
Numpy version: 2.1.1
Matplotlib version: 3.9.2


In [3]:
# Explicit dtype for every column read from the CSV.
dtypes = dict(
    user_id='int32',
    item_id='int32',
    behavior_type='object',
    item_category='int16',
    day='int8',
    day_name='object',
    hour='int8',
    month='int8',
    md='object',
    time='object',
)

# Load the user-action table with the C parser and the dtypes declared above.
data = pd.read_csv(
    './user_action/clear_data2.csv',
    engine='c',
    dtype=dtypes,
)

In [5]:
# Behaviour code -> Chinese label
# (1 browse, 2 favourite, 3 add to cart, 4 purchase).
dicts = dict(zip(
    (1, 2, 3, 4),
    ('浏览', '收藏', '加购物车', '购买'),
))

# Cast the codes to int, then swap each code for its label.
data['behavior_type'] = (
    data['behavior_type']
    .astype(int)
    .map(dicts)
)

In [7]:
# Parse the 'time' column into pandas datetimes so .max() and date arithmetic work on it.
data['time'] = pd.to_datetime(data['time'])

In [8]:
# Reference date for the recency calculation: one day after the latest
# timestamp in the data. (Original note: this is meant to make December 19
# the "last" date.)
latest_date = data['time'].max() + pd.Timedelta(days=1)

In [10]:
# Per-user count of rows with a non-null 'time', broadcast onto every row of that user.
# NOTE(review): the original note calls this "purchase frequency", but no
# behavior_type filter is applied here, so every kind of action is counted.
data['frequency'] = data.groupby('user_id')['time'].transform('count')

In [15]:
# Preview the first rows of the frame, including the derived columns added so far.
data.head()

Unnamed: 0,user_id,item_id,behavior_type,item_category,time,day,day_name,hour,month,md,frequency
0,98047837,232431562,浏览,4245,2014-12-06 02:00:00,6,5,2,12,12-06,1779
1,97726136,383583590,浏览,5894,2014-12-09 20:00:00,9,1,20,12,12-09,676
2,98607707,64749712,浏览,2883,2014-12-18 11:00:00,18,3,11,12,12-18,1525
3,98662432,320593836,浏览,6562,2014-12-06 10:00:00,6,5,10,12,12-06,41
4,98145908,290208520,浏览,13926,2014-12-16 21:00:00,16,1,21,12,12-16,170


In [13]:
# Number of non-null 'time' values (i.e. recorded actions) for each user_id.
data.groupby('user_id')['time'].count()

user_id
4913         1742
6118          117
7528          214
7591          859
12645         268
             ... 
142376113     322
142412247     335
142430177    1481
142450275    6819
142455899    1351
Name: time, Length: 10000, dtype: int64

In [16]:
# Recency in whole days: gap between latest_date and each user's most recent
# timestamp, broadcast onto every row of that user.
data['recency'] = (latest_date - data.groupby('user_id')['time'].transform('max')).dt.days

In [18]:
# Flag purchase rows as 1 and every other behaviour as 0, then total the flags
# per user so each row carries that user's number of purchases.
data['monetary'] = (
    data['behavior_type']
    .eq('购买')
    .astype(int)
    .groupby(data['user_id'])
    .transform('sum')
)

In [19]:
# Collapse the row-level frame to one row per user: smallest recency,
# largest frequency and largest monetary value seen for that user.
rfm = (
    data.groupby('user_id')
    .agg(
        recency=('recency', 'min'),
        frequency=('frequency', 'max'),
        monetary=('monetary', 'max'),
    )
    .reset_index()
)

# 25th / 50th / 75th percentiles of every column of rfm, indexed by quantile level.
quantiles = rfm.quantile([0.25, 0.5, 0.75])

In [21]:
quantiles

Unnamed: 0,user_id,recency,frequency,monetary
0.25,35471930.0,1.0,314.0,2.0
0.5,72514220.0,1.0,747.0,7.0
0.75,107305200.0,2.0,1547.25,15.0


In [44]:
def RMScore(x, col='recency', quantile_table=None):
    """Score a value from 1 to 4 by the quartile band it falls into (lower value -> lower score).

    Parameters
    ----------
    x : number
        Value to score (a user's recency when called with the defaults).
    col : str, default 'recency'
        Column of the quantile table holding the cut-offs.
    quantile_table : pandas.DataFrame, optional
        Table indexed by the quantile levels 0.25, 0.5 and 0.75. When omitted,
        the notebook-level ``quantiles`` table is used, so existing calls such
        as ``rfm['recency'].apply(RMScore)`` behave exactly as before.

    Returns
    -------
    int
        1 if ``x`` is at or below the 25% cut-off, 2 up to the 50% cut-off,
        3 up to the 75% cut-off, otherwise 4. A value that compares False
        against every cut-off (e.g. NaN) also yields 4.
    """
    if quantile_table is None:
        # Resolved at call time so the function can be defined before the table exists.
        quantile_table = quantiles
    for score, level in enumerate((0.25, 0.5, 0.75), start=1):
        # .loc does one label lookup instead of chained [col][level] indexing.
        if x <= quantile_table.loc[level, col]:
            return score
    return 4

In [42]:
def FMScore(x, col, quantile_table=None):
    """Score a value from 4 down to 1 by the quartile band it falls into (higher value -> lower score).

    Parameters
    ----------
    x : number
        Value to score.
    col : str
        Column of the quantile table holding the cut-offs
        (the notebook uses 'frequency' and 'monetary').
    quantile_table : pandas.DataFrame, optional
        Table indexed by the quantile levels 0.25, 0.5 and 0.75. When omitted,
        the notebook-level ``quantiles`` table is used, so existing calls such
        as ``FMScore(x, 'frequency')`` behave exactly as before.

    Returns
    -------
    int
        4 if ``x`` is at or below the 25% cut-off, 3 up to the 50% cut-off,
        2 up to the 75% cut-off, otherwise 1. A value that compares False
        against every cut-off (e.g. NaN) also yields 1.
    """
    if quantile_table is None:
        # Resolved at call time so the function can be defined before the table exists.
        quantile_table = quantiles
    for score, level in zip((4, 3, 2), (0.25, 0.5, 0.75)):
        # .loc does one label lookup instead of chained [col][level] indexing.
        if x <= quantile_table.loc[level, col]:
            return score
    return 1


In [45]:
# Attach the three quartile scores to the per-user table.
# R is scored by RMScore; F and M are scored by FMScore, with the column name
# forwarded as the extra positional argument.
rfm['R'] = rfm['recency'].apply(RMScore)
rfm['F'] = rfm['frequency'].apply(FMScore, args=('frequency',))
rfm['M'] = rfm['monetary'].apply(FMScore, args=('monetary',))

In [50]:
from pyecharts.globals import CurrentConfig  # global pyecharts configuration object
# NOTE(review): both URLs below point at 127.0.0.1:8000 — presumably a local
# static-file server must be running there for charts to load; confirm.
CurrentConfig.ONLINE_HOST = "http://127.0.0.1:8000/assets/"  # base URL for static assets
from pyecharts.datasets import register_url  # registers an additional source for map files
# Register the local server as a map data source
register_url("http://127.0.0.1:8000/") 

In [48]:
from pyecharts import options as opts
import json
import os
from pyecharts.charts import Map

# Build a custom "湖南湖北" map from every GeoJSON file in MAPS_DIR and render it.
MAPS_DIR = './user_action/maps/'
filenames = os.listdir(MAPS_DIR)

# Unmodified parsed contents of each file, one entry per file.
hnhb = {'type': 'FeatureCollection', 'features': []}
for i in filenames:
    with open(os.path.join(MAPS_DIR, i), 'r', encoding='utf-8') as f:
        # Deliberately not bound to `data`: that name holds the user-action
        # DataFrame used by the RFM cells and must not be overwritten here.
        geo_doc = json.load(f)
    hnhb['features'].append(geo_doc)

# Merged feature collection that is registered with echarts below.
target_data = {'type': 'FeatureCollection', 'features': []}

list_region = os.listdir(MAPS_DIR)
name_list = []
for lr in list_region:
    with open(os.path.join(MAPS_DIR, lr), 'r', encoding='utf-8') as f:
        temp = json.load(f)['features']
    for tp in temp:
        name = tp['properties']['name']
        # Tag the feature with its area and add it to the merged collection.
        tp['properties']['area'] = '湖南湖北'
        target_data['features'].append(tp)
        # Random placeholder value per region name.
        name_list.append((name, np.random.randint(100, 5000)))

maps = Map(init_opts=opts.InitOpts(theme='light',
                                   width='1000px',
                                   height='600px'))

# Serialise with json.dumps: a Python dict repr is not JSON (None/True/False
# would appear as invalid JavaScript identifiers in the generated page).
target_json = json.dumps(target_data, ensure_ascii=False)
maps.add_js_funcs(f"echarts.registerMap('湖南湖北', {target_json});")

# Use the map registered above.
maps.add("湖南湖北", [('河南省', 100)], maptype='湖南湖北')
maps.render()

'C:\\Users\\wang\\Desktop\\pyLearn\\综合实训课程1\\淘宝用户行为分析\\render.html'

In [52]:
from pyecharts import options as opts
import json
import os
import numpy as np
from pyecharts.charts import Map

# Directory holding the GeoJSON files. It is resolved relative to the current
# working directory.
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# this directory — confirm the notebook is started from the project folder.
MAPS_DIR = './user_action/maps/'

# List the GeoJSON files.
filenames = os.listdir(MAPS_DIR)

# Unmodified parsed contents of each file, one entry per file.
hnhb = {'type': 'FeatureCollection', 'features': []}
for i in filenames:
    with open(os.path.join(MAPS_DIR, i), 'r', encoding='utf-8') as f:
        # Deliberately not bound to `data`: that name holds the user-action
        # DataFrame used by the RFM cells and must not be overwritten here.
        geo_doc = json.load(f)
    hnhb['features'].append(geo_doc)

# Merged feature collection that is registered with echarts below.
target_data = {'type': 'FeatureCollection', 'features': []}

list_region = os.listdir(MAPS_DIR)
name_list = []
for lr in list_region:
    with open(os.path.join(MAPS_DIR, lr), 'r', encoding='utf-8') as f:
        temp = json.load(f)['features']
    for tp in temp:
        name = tp['properties']['name']
        # Keep only the two province-level features.
        if name in ['湖南省', '湖北省']:
            # Tag the feature with its area and add it to the merged collection.
            tp['properties']['area'] = '湖南湖北'
            target_data['features'].append(tp)
            # Random placeholder value per kept region name.
            name_list.append((name, np.random.randint(100, 5000)))

# Create the map object.
maps = Map(init_opts=opts.InitOpts(theme='light',
                                   width='1000px',
                                   height='600px'))

# Register the merged map. Serialise with json.dumps: a Python dict repr is not
# JSON (None/True/False would appear as invalid JavaScript identifiers).
target_json = json.dumps(target_data, ensure_ascii=False)
maps.add_js_funcs(f"echarts.registerMap('湖南湖北', {target_json});")

# Draw the registered map without per-city data.
maps.add("湖南湖北", [('湖南湖北', np.random.randint(100, 5000))], maptype='湖南湖北')

# Render the chart.
maps.render()

FileNotFoundError: [WinError 3] 系统找不到指定的路径。: './user_action/maps/'