In [79]:
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import HeatMap
import numpy as np

In [275]:
# Read the video-game sales table and discard every row that has a missing value.
df = pd.read_csv('vgsales.csv').dropna()
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [12]:
# Print the column dtypes and non-null counts of the cleaned frame.
df.info()
# Show Year cast to integers. The result is only displayed, not assigned,
# so the Year column stored in df keeps its original dtype.
df['Year'].astype(int)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16291 entries, 0 to 16597
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Rank          16291 non-null  int64  
 1   Name          16291 non-null  object 
 2   Platform      16291 non-null  object 
 3   Year          16291 non-null  float64
 4   Genre         16291 non-null  object 
 5   Publisher     16291 non-null  object 
 6   NA_Sales      16291 non-null  float64
 7   EU_Sales      16291 non-null  float64
 8   JP_Sales      16291 non-null  float64
 9   Other_Sales   16291 non-null  float64
 10  Global_Sales  16291 non-null  float64
dtypes: float64(6), int64(1), object(4)
memory usage: 1.5+ MB


0        2006
1        1985
2        2008
3        2009
4        1996
         ... 
16593    2002
16594    2003
16595    2008
16596    2010
16597    2003
Name: Year, Length: 16291, dtype: int32

## 列名含义
* 'Rank' 销售排名
* 'Name' 游戏名
* 'Platform' 发售平台
* 'Year' 发售年份
* 'Genre' 游戏类型
* 'Publisher' 发行厂家
* 'NA_Sales' 北美销量
* 'EU_Sales' 欧洲销量
* 'JP_Sales' 日本销量
* 'Other_Sales' 其他地区销量
* 'Global_Sales' 全球销量

## 全球销量最高的20款游戏及其平台、年份、发行厂家

In [22]:
# Order games by worldwide sales (highest first), keep the first 20 rows,
# and show only the identifying columns.
(
    df.sort_values(by='Global_Sales', ascending=False)
    .head(20)
    .loc[:, ['Name', 'Platform', 'Year', 'Publisher']]
)

Unnamed: 0,Name,Platform,Year,Publisher
0,Wii Sports,Wii,2006.0,Nintendo
1,Super Mario Bros.,NES,1985.0,Nintendo
2,Mario Kart Wii,Wii,2008.0,Nintendo
3,Wii Sports Resort,Wii,2009.0,Nintendo
4,Pokemon Red/Pokemon Blue,GB,1996.0,Nintendo
5,Tetris,GB,1989.0,Nintendo
6,New Super Mario Bros.,DS,2006.0,Nintendo
7,Wii Play,Wii,2006.0,Nintendo
8,New Super Mario Bros. Wii,Wii,2009.0,Nintendo
9,Duck Hunt,NES,1984.0,Nintendo


## 游戏发行商、平台、游戏类型热图

In [265]:
# Alphabetically sorted list of every genre present in the data.
genre_list = sorted(df['Genre'].value_counts().index)
# Alphabetically sorted list of the 24 publishers with the most rows (titles).
publisher_list = sorted(df['Publisher'].value_counts().index[:24])
# Alphabetically sorted list of every platform present in the data.
Platform_list = sorted(df['Platform'].value_counts().index)

In [213]:
# Restrict the data to the 24 publishers that appear most often.
top24_publishers = df['Publisher'].value_counts().index[:24]
publisher24_df = df[df['Publisher'].isin(top24_publishers)]
# Publisher x genre pivot table: each cell counts the rows for that pair,
# with 0 filled in where a publisher has no game of that genre.
publisher24_df_pt = publisher24_df.pivot_table(
    index=['Publisher'],
    columns=['Genre'],
    values=['Global_Sales'],
    aggfunc='count',
    fill_value=0,
)

In [262]:
# Build the heat-map cells directly from the pivot table so the loop bounds and
# both axis label lists always match the table, instead of relying on a
# hard-coded 24 x 12 grid and separately constructed label lists.
publisher_labels = publisher24_df_pt.index.tolist()
# The pivot was built with a list for `values`, so its columns are a
# two-level index; the last level carries the genre names.
genre_labels = publisher24_df_pt.columns.get_level_values(-1).tolist()
counts = publisher24_df_pt.values
# Each cell is [x index (publisher row), y index (genre column), count],
# with the count cast to a plain Python int.
value = [
    [i, j, int(counts[i, j])]
    for i in range(counts.shape[0])
    for j in range(counts.shape[1])
]
c = (
    HeatMap(init_opts=opts.InitOpts())
    .add_xaxis(publisher_labels)
    .add_yaxis(
        "",
        genre_labels,
        value,
        label_opts=opts.LabelOpts(is_show=True, color="auto", position="insideLeft"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="每个游戏商擅长于制作什么游戏"),
        visualmap_opts=opts.VisualMapOpts(max_=600, pos_left=0),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', rotate=-35, font_size=6),
        ),
        yaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', font_size=8),
        ),
    )
)
# Render the chart inline in the notebook.
c.render_notebook()

In [267]:
# Platform x genre pivot table: each cell counts the rows for that pair,
# with 0 filled in where a platform has no game of that genre.
platform_df_pt = df.pivot_table(
    index=['Platform'],
    columns=['Genre'],
    values=['Global_Sales'],
    aggfunc='count',
    fill_value=0,
)

In [274]:
# Build the heat-map cells directly from the pivot table so the loop bounds and
# both axis label lists always match the table, instead of relying on a
# hard-coded 31 x 12 grid and separately constructed label lists.
platform_labels = platform_df_pt.index.tolist()
# The pivot was built with a list for `values`, so its columns are a
# two-level index; the last level carries the genre names.
genre_labels = platform_df_pt.columns.get_level_values(-1).tolist()
counts = platform_df_pt.values
# Each cell is [x index (platform row), y index (genre column), count],
# with the count cast to a plain Python int.
value = [
    [i, j, int(counts[i, j])]
    for i in range(counts.shape[0])
    for j in range(counts.shape[1])
]
c = (
    HeatMap(init_opts=opts.InitOpts())
    .add_xaxis(platform_labels)
    .add_yaxis(
        "",
        genre_labels,
        value,
        label_opts=opts.LabelOpts(is_show=True, color="auto", position="insideLeft"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="平台游戏数量热图"),
        visualmap_opts=opts.VisualMapOpts(max_=400, pos_left=0),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', rotate=-35, font_size=6),
        ),
        yaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', font_size=8),
        ),
    )
)
# Render the chart inline in the notebook.
c.render_notebook()

In [280]:
# Total sales per genre in each region. The regional columns are selected by
# name before summing, so the result does not depend on column order or on
# whether pandas keeps or drops the text columns when summing a whole frame.
df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']].sum()

Unnamed: 0_level_0,NA_Sales,EU_Sales,JP_Sales,Other_Sales
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Action,861.77,516.48,158.65,184.92
Adventure,101.93,63.74,51.99,16.7
Fighting,220.74,100.0,87.15,36.19
Misc,396.92,211.77,106.67,73.92
Platform,445.99,200.65,130.65,51.51
Puzzle,122.01,50.52,56.68,12.47
Racing,356.93,236.31,56.61,76.68
Role-Playing,326.5,187.57,350.29,59.38
Shooter,575.16,310.45,38.18,101.9
Simulation,181.78,113.02,63.54,31.36


In [282]:
# Aggregate once (instead of once per heat-map cell) and pick the regional
# sales columns by name so the selection does not depend on column positions.
region_cols = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
genre_region_sales = df.groupby('Genre')[region_cols].sum()
totals = genre_region_sales.values
# Each cell is [x index (genre row), y index (region column), total];
# int() truncates each total to a whole number.
value = [
    [i, j, int(totals[i, j])]
    for i in range(totals.shape[0])
    for j in range(totals.shape[1])
]
c = (
    HeatMap(init_opts=opts.InitOpts())
    # Take the genre labels from the aggregated frame so they line up with its rows.
    .add_xaxis(genre_region_sales.index.tolist())
    .add_yaxis(
        "",
        region_cols,
        value,
        label_opts=opts.LabelOpts(is_show=True, color="auto", position="insideLeft"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="销售地区与游戏类型热图"),
        visualmap_opts=opts.VisualMapOpts(max_=900, pos_left=0),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', rotate=-35, font_size=10),
        ),
        yaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts=opts.LabelOpts(position='inside', font_size=12),
        ),
    )
)
# Render the chart inline in the notebook.
c.render_notebook()

欧美更偏爱动作类和射击类游戏，日本更偏爱角色扮演类游戏。