In [1]:
import pandas as pd

# melt/pivot 整理数据

## melt函数功能演示

##### 作用：将宽数据变长数据
**pd.melt(df, id_vars=None, value_vars=None, var_name=None, value_name='value')**<br/>或<br/>
**df.melt(id_vars=None, value_vars=None, var_name=None, value_name='value')**<br/>
参数：<br/>
* df：DataFrame数据<br/>
* id_vars：固定不动的列<br/>
* value_vars：需要被转换的列，如果不指定该参数，除id_vars之外的列都会进行转换<br/>
* var_name：转换后存放原列名的新列的列名，不指定时默认为 'variable'<br/>
* value_name：转换后存放原单元格值的新列的列名，默认为 'value'

**加载 pew.csv 数据**

In [2]:
# Load the Pew survey table; the path is relative to the notebook's working directory.
pew = pd.read_csv(filepath_or_buffer='./data/pew.csv')
# Preview the first five rows (the default for head()).
pew.head(n=5)

Unnamed: 0,religion,<$10k,$10-20k,$20-30k,$30-40k,$40-50k,$50-75k,$75-100k,$100-150k,>150k,Don't know/refused
0,Agnostic,27,34,60,81,76,137,122,109,84,96
1,Atheist,12,27,37,52,35,70,73,59,74,76
2,Buddhist,27,21,30,34,33,58,62,39,53,54
3,Catholic,418,617,732,670,638,1116,949,792,633,1489
4,Don’t know/refused,15,14,15,11,10,35,21,17,18,116


In [6]:
# Melt with the default output names: 'religion' stays fixed and every other
# column is unpivoted into a ('variable', 'value') pair of columns.
pew_long = pew.melt(id_vars=['religion'])
pew_long

Unnamed: 0,religion,variable,value
0,Agnostic,<$10k,27
1,Atheist,<$10k,12
2,Buddhist,<$10k,27
3,Catholic,<$10k,418
4,Don’t know/refused,<$10k,15
...,...,...,...
175,Orthodox,Don't know/refused,73
176,Other Christian,Don't know/refused,18
177,Other Faiths,Don't know/refused,71
178,Other World Religions,Don't know/refused,8


In [7]:
# Same melt as before, but name the two generated columns explicitly:
# 'income' holds the former column headers, 'count' holds the cell values.
pew_long = pew.melt(
    id_vars=['religion'],
    var_name='income',
    value_name='count',
)
# Preview the first five rows only.
pew_long.head(n=5)

Unnamed: 0,religion,income,count
0,Agnostic,<$10k,27
1,Atheist,<$10k,12
2,Buddhist,<$10k,27
3,Catholic,<$10k,418
4,Don’t know/refused,<$10k,15


## pivot 函数功能演示

##### 作用：将长数据变宽数据
**pd.pivot(df, index=None, columns=None, values=None)**<br/>
或<br/>
**df.pivot(index=None, columns=None, values=None)**<br/>
参数：
* df：DataFrame数据
* index：指定列标签，该列的值作为结果中的行标签
* columns：指定列标签，该列的值作为结果中的列标签
* values：指定列标签，该列的值作为结果中每个单元格的值

In [8]:
# Pivot the long table back to wide form: one row per religion, one column per
# income bracket, cells filled from 'count'.
# Note: in the output the income columns come back ordered by label, not in the
# bracket order of the original pew table.
pew_wide = pd.pivot(
    pew_long,
    index='religion',
    columns='income',
    values='count',
)
pew_wide

income,$10-20k,$100-150k,$20-30k,$30-40k,$40-50k,$50-75k,$75-100k,<$10k,>150k,Don't know/refused
religion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Agnostic,34,109,60,81,76,137,122,27,84,96
Atheist,27,59,37,52,35,70,73,12,74,76
Buddhist,21,39,30,34,33,58,62,27,53,54
Catholic,617,792,732,670,638,1116,949,418,633,1489
Don’t know/refused,14,17,15,11,10,35,21,15,18,116
Evangelical Prot,869,723,1064,982,881,1486,949,575,414,1529
Hindu,9,48,7,9,11,34,47,1,54,37
Historically Black Prot,244,81,236,238,197,223,131,228,78,339
Jehovah's Witness,27,11,24,24,21,30,15,20,6,37
Jewish,19,87,25,25,30,95,69,19,151,162


In [9]:
# Reset the row labels: 'religion' moves from the index back into a regular
# column and the rows get a default integer index. The result is only displayed;
# pew_wide itself is left unchanged.
pew_wide.reset_index(drop=False)

income,religion,$10-20k,$100-150k,$20-30k,$30-40k,$40-50k,$50-75k,$75-100k,<$10k,>150k,Don't know/refused
0,Agnostic,34,109,60,81,76,137,122,27,84,96
1,Atheist,27,59,37,52,35,70,73,12,74,76
2,Buddhist,21,39,30,34,33,58,62,27,53,54
3,Catholic,617,792,732,670,638,1116,949,418,633,1489
4,Don’t know/refused,14,17,15,11,10,35,21,15,18,116
5,Evangelical Prot,869,723,1064,982,881,1486,949,575,414,1529
6,Hindu,9,48,7,9,11,34,47,1,54,37
7,Historically Black Prot,244,81,236,238,197,223,131,228,78,339
8,Jehovah's Witness,27,11,24,24,21,30,15,20,6,37
9,Jewish,19,87,25,25,30,95,69,19,151,162


# stack/unstack 整理数据

## stack 功能演示

##### 作用：将列标签转成内层行标签，把宽数据“堆叠”成长数据（每一行的各列值被纵向展开）
**df.stack()**

In [3]:
# Load the fruit table, using the first CSV column as the row index
# (the state names in the displayed output).
state_fruit = pd.read_csv(
    './data/state_fruit.csv',
    index_col=0,
)
state_fruit

Unnamed: 0,Apple,Orange,Banana
Texas,12,10,40
Arizona,9,7,12
Florida,0,14,190


In [10]:
# Stack: move the column labels (the innermost, and here only, column level)
# into an inner row-index level. With single-level columns the result is a
# Series indexed by (state, fruit).
state_fruit_series = state_fruit.stack(level=-1)
state_fruit_series

Texas    Apple      12
         Orange     10
         Banana     40
Arizona  Apple       9
         Orange      7
         Banana     12
Florida  Apple       0
         Orange     14
         Banana    190
dtype: int64

In [11]:
# Flatten both index levels into ordinary columns. Because the index levels and
# the Series are unnamed, pandas auto-names the columns 'level_0', 'level_1' and 0.
state_fruit_tidy = state_fruit_series.reset_index(drop=False)
state_fruit_tidy

Unnamed: 0,level_0,level_1,0
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


In [12]:
# Build the tidy frame directly from the stacked Series with meaningful names,
# instead of overwriting `.columns` in place on a frame that was already
# displayed. This keeps the cell re-runnable on its own and does not depend on
# the positional order of auto-generated column names.
state_fruit_tidy = (
    state_fruit_series
    .rename_axis(['state', 'fruit'])  # names for the outer/inner index levels
    .reset_index(name='weight')       # name for the column holding the values
)
state_fruit_tidy

Unnamed: 0,state,fruit,weight
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


## unstack 功能演示

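##### 作用：stack 的逆操作，将内层行标签转成列标签，把长数据还原成宽数据
**series.unstack()**<br/>
例如：`state_fruit_series.unstack()` 会把上面 stack 得到的 Series 重新展开成以州为行标签、以水果为列标签的宽表。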

# wide_to_long 整理数据

##### 作用：宽数据变长数据，能够把列名起始部分相同的列放在一起进行转换
**pd.wide_to_long(df, stubnames, i, j, sep='')**<br/>
参数：
* df：DataFrame数据
* stubnames：转换列的起始部分(前缀)
* i：固定不动的列，作为转换结果的行标签
* j：由转换列列名的后缀值组成的新列的列名
* sep：转换列的前缀和后缀的分割符

**加载 movie.csv 数据**

In [5]:
# Load the movie data set; the path is relative to the notebook's working directory.
movies = pd.read_csv(filepath_or_buffer='./data/movie.csv')
# Preview the first five rows (wide table, so the display elides middle columns).
movies.head(n=5)

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0
