# 2-4. Pandas 패키지

## 2-4-1. Pandas의 자료 구조

### 2-4-1-1. Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raw precipitation readings. With no explicit index, the Series receives the
# default integer labels 0..3.
prcp = [95, 45, 74, 100]
series = pd.Series(data=prcp)

In [3]:
# Attach a city label to each precipitation value by passing an explicit index.
ind = ['Seoul', 'Tokyo', 'Beijing', 'Singapore']
series = pd.Series(data=prcp, index=ind)

In [4]:
# Echo the Series: every value is now listed next to its city label.
series

Seoul         95
Tokyo         45
Beijing       74
Singapore    100
dtype: int64

In [5]:
# Build the Series straight from a dict: the keys become the index labels and
# the values become the data, in insertion order.
prcp_dic = dict(Seoul=95, Tokyo=45, Beijing=74, Singapore=100)
series = pd.Series(data=prcp_dic)
series

Seoul         95
Tokyo         45
Beijing       74
Singapore    100
dtype: int64

In [6]:
# Label-based lookup: fetch the value stored under the 'Seoul' index label.
series['Seoul']

95

In [7]:
# Rebuild the Series against an explicit list of labels. 'New York' has no
# entry in prcp_dic, so its slot is filled with NaN.
cities = ['Seoul', 'Tokyo', 'New York', 'Beijing', 'Singapore']
series = pd.Series(data=prcp_dic, index=cities)

In [8]:
# Echo the Series: the unmatched 'New York' label shows NaN, and the values
# are displayed as float64 because NaN is a floating-point value.
series

Seoul         95.0
Tokyo         45.0
New York       NaN
Beijing       74.0
Singapore    100.0
dtype: float64

In [9]:
# Boolean mask that is True wherever the value is missing (NaN).
pd.isnull(series)

Seoul        False
Tokyo        False
New York      True
Beijing      False
Singapore    False
dtype: bool

In [10]:
# Inverse mask: True wherever a value is present, False for NaN.
pd.notnull(series)

Seoul         True
Tokyo         True
New York     False
Beijing       True
Singapore     True
dtype: bool

In [11]:
# Use the current Series as today's values, and build tomorrow's values, which
# additionally contain a 'London' entry.
prcp_today = series
prcp_tomorrow = pd.Series({
    'Seoul': 80,
    'Tokyo': 40,
    'Beijing': 92,
    'Singapore': 98,
    'New York': 95,
    'London': 81,
})

In [12]:
# Arithmetic aligns the two Series on their index labels. A label that is
# missing from either operand ('London') or holds NaN ('New York') yields NaN;
# all other labels are added pairwise.
prcp_today + prcp_tomorrow

Beijing      166.0
London         NaN
New York       NaN
Seoul        175.0
Singapore    198.0
Tokyo         85.0
dtype: float64

### 2-4-1-2. DataFrame

In [13]:
# Column-oriented input: each dict key becomes a DataFrame column, in the
# order the keys were inserted.
data = {
    'prcp2': [80, 40, 92, 98, 95, 81],
    'prcp1': [95, 45, 74, 100, 0, 0],
    'cities': ['Seoul', 'Tokyo', 'Beijing', 'Singapore', 'New York', 'London'],
}
prcps = pd.DataFrame(data=data)
print(prcps)

   prcp2  prcp1     cities
0     80     95      Seoul
1     40     45      Tokyo
2     92     74    Beijing
3     98    100  Singapore
4     95      0   New York
5     81      0     London


In [14]:
# Passing `columns` fixes the column order instead of following the dict's
# key order.
prcps = pd.DataFrame(
    data,
    columns=['cities', 'prcp1', 'prcp2'],
)
print(prcps)

      cities  prcp1  prcp2
0      Seoul     95     80
1      Tokyo     45     40
2    Beijing     74     92
3  Singapore    100     98
4   New York      0     95
5     London      0     81


## 2-4-2. 자료 추가하기 (Join과 Merge 사용법)

In [15]:
# Store the left-hand DataFrame (default integer index 0..2) in `left`.
left = pd.DataFrame(
    [['A0', 'B0'], ['A1', 'B1'], ['A2', 'B2']],
    columns=['A', 'B'],
)
# Pick columns A and B, then the first three rows by position.
left[['A', 'B']].iloc[0:3]

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2


In [16]:
# Right-hand DataFrame with a custom index (0, 2, 3) that only partly overlaps
# the 0..2 index of `left`.
right = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2'],
        'D': ['D0', 'D2', 'D3'],
    },
    index=[0, 2, 3],
)
right

Unnamed: 0,C,D
0,C0,D0
2,C1,D2
3,C2,D3


In [17]:
# join() defaults to how='left': every row of `left` (index 0-2) is kept and
# the columns of `right` are appended. Index 1 has no match in `right`, so its
# C and D are NaN; index 3 exists only in `right` and is dropped.
left.join(right)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C1,D2


In [18]:
# how='outer' keeps the union of both indexes, so rows that exist in only one
# frame (index 1 from `left`, index 3 from `right`) are shown as well.
left.join(right,how='outer')

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C1,D2
3,,,C2,D3


In [19]:
# Index-on-index merge: with left_index/right_index set, an outer merge gives
# the same table as left.join(right, how='outer').
pd.merge(left, right, left_index=True, right_index=True, how='outer')

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C1,D2
3,,,C2,D3


In [20]:
# how='inner' keeps only the index labels present in both frames (0 and 2).
pd.merge(left, right, left_index=True, right_index=True, how='inner')

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
2,A2,B2,C1,D2


In [21]:
# Rebuild prcps from `data` without a column list, so the columns follow the
# dict's key order again (prcp2, prcp1, cities).
prcps = pd.DataFrame(data)
prcps

Unnamed: 0,prcp2,prcp1,cities
0,80,95,Seoul
1,40,45,Tokyo
2,92,74,Beijing
3,98,100,Singapore
4,95,0,New York
5,81,0,London


In [22]:
# One-row frame for the city to append. It has no prcp1 column, and its index
# label is the string '6'.
add_city = pd.DataFrame(
    {'cities': ['Paris'], 'prcp2': [100]},
    index=['6'],
)
add_city

Unnamed: 0,cities,prcp2
6,Paris,100


In [23]:
# With no `on` argument, merge joins on the columns the two frames share
# (prcp2 and cities). how='outer' keeps rows from both sides, so Paris is
# appended; it has no prcp1 value, which therefore becomes NaN.
# NOTE(review): the row order of an outer merge has differed between pandas
# releases — confirm the displayed order on the installed version.
prcps1 = pd.merge(prcps, add_city, how='outer')
prcps1

Unnamed: 0,prcp2,prcp1,cities
0,80,95.0,Seoul
1,40,45.0,Tokyo
2,92,74.0,Beijing
3,98,100.0,Singapore
4,95,0.0,New York
5,81,0.0,London
6,100,,Paris


In [24]:
# Cloud-cover table keyed by the same city names, with an explicit 0..6 index.
cloud = pd.DataFrame(
    {
        'cities': ['Seoul', 'Tokyo', 'Beijing', 'Singapore', 'New York', 'London', 'Paris'],
        'cloud': [10, 6, 7, 10, 8, 5, 10],
    },
    index=range(7),
)
cloud

Unnamed: 0,cities,cloud
0,Seoul,10
1,Tokyo,6
2,Beijing,7
3,Singapore,10
4,New York,8
5,London,5
6,Paris,10


In [25]:
# Combine precipitation and cloud cover; the only shared column is `cities`,
# so that is the merge key.
obs = pd.merge(prcps1, cloud, how='outer')
# Display the merged table with the columns in a reader-friendly order.
obs.reindex(columns=['cities', 'prcp1', 'prcp2', 'cloud'])

Unnamed: 0,cities,prcp1,prcp2,cloud
0,Seoul,95.0,80,10
1,Tokyo,45.0,40,6
2,Beijing,74.0,92,7
3,Singapore,100.0,98,10
4,New York,0.0,95,8
5,London,0.0,81,5
6,Paris,,100,10


## 2-4-3. 자료 불러오기

In [26]:
# Load the station record from station.csv (path is relative to the working
# directory) into a DataFrame.
air = pd.read_csv('station.csv')

In [27]:
# Show the first five rows of `air`.
air.head()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
0,1905,1.55,-2.45,4.65,9.25,14.45,20.05,24.05,23.65,20.15,14.75,6.45,1.75,0.15,9.45,22.58,13.78,11.49
1,1906,-3.75,-3.15,2.75,9.85,15.25,20.95,24.55,24.75,20.35,14.85,4.35,-0.45,-1.72,9.28,23.42,13.18,11.04
2,1907,0.15,-2.85,3.55,10.55,14.75,19.65,23.85,25.85,21.55,16.05,6.65,-2.25,-1.05,9.62,23.12,14.75,11.61
3,1908,-2.65,-3.05,2.95,10.05,14.65,19.75,23.05,24.55,20.75,15.65,5.45,1.05,-2.65,9.22,22.45,13.95,10.74
4,1909,-1.55,-2.15,2.15,10.15,14.65,19.45,23.95,25.45,21.25,14.15,7.05,-1.95,-0.88,8.98,22.95,14.15,11.3


In [28]:
# Dimensions of the DataFrame as (rows, columns).
air.shape

(118, 18)

In [29]:
# Show the last five rows; note the 999.9 placeholder values in the 2022 row.
air.tail()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
113,2018,-3.6,-1.6,7.3,12.25,17.2,21.7,26.95,28.35,21.75,14.05,8.7,-0.05,-2.17,12.25,25.67,14.83,12.65
114,2019,-0.25,1.15,6.85,11.75,18.05,21.75,25.25,27.15,22.75,16.9,8.35,1.9,0.28,12.22,24.72,16.0,13.3
115,2020,2.1,2.8,7.25,10.7,17.1,22.25,23.5,26.15,21.55,14.75,8.45,0.3,2.27,11.68,23.97,14.92,13.21
116,2021,-2.05,2.15,8.25,13.45,16.45,21.95,27.5,25.75,22.85,16.05,8.95,1.4,0.13,12.72,25.07,15.95,13.47
117,2022,-1.95,-1.05,7.05,13.35,17.45,999.9,999.9,999.9,999.9,999.9,999.9,999.9,-0.53,12.62,999.9,999.9,999.9


In [30]:
# 999.9 marks a missing observation in this file: overwrite every cell equal
# to it with NaN, modifying `air` in place.
air[air==999.9]=np.nan

In [31]:
# Confirm that the 999.9 placeholders now appear as NaN.
air.tail()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
113,2018,-3.6,-1.6,7.3,12.25,17.2,21.7,26.95,28.35,21.75,14.05,8.7,-0.05,-2.17,12.25,25.67,14.83,12.65
114,2019,-0.25,1.15,6.85,11.75,18.05,21.75,25.25,27.15,22.75,16.9,8.35,1.9,0.28,12.22,24.72,16.0,13.3
115,2020,2.1,2.8,7.25,10.7,17.1,22.25,23.5,26.15,21.55,14.75,8.45,0.3,2.27,11.68,23.97,14.92,13.21
116,2021,-2.05,2.15,8.25,13.45,16.45,21.95,27.5,25.75,22.85,16.05,8.95,1.4,0.13,12.72,25.07,15.95,13.47
117,2022,-1.95,-1.05,7.05,13.35,17.45,,,,,,,,-0.53,12.62,,,


In [32]:
# Column labels of the DataFrame.
air.columns

Index(['YEAR', 'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP',
       'OCT', 'NOV', 'DEC', 'D-J-F', 'M-A-M', 'J-J-A', 'S-O-N', 'metANN'],
      dtype='object')

In [33]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
air.describe()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
count,118.0,118.0,118.0,118.0,117.0,118.0,115.0,116.0,116.0,114.0,115.0,116.0,116.0,118.0,118.0,116.0,115.0,116.0
mean,1963.5,-2.658559,-0.547797,4.558559,10.961111,16.272203,20.753913,24.386897,25.652586,21.164912,14.993043,7.448103,0.211552,-0.997966,10.603898,23.596552,14.528174,11.924138
std,34.207699,2.041565,1.87736,1.655818,1.284204,1.06758,0.92194,1.151129,1.065061,0.806902,1.060941,1.573259,1.916236,1.461526,1.105138,0.833403,0.878148,0.790445
min,1905.0,-7.45,-4.16,0.44,8.65,13.45,18.35,21.75,22.91,19.25,12.95,3.15,-4.35,-4.48,8.42,21.65,11.82,10.35
25%,1934.25,-4.0175,-2.195,3.535,9.96,15.655,20.1,23.55,24.9575,20.65,14.15,6.5175,-0.9775,-2.0025,9.7575,22.9775,14.03,11.3625
50%,1963.5,-2.77,-0.615,4.655,10.75,16.175,20.75,24.375,25.75,21.15,14.9,7.515,0.4,-1.0,10.545,23.65,14.48,11.84
75%,1992.75,-1.2925,0.945,5.65,11.63,16.9875,21.36,25.25,26.36,21.6025,15.795,8.64,1.75,0.1175,11.39,24.08,15.025,12.5225
max,2022.0,2.1,4.4,8.25,14.59,18.7,22.9,27.76,28.35,23.32,18.64,11.4,4.9,2.59,13.25,25.67,16.52,13.71


In [34]:
# Sort ascending by metANN (the annual mean; these are temperatures, not
# precipitation) and show the five lowest years.
air.sort_values(by='metANN').head()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
31,1936,-7.05,-3.25,0.55,9.25,15.45,20.75,23.65,23.65,21.45,15.05,7.25,0.85,-4.28,8.42,22.68,14.58,10.35
52,1957,-2.83,-3.66,0.44,10.24,16.2,19.65,22.21,24.3,19.58,13.46,9.39,0.41,-3.59,8.96,22.05,14.14,10.39
42,1947,-2.55,-3.95,2.25,10.65,15.15,18.35,23.05,25.55,20.65,13.35,4.95,-3.95,-2.92,9.35,22.32,12.98,10.43
8,1913,-4.04,-3.05,2.05,10.75,14.05,19.15,21.75,24.05,20.65,14.65,6.85,-0.45,-2.88,8.95,21.65,14.05,10.44
12,1917,-7.45,-2.85,3.25,9.95,13.55,20.45,25.25,25.25,21.05,15.35,4.95,-4.35,-3.42,8.92,23.65,13.78,10.73


In [35]:
# Tail of the same ascending sort. sort_values puts NaN last, so the rows
# with a missing metANN (1950 and 2022) end up below the warmest years.
air.sort_values(by='metANN').tail()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
116,2021,-2.05,2.15,8.25,13.45,16.45,21.95,27.5,25.75,22.85,16.05,8.95,1.4,0.13,12.72,25.07,15.95,13.47
111,2016,-2.0,1.05,6.75,13.35,18.7,22.4,25.55,27.75,23.25,16.35,7.5,2.2,0.52,12.93,25.23,15.7,13.6
93,1998,-0.77,3.18,6.92,14.59,18.23,21.2,24.71,25.62,23.32,17.58,7.88,2.69,1.48,13.25,23.84,16.26,13.71
45,1950,-0.72,0.26,5.21,13.3,17.63,,,,,,,,-0.27,12.05,,,
117,2022,-1.95,-1.05,7.05,13.35,17.45,,,,,,,,-0.53,12.62,,,


### 2-4-3-2. DataFrame을 이용한 자료처리

In [36]:
# Select the JAN column, then take its first ten entries.
air['JAN'][0:10]

0    1.55
1   -3.75
2    0.15
3   -2.65
4   -1.55
5   -1.75
6   -3.65
7   -3.15
8   -4.04
9   -1.05
Name: JAN, dtype: float64

In [37]:
# Boolean-mask filter: keep the rows (years) whose D-J-F mean temperature is
# at least 1 degree.
air[air['D-J-F']>=1]

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
11,1916,0.85,-0.15,0.85,10.45,14.15,19.25,23.15,25.55,20.05,14.35,7.65,0.05,1.05,8.48,22.65,14.02,11.55
44,1949,-1.66,2.03,4.1,10.38,16.28,20.58,25.69,27.06,22.17,15.67,8.48,-0.34,1.64,10.25,24.44,15.44,12.94
54,1959,-4.25,2.58,6.29,10.5,17.59,20.31,24.31,26.21,21.47,16.0,6.51,1.51,1.08,11.46,23.61,14.66,12.7
74,1979,1.39,1.11,6.23,10.89,16.14,21.03,24.27,25.51,20.12,16.28,6.6,2.35,1.64,11.09,23.6,14.33,12.66
84,1989,0.75,2.66,5.76,13.23,16.88,20.68,24.4,25.47,20.82,14.07,7.79,1.82,1.4,11.96,23.52,14.23,12.77
87,1992,0.01,0.93,7.09,11.39,15.59,19.83,24.31,24.96,20.82,14.47,6.97,1.75,1.12,11.36,23.03,14.09,12.4
93,1998,-0.77,3.18,6.92,14.59,18.23,21.2,24.71,25.62,23.32,17.58,7.88,2.69,1.48,13.25,23.84,16.26,13.71
94,1999,-0.28,1.09,6.28,13.13,16.85,22.14,25.5,26.07,22.89,14.68,8.62,1.06,1.17,12.09,24.57,15.4,13.3
97,2002,0.64,2.74,7.84,,17.76,20.94,24.35,23.6,,14.57,3.9,1.18,1.07,12.98,22.96,12.55,12.39
102,2007,1.06,4.4,6.21,11.66,16.64,22.18,23.05,25.92,21.95,15.59,7.89,2.56,2.59,11.5,23.72,15.14,13.24


In [40]:
# Combine two conditions with & (each wrapped in parentheses): years with
# D-J-F >= 1 and M-A-M >= 11, returning only the YEAR column.
air[(air['D-J-F']>=1)&(air['M-A-M']>=11)]['YEAR']

54     1959
74     1979
84     1989
87     1992
93     1998
94     1999
97     2002
102    2007
104    2009
115    2020
Name: YEAR, dtype: int64

### 2-4-3-3. 그룹 연산 (Groupby)

In [42]:
# describe() yields per-column summary statistics (count, mean, std, min,
# 25%, 50%, 75%, max); the metANN quartiles serve as the class thresholds.
table = air.describe()
Q1 = table.loc['25%', 'metANN']  # first quartile of metANN
Q3 = table.loc['75%', 'metANN']  # third quartile of metANN

# Label every year. Conditions are tried in order and the first match wins:
# missing metANN -> 'NaN', below Q1 -> 'cold', above Q3 -> 'hot', else 'plain'.
howIsThisYear = np.select(
    [np.isnan(air['metANN']), air['metANN'] < Q1, air['metANN'] > Q3],
    ['NaN', 'cold', 'hot'],
    default='plain',
)

air['howIsThisYear'] = howIsThisYear
# groupby() is lazy: this line only displays the DataFrameGroupBy object.
air.groupby('howIsThisYear')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000016100705EB0>

In [43]:
# First quartile of metANN; years below it are labelled 'cold'.
Q1

11.362499999999999

In [45]:
# Third quartile of metANN; years above it are labelled 'hot'.
Q3

12.522499999999999

In [46]:
# The describe() summary the two quartiles were read from.
table

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
count,118.0,118.0,118.0,118.0,117.0,118.0,115.0,116.0,116.0,114.0,115.0,116.0,116.0,118.0,118.0,116.0,115.0,116.0
mean,1963.5,-2.658559,-0.547797,4.558559,10.961111,16.272203,20.753913,24.386897,25.652586,21.164912,14.993043,7.448103,0.211552,-0.997966,10.603898,23.596552,14.528174,11.924138
std,34.207699,2.041565,1.87736,1.655818,1.284204,1.06758,0.92194,1.151129,1.065061,0.806902,1.060941,1.573259,1.916236,1.461526,1.105138,0.833403,0.878148,0.790445
min,1905.0,-7.45,-4.16,0.44,8.65,13.45,18.35,21.75,22.91,19.25,12.95,3.15,-4.35,-4.48,8.42,21.65,11.82,10.35
25%,1934.25,-4.0175,-2.195,3.535,9.96,15.655,20.1,23.55,24.9575,20.65,14.15,6.5175,-0.9775,-2.0025,9.7575,22.9775,14.03,11.3625
50%,1963.5,-2.77,-0.615,4.655,10.75,16.175,20.75,24.375,25.75,21.15,14.9,7.515,0.4,-1.0,10.545,23.65,14.48,11.84
75%,1992.75,-1.2925,0.945,5.65,11.63,16.9875,21.36,25.25,26.36,21.6025,15.795,8.64,1.75,0.1175,11.39,24.08,15.025,12.5225
max,2022.0,2.1,4.4,8.25,14.59,18.7,22.9,27.76,28.35,23.32,18.64,11.4,4.9,2.59,13.25,25.67,16.52,13.71


In [50]:
# The newly added label column, one entry per year.
air['howIsThisYear']

0      plain
1       cold
2      plain
3       cold
4       cold
       ...  
113      hot
114      hot
115      hot
116      hot
117      NaN
Name: howIsThisYear, Length: 118, dtype: object

In [51]:
# The same labels as the plain NumPy array that was assigned to the column.
howIsThisYear

array(['plain', 'cold', 'plain', 'cold', 'cold', 'cold', 'cold', 'cold',
       'cold', 'plain', 'cold', 'plain', 'cold', 'cold', 'plain', 'plain',
       'plain', 'plain', 'cold', 'plain', 'cold', 'plain', 'plain',
       'plain', 'plain', 'plain', 'cold', 'plain', 'plain', 'cold', 'hot',
       'cold', 'plain', 'plain', 'plain', 'cold', 'plain', 'plain',
       'plain', 'plain', 'cold', 'hot', 'cold', 'plain', 'hot', 'NaN',
       'cold', 'plain', 'plain', 'plain', 'plain', 'cold', 'cold',
       'plain', 'hot', 'plain', 'plain', 'plain', 'plain', 'plain',
       'plain', 'plain', 'plain', 'cold', 'cold', 'cold', 'plain',
       'plain', 'plain', 'cold', 'hot', 'cold', 'plain', 'plain', 'hot',
       'cold', 'cold', 'plain', 'plain', 'plain', 'plain', 'cold',
       'plain', 'plain', 'hot', 'hot', 'plain', 'plain', 'plain', 'hot',
       'plain', 'plain', 'hot', 'hot', 'hot', 'hot', 'hot', 'plain',
       'hot', 'hot', 'hot', 'hot', 'hot', 'hot', 'hot', 'plain', 'plain',
       'plai

In [52]:
# For the first 50 rows, map each label to the row indexes that carry it.
air[0:50].groupby('howIsThisYear').groups

{'NaN': [45], 'cold': [1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 18, 20, 26, 29, 31, 35, 40, 42, 46], 'hot': [30, 41, 44], 'plain': [0, 2, 9, 11, 14, 15, 16, 17, 19, 21, 22, 23, 24, 25, 27, 28, 32, 33, 34, 36, 37, 38, 39, 43, 47, 48, 49]}

In [56]:
# Just the group labels found in the first 50 rows.
air[0:50].groupby('howIsThisYear').groups.keys()

dict_keys(['NaN', 'cold', 'hot', 'plain'])

In [57]:
# Per-group mean of every column (one row each for 'NaN', 'cold', 'hot' and
# 'plain').
air[0:50].groupby('howIsThisYear').mean()

Unnamed: 0_level_0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
howIsThisYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
,1950.0,-0.72,0.26,5.21,13.3,17.63,,,,,,,,-0.27,12.05,,,
cold,1923.736842,-4.128421,-2.002632,3.065789,9.818421,15.144737,19.844444,23.523684,25.081579,20.681579,14.644737,6.718421,-0.413158,-2.388421,9.342632,22.825263,14.014737,10.947895
hot,1943.333333,-1.486667,1.176667,4.5,10.726667,15.993333,20.626667,25.363333,26.82,21.656667,15.79,8.926667,-1.713333,0.646667,10.406667,24.27,15.456667,12.693333
plain,1931.259259,-2.802222,-1.17037,4.052593,10.632222,15.878889,20.535556,24.755185,25.968148,21.095926,14.832963,7.263333,-0.035185,-1.305185,10.187778,23.752963,14.396667,11.758519


In [58]:
# Apply several aggregations at once: the result has two-level columns of
# (original column, statistic) with mean, count and std per group.
air[0:50].groupby('howIsThisYear').agg(['mean','count','std'])

Unnamed: 0_level_0,YEAR,YEAR,YEAR,JAN,JAN,JAN,FEB,FEB,FEB,MAR,...,M-A-M,J-J-A,J-J-A,J-J-A,S-O-N,S-O-N,S-O-N,metANN,metANN,metANN
Unnamed: 0_level_1,mean,count,std,mean,count,std,mean,count,std,mean,...,std,mean,count,std,mean,count,std,mean,count,std
howIsThisYear,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
,1950.0,1,,-0.72,1,,0.26,1,,5.21,...,,,0,,,0,,,0,
cold,1923.736842,19,14.624341,-4.128421,19,1.762468,-2.002632,19,1.528386,3.065789,...,0.486688,22.825263,19,0.52356,14.014737,19,0.848007,10.947895,19,0.308627
hot,1943.333333,3,7.371115,-1.486667,3,0.212211,1.176667,3,0.739008,4.5,...,0.551936,24.27,3,0.48775,15.456667,3,0.215484,12.693333,3,0.215716
plain,1931.259259,27,13.50952,-2.802222,27,2.059198,-1.17037,27,1.329371,4.052593,...,0.68703,23.752963,27,0.683789,14.396667,27,0.594953,11.758519,27,0.341588


In [59]:
# Per-column aggregation spec: D-J-F gets four statistics, J-J-A only its
# standard deviation.
multiple_funcs = {
    'D-J-F': ['sum', 'mean', 'std', 'count'],
    'J-J-A': 'std',
}
air[0:50].groupby('howIsThisYear').agg(multiple_funcs)

Unnamed: 0_level_0,D-J-F,D-J-F,D-J-F,D-J-F,J-J-A
Unnamed: 0_level_1,sum,mean,std,count,std
howIsThisYear,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
,-0.27,-0.27,,1,
cold,-45.38,-2.388421,0.977407,19,0.52356
hot,1.94,0.646667,0.901628,3,0.48775
plain,-35.24,-1.305185,1.13932,27,0.683789
