In [6]:
import numpy as np
import pandas as pd

'''
The following code is to help you play with the concept of Dataframe in Pandas.
---
本段代码将帮助您了解 Pandas 中的 Dataframe（数据框）.

You can think of a Dataframe as something with rows and columns. It is
similar to a spreadsheet, a database table, or R's data.frame object.
---
您可以将 Dataframe 理解为由行和列组成的二维数据表.
Dataframe 与电子表格、数据库表或 R 语言中的 data.frame 对象非常类似.

*This playground is inspired by Greg Reda's post on Intro to Pandas Data Structures:
http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/
'''

'''
To create a dataframe, you can pass a dictionary of lists to the DataFrame
constructor:
1) Each key of the dictionary becomes a column name.
2) The associated list supplies the values within that column.
'''
# Create the DataFrame from a dictionary of equal-length lists: each key
# becomes a column name and each list supplies that column's values.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12]
}

# Pin the column order explicitly. The recorded output lists the columns
# alphabetically even though the dict literal does not, so the layout would
# otherwise depend on how the installed pandas/Python orders dict keys.
football = pd.DataFrame(data, columns=['losses', 'team', 'wins', 'year'])

# Single-argument print(...) behaves identically on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(football)

   losses     team  wins  year
0       5    Bears    11  2010
1       8    Bears     8  2011
2       6    Bears    10  2012
3       1  Packers    15  2011
4       5  Packers    11  2012
5      10    Lions     6  2010
6       6    Lions    10  2011
7      12    Lions     4  2012


In [13]:
'''
Pandas also has various functions that will help you understand some basic
information about your data frame. Some of these functions are:

1) dtypes: to get the datatype for each column
获取每一列的数据类型.

2) describe: useful for seeing basic statistics of the dataframe's numerical columns
查看 Dataframe 中数值列的基本统计信息 (计数、均值、标准差、最小值、分位数、最大值).

3) head: displays the first five rows of the dataset
获取 Dataframe 的前五行.

4) tail: displays the last five rows of the dataset
获取 Dataframe 的最后五行.
'''

# DataFrame inspection helpers: dtypes, describe(), head() and tail().
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12]
}

# Pin the column order explicitly so every report below lists the columns in
# the same (alphabetical) order as the recorded output, independent of how
# the installed pandas/Python orders dict keys.
football = pd.DataFrame(data, columns=['losses', 'team', 'wins', 'year'])

# Single-argument print(...) is used throughout because it behaves the same
# on Python 2 and Python 3; print("") emits the blank separator line.

# Data type of each column.
print(football.dtypes)
print("")

# Summary statistics; the non-numeric 'team' column is left out of the report.
print(football.describe())
print("")

# First five rows.
print(football.head())
print("")

# Last five rows.
print(football.tail())

losses     int64
team      object
wins       int64
year       int64
dtype: object

          losses       wins         year
count   8.000000   8.000000     8.000000
mean    6.625000   9.375000  2011.125000
std     3.377975   3.377975     0.834523
min     1.000000   4.000000  2010.000000
25%     5.000000   7.500000  2010.750000
50%     6.000000  10.000000  2011.000000
75%     8.500000  11.000000  2012.000000
max    12.000000  15.000000  2012.000000

   losses     team  wins  year
0       5    Bears    11  2010
1       8    Bears     8  2011
2       6    Bears    10  2012
3       1  Packers    15  2011
4       5  Packers    11  2012

   losses     team  wins  year
3       1  Packers    15  2011
4       5  Packers    11  2012
5      10    Lions     6  2010
6       6    Lions    10  2011
7      12    Lions     4  2012


In [84]:
# Selecting values from a DataFrame.

data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12]
}

# Pin the column order explicitly: the positional lookup football.iloc[0, 0]
# below depends on which column comes first, and without `columns=` that is
# decided by how the installed pandas/Python orders dict keys. This order
# matches the recorded output, where iloc[0, 0] is the 'losses' value 5.
football = pd.DataFrame(data, columns=['losses', 'team', 'wins', 'year'])

# Single-argument print(...) is used throughout because it behaves the same
# on Python 2 and Python 3.
print(football)
print('\n\n')

# Row whose index label is 0, returned as a Series.
print('football.loc[0]\n---')
print(football.loc[0])
print('\n\n')

# Passing a list of labels returns a one-row DataFrame instead of a Series,
# so it prints differently. (The label previously said `iloc` although the
# expression evaluated is `loc`.)
print('football.loc[[0]]\n---')
print(football.loc[[0]])
print('\n\n')

# iloc selects by integer position (row, column).
# loc selects by index label.
print('football.iloc[0] & football.iloc[0, 0]\n---')
print(football.iloc[0])
print('')
print(football.iloc[0, 0])  # positional lookup: first row, first column
print('\n\n')

# As with loc, a list of positions yields a one-row DataFrame.
print('football.iloc[[0]]\n---')
print(football.iloc[[0]])
print('\n\n')

# Select the 'year' column.
print(football['year'])
print('\n\n')

# Select the 'year' and 'wins' columns.
print(football[['year', 'wins']])
print('\n\n')

# Rows with more than 10 wins.
many_wins = football['wins'] > 10
print(football[many_wins])
print('\n\n')

# Rows with more than 10 wins, 'year' column only. A single .loc call with a
# row mask and a column label replaces the chained df[mask]['year'] lookup.
print(football.loc[many_wins, 'year'])
print('\n\n')

   losses     team  wins  year
0       5    Bears    11  2010
1       8    Bears     8  2011
2       6    Bears    10  2012
3       1  Packers    15  2011
4       5  Packers    11  2012
5      10    Lions     6  2010
6       6    Lions    10  2011
7      12    Lions     4  2012



football.loc[0]
---
losses        5
team      Bears
wins         11
year       2010
Name: 0, dtype: object



football.iloc[[0]]
---
   losses   team  wins  year
0       5  Bears    11  2010



football.iloc[0] & football.iloc[0, 0]
---
losses        5
team      Bears
wins         11
year       2010
Name: 0, dtype: object

5



football.iloc[[0]]
---
   losses   team  wins  year
0       5  Bears    11  2010



0    2010
1    2011
2    2012
3    2011
4    2012
5    2010
6    2011
7    2012
Name: year, dtype: int64



   year  wins
0  2010    11
1  2011     8
2  2012    10
3  2011    15
4  2012    11
5  2010     6
6  2011    10
7  2012     4



   losses     team  wins  year
0       5    Bears    11  2010
3    