In [1]:
import pandas as pd
import numpy as np

from pandas import Series, DataFrame
from matplotlib import pyplot as plt

%matplotlib inline

### Series

In [2]:
obj = Series([1,2,5,4])

In [3]:
obj.values

array([1, 2, 5, 4], dtype=int64)

In [5]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
obj

0    1
1    2
2    5
3    4
dtype: int64

In [9]:
obj.index=['a','b','c','d']

In [10]:
obj

a    1
b    2
c    5
d    4
dtype: int64

In [11]:
obj['c']

5

In [12]:
obj[0]

1

In [13]:
obj['e'] = -5

In [14]:
obj[4]

-5

In [15]:
obj

a    1
b    2
c    5
d    4
e   -5
dtype: int64

In [16]:
obj[['a','e','b']]

a    1
e   -5
b    2
dtype: int64

In [17]:
obj[obj>0]

a    1
b    2
c    5
d    4
dtype: int64

In [18]:
np.exp(obj)

a      2.718282
b      7.389056
c    148.413159
d     54.598150
e      0.006738
dtype: float64

In [19]:
frame = DataFrame([[1,2,3],[0,5,4],[6,7,1]])

In [20]:
frame

Unnamed: 0,0,1,2
0,1,2,3
1,0,5,4
2,6,7,1


In [21]:
np.exp(frame)

Unnamed: 0,0,1,2
0,2.718282,7.389056,20.085537
1,1.0,148.413159,54.59815
2,403.428793,1096.633158,2.718282


In [22]:
'b' in obj

True

In [23]:
1 in obj

False

In [24]:
obj = Series([0,1])

In [25]:
1 in obj

True

In [27]:
values = [200,100,68,189,-168]

index = ['a','b','c','d','e']

test_s = Series(data=values,index=index)

In [28]:
test_s

a    200
b    100
c     68
d    189
e   -168
dtype: int64

In [31]:
chars ={'A':56,'B':57,'C':58}
chars_name = ['E','A','B','C','D']

char_s = Series(chars,index=chars_name)

In [32]:
char_s

E     NaN
A    56.0
B    57.0
C    58.0
D     NaN
dtype: float64

### 缺失数据的监测，isnull notnull

In [33]:
pd.isnull(char_s)

E     True
A    False
B    False
C    False
D     True
dtype: bool

In [34]:
char_s.isnull()

E     True
A    False
B    False
C    False
D     True
dtype: bool

In [35]:
char_s.notnull()

E    False
A     True
B     True
C     True
D    False
dtype: bool

In [36]:
test_s + char_s

A   NaN
B   NaN
C   NaN
D   NaN
E   NaN
a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
dtype: float64

In [37]:
test_s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [38]:
test_s.index = ['A','B','C','D','E']

In [39]:
test_s + char_s

A    256.0
B    157.0
C    126.0
D      NaN
E      NaN
dtype: float64

In [40]:
char_s + test_s

A    256.0
B    157.0
C    126.0
D      NaN
E      NaN
dtype: float64

In [41]:
char_s

E     NaN
A    56.0
B    57.0
C    58.0
D     NaN
dtype: float64

In [42]:
char_s.name = 'ASCII'

In [43]:
char_s

E     NaN
A    56.0
B    57.0
C    58.0
D     NaN
Name: ASCII, dtype: float64

In [44]:
char_s.index.name = 'chars'

In [45]:
char_s

chars
E     NaN
A    56.0
B    57.0
C    58.0
D     NaN
Name: ASCII, dtype: float64

### DataFrame

In [46]:
# 构建DataFrame的方法一

In [52]:
data = [{'name':'Li','age':27,'salary':2710.2,'gender':'F'},\
       {'name':'Wang','age':35,'salary':5801.7}]

In [53]:
frame = DataFrame(data)

In [54]:
frame

Unnamed: 0,age,gender,name,salary
0,27,F,Li,2710.2
1,35,,Wang,5801.7


In [57]:
data = {"name":['Li','Wang'],\
       "age":[27,35],\
       "salary":[2710.2,5801.7],\
       "gender":['F',None]}

In [58]:
frame = DataFrame(data)

In [59]:
frame

Unnamed: 0,age,gender,name,salary
0,27,F,Li,2710.2
1,35,,Wang,5801.7


In [60]:
frame.columns

Index(['age', 'gender', 'name', 'salary'], dtype='object')

In [61]:
frame.index

RangeIndex(start=0, stop=2, step=1)

In [62]:
frame.name

0      Li
1    Wang
Name: name, dtype: object

In [63]:
frame.salary

0    2710.2
1    5801.7
Name: salary, dtype: float64

In [64]:
frame.index = ['E001','M005']

In [65]:
frame.salary

E001    2710.2
M005    5801.7
Name: salary, dtype: float64

In [68]:
frame

Unnamed: 0,age,gender,name,salary
E001,27,F,Li,2710.2
M005,35,,Wang,5801.7


In [67]:
## 获取行
frame.ix['M005']

age           35
gender      None
name        Wang
salary    5801.7
Name: M005, dtype: object

In [81]:
frame['debt'] = range(2)

In [82]:
frame.columns = pd.Index(['age', 'gender', 'name', 'salary','debt'], dtype='object')

In [83]:
frame

Unnamed: 0,age,gender,name,salary,debt
E001,27,F,Li,2710.2,0
M005,35,,Wang,5801.7,1


In [85]:
frame = DataFrame(frame,columns=['name', 'age', 'gender', 'salary','debt'])

In [86]:
frame

Unnamed: 0,name,age,gender,salary,debt
E001,Li,27,F,2710.2,0
M005,Wang,35,,5801.7,1


In [87]:
frame['hometown'] = Series(['CQ'],index=['E001'])

In [88]:
frame

Unnamed: 0,name,age,gender,salary,debt,hometown
E001,Li,27,F,2710.2,0,CQ
M005,Wang,35,,5801.7,1,


In [89]:
del frame.debt

AttributeError: debt

In [93]:
del frame.debt

AttributeError: debt

In [96]:
# 删除列
del frame['debt']

KeyError: 'debt'

In [97]:
frame

Unnamed: 0,name,age,gender,salary,hometown
E001,Li,27,F,2710.2,CQ
M005,Wang,35,,5801.7,


In [98]:
frame.columns

Index(['name', 'age', 'gender', 'salary', 'hometown'], dtype='object')

In [99]:
frame.index

Index(['E001', 'M005'], dtype='object')

In [100]:
frame.values

array([['Li', 27, 'F', 2710.2, 'CQ'],
       ['Wang', 35, None, 5801.7, nan]], dtype=object)

In [101]:
frame.T

Unnamed: 0,E001,M005
name,Li,Wang
age,27,35
gender,F,
salary,2710.2,5801.7
hometown,CQ,


### 构造DataFrame可以传入的数据
- 二维ndarray
- 列表、字典

In [102]:
data = np.random.randn(5,3)

In [104]:
frame = DataFrame(data,index=range(2010,2015,1),columns=['Computers','Phones','Books'])

In [105]:
frame

Unnamed: 0,Computers,Phones,Books
2010,0.216981,1.519189,-0.344171
2011,1.320207,0.396309,0.254014
2012,-0.978029,0.19986,-0.05966
2013,-1.150723,0.903148,-2.412634
2014,0.70282,-0.233783,2.664985


In [111]:
frame.index.name = 'year'
frame.columns.name = 'product'
frame.name = 'Reports'

In [113]:
frame

product,Computers,Phones,Books
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,0.216981,1.519189,-0.344171
2011,1.320207,0.396309,0.254014
2012,-0.978029,0.19986,-0.05966
2013,-1.150723,0.903148,-2.412634
2014,0.70282,-0.233783,2.664985


In [114]:
frame.values

array([[ 0.21698104,  1.51918928, -0.34417145],
       [ 1.32020681,  0.39630885,  0.25401443],
       [-0.97802852,  0.19985976, -0.05965976],
       [-1.15072324,  0.90314788, -2.41263361],
       [ 0.70281972, -0.23378346,  2.66498476]])

In [115]:
frame.index

RangeIndex(start=2010, stop=2015, step=1, name='year')

In [117]:
pd.RangeIndex.mro()

[pandas.indexes.range.RangeIndex,
 pandas.indexes.numeric.Int64Index,
 pandas.indexes.numeric.NumericIndex,
 pandas.indexes.base.Index,
 pandas.core.base.IndexOpsMixin,
 pandas.core.strings.StringAccessorMixin,
 pandas.core.base.PandasObject,
 pandas.core.base.StringMixin,
 object]

In [118]:
2012 in frame.index

True

In [119]:
2015 in frame.index

False

In [122]:
frame

product,Computers,Phones,Books
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,0.216981,1.519189,-0.344171
2011,1.320207,0.396309,0.254014
2012,-0.978029,0.19986,-0.05966
2013,-1.150723,0.903148,-2.412634
2014,0.70282,-0.233783,2.664985


In [124]:
frame.reindex([2015,2014,2013,2012,2011,2010], fill_value=0)

product,Computers,Phones,Books
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,0.0,0.0,0.0
2014,0.70282,-0.233783,2.664985
2013,-1.150723,0.903148,-2.412634
2012,-0.978029,0.19986,-0.05966
2011,1.320207,0.396309,0.254014
2010,0.216981,1.519189,-0.344171


In [125]:
obj = Series(np.arange(4),index=['a','b','c','d'])

In [126]:
obj

a    0
b    1
c    2
d    3
dtype: int32

In [127]:
obj['a':'c']

a    0
b    1
c    2
dtype: int32

In [128]:
obj[0:2]

a    0
b    1
dtype: int32

In [133]:
def hello():
    """
    print hello to screen!
    """
    print("hello")

In [136]:
hello()

hello


In [138]:
np?

In [151]:
i = 0
def get_a_num():
    global i
    i += 1
    yield i 
    

In [142]:
for i in get_a_num(5):
    print(i)

0
1
2
3
4


In [168]:
list(get_a_num())

[17]

In [169]:
digits_train = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes",header = None)

In [171]:
digits_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1797 entries, 0 to 1796
Data columns (total 65 columns):
0     1797 non-null int64
1     1797 non-null int64
2     1797 non-null int64
3     1797 non-null int64
4     1797 non-null int64
5     1797 non-null int64
6     1797 non-null int64
7     1797 non-null int64
8     1797 non-null int64
9     1797 non-null int64
10    1797 non-null int64
11    1797 non-null int64
12    1797 non-null int64
13    1797 non-null int64
14    1797 non-null int64
15    1797 non-null int64
16    1797 non-null int64
17    1797 non-null int64
18    1797 non-null int64
19    1797 non-null int64
20    1797 non-null int64
21    1797 non-null int64
22    1797 non-null int64
23    1797 non-null int64
24    1797 non-null int64
25    1797 non-null int64
26    1797 non-null int64
27    1797 non-null int64
28    1797 non-null int64
29    1797 non-null int64
30    1797 non-null int64
31    1797 non-null int64
32    1797 non-null int64
33    1797 non-null int64
34    179