# ６. pandas入门

In [3]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [14]:
print(Series().__class__)
print(Series.__doc__)
print(Series.__bases__)
print(Series.__name__)
print(DataFrame.__module__)
print(Series.__dict__)

<class 'pandas.core.series.Series'>

    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, *, **) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, dict, or scalar value
        Contains data stored in Series
    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex(len(data)) if not provided. If both a dict and ind

In [13]:
type(Series())

pandas.core.series.Series

## 6.1 Series

- ### list to Series

In [20]:
obj = pd.Series([4,-7,8,9])
obj

0    4
1   -7
2    8
3    9
dtype: int64

In [21]:
obj.values

array([ 4, -7,  8,  9])

In [22]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [25]:
obj2=Series([4,5,0,-4],index=['d','a','b','fg'])
obj2

d     4
a     5
b     0
fg   -4
dtype: int64

In [26]:
obj2['a']

5

In [27]:
obj2[obj2>0]

d    4
a    5
dtype: int64

In [29]:
'a' in obj2

True

In [30]:
'c' in obj2

False

- ### dict to Series

In [32]:
sd = {"Ohio":3500,"Texas":7100,"Oregon":1600,"Utah":5100}
obj3=Series(sd)
obj3

Ohio      3500
Oregon    1600
Texas     7100
Utah      5100
dtype: int64

In [33]:
states={"California","Ohio","New York"}
obj4=Series(sd,index=states)
obj4

Ohio          3500.0
New York         NaN
California       NaN
dtype: float64

In [36]:
pd.isnull(obj4)

Ohio          False
New York       True
California     True
dtype: bool

In [38]:
pd.notnull(obj4)

Ohio           True
New York      False
California    False
dtype: bool

In [39]:
obj3+obj4

California       NaN
New York         NaN
Ohio          7000.0
Oregon           NaN
Texas            NaN
Utah             NaN
dtype: float64

In [40]:
(obj3+obj4).index

Index(['California', 'New York', 'Ohio', 'Oregon', 'Texas', 'Utah'], dtype='object')

## 6.2 DataFrame

- ### dict to DataFrame

In [101]:
data={'state':['Ohio','Ohio','Ohio','Nevada','Nevada'],'year':[2000,2001,2002,2001,2002],'pop':[1.5,1.7,3.6,2.4,2.9]}
frame=DataFrame(data)
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


In [102]:
frame.index

RangeIndex(start=0, stop=5, step=1)

In [103]:
frame2=DataFrame(data,columns=['state','pop','year'])
frame2

Unnamed: 0,state,pop,year
0,Ohio,1.5,2000
1,Ohio,1.7,2001
2,Ohio,3.6,2002
3,Nevada,2.4,2001
4,Nevada,2.9,2002


In [104]:
frame3=DataFrame(data,index=['one','two','three','four','five'])
frame3

Unnamed: 0,pop,state,year
one,1.5,Ohio,2000
two,1.7,Ohio,2001
three,3.6,Ohio,2002
four,2.4,Nevada,2001
five,2.9,Nevada,2002


In [119]:
frame3.columns

Index(['pop', 'state', 'year', 'debt'], dtype='object')

In [120]:
frame3.values

array([[1.5, 'Ohio', 2000, 0],
       [1.7, 'Ohio', 2001, 1],
       [3.6, 'Ohio', 2002, 2],
       [2.4, 'Nevada', 2001, 3],
       [2.9, 'Nevada', 2002, 4]], dtype=object)

In [106]:
frame3['state']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [107]:
frame3.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [108]:
frame3['debt']=12
frame3

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,12
two,1.7,Ohio,2001,12
three,3.6,Ohio,2002,12
four,2.4,Nevada,2001,12
five,2.9,Nevada,2002,12


In [109]:
frame3['debt']=range(5)
frame3

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,0
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [117]:
frame3.ix[3]

pop         2.4
state    Nevada
year       2001
debt          3
Name: four, dtype: object

In [111]:
frame3.ix['four']

pop         2.4
state    Nevada
year       2001
debt          3
Name: four, dtype: object

In [112]:
frame3['eastern']=frame3.state=='Ohio'
frame3

Unnamed: 0,pop,state,year,debt,eastern
one,1.5,Ohio,2000,0,True
two,1.7,Ohio,2001,1,True
three,3.6,Ohio,2002,2,True
four,2.4,Nevada,2001,3,False
five,2.9,Nevada,2002,4,False


In [114]:
frame3.columns
del frame3['eastern']

In [115]:
frame3

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,0
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [116]:
frame3.T

Unnamed: 0,one,two,three,four,five
pop,1.5,1.7,3.6,2.4,2.9
state,Ohio,Ohio,Ohio,Nevada,Nevada
year,2000,2001,2002,2001,2002
debt,0,1,2,3,4


- ### index对象

In [161]:
ind = frame3.index
ind

Index(['one', 'two', 'three', 'four', 'five'], dtype='object')

In [162]:
ind[:3]

Index(['one', 'two', 'three'], dtype='object')

In [163]:
#index是immutable对象,　不可以修改已经有的值
ind[3]='三'

TypeError: Index does not support mutable operations

In [164]:
frame3.reindex(['two','three','four','one','five','six','seven'])

Unnamed: 0,pop,state,year,debt
two,1.7,Ohio,2001.0,1.0
three,3.6,Ohio,2002.0,2.0
four,2.4,Nevada,2001.0,3.0
one,1.5,Ohio,2000.0,0.0
five,2.9,Nevada,2002.0,4.0
six,,,,
seven,,,,


In [165]:
frame3.reindex(['two','three','four','one','five','six','seven'],fill_value=100)

Unnamed: 0,pop,state,year,debt
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
one,1.5,Ohio,2000,0
five,2.9,Nevada,2002,4
six,100.0,100,100,100
seven,100.0,100,100,100


In [166]:
frame３

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,0
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [167]:
# frame3.drop(['six'])

In [168]:
frame3

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,0
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [169]:
frame3[frame3['pop']>2]

Unnamed: 0,pop,state,year,debt
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [170]:
frame3[1:4]

Unnamed: 0,pop,state,year,debt
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3


In [171]:
frame3['state']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [172]:
frame3<10

Unnamed: 0,pop,state,year,debt
one,True,True,False,True
two,True,True,False,True
three,True,True,False,True
four,True,True,False,True
five,True,True,False,True


In [174]:
frame3

Unnamed: 0,pop,state,year,debt
one,1.5,Ohio,2000,0
two,1.7,Ohio,2001,1
three,3.6,Ohio,2002,2
four,2.4,Nevada,2001,3
five,2.9,Nevada,2002,4


In [177]:
frame3.ix[['Ohio'],[1,2,3]]

Unnamed: 0,state,year,debt
Ohio,,,
