# Chapter 1: Introducing pandas Objects
pandas 有三种基本的数据结构：Series、DataFrame 和 Index。

## Series

In [2]:
import pandas as pd
import numpy as np

In [5]:
# Build a Series from a plain Python list (no explicit index is supplied).
data = pd.Series([0.25, 0.5, 0.75, 2.0])
# Its contents are reachable through two attributes:
# `.values` holds the data as a NumPy array ...
data.values
# ... and `.index` holds the labels as a pd.Index object.
data.index
# Item access and slicing are also available, e.g. data[1] or data[1:3].

array([0.25, 0.5 , 0.75, 2.  ])

In [8]:
# More flexible than a NumPy array: the index labels may be strings.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
# The index labels do not have to be contiguous either.
# NOTE: the list separators must be ASCII commas; the full-width "，"
# character is not valid Python syntax and stops the cell from running.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2, 3, 5, 7])

In [12]:
# Series as a specialized dictionary: it maps keys to values like a dict,
# but is more flexible and efficient.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)
# In contrast to a dict, a Series also supports array-style operations
# such as slicing (here: slicing by label).
population['California':'Illinois']
# In the output recorded below the entries appear ordered by dictionary key.
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

## DataFrame

In [16]:
# DataFrame as a generalized NumPy array.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(area_dict)
# Combine the two Series into one table, one column per Series.
states = pd.DataFrame({'population': population, 'area': area})
# A DataFrame exposes an `index` attribute (as a Series does) and a
# `columns` attribute naming the Series it holds.
(states.index, states.columns)
# A DataFrame can also be viewed as a specialized dictionary.

(Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object'),
 Index(['area', 'population'], dtype='object'))

In [21]:
# Ways to construct a DataFrame
# 1. From a single Series object
pd.DataFrame(population, columns=['population'])

# 2. From a list of dicts (the list is built with a list comprehension)
data = [{'a': i, 'b': 2 * i} for i in range(3)]
pd.DataFrame(data)
# The dicts do not need to share the same keys.
pd.DataFrame([
    {'a': 1, 'b': 2},
    {'b': 3, 'c': 4},
])

# 3. From a dictionary of Series objects
# (same approach as used earlier in this notebook)

# 4. From a two-dimensional NumPy array
pd.DataFrame(
    np.random.rand(3, 2),
    index=['a', 'b', 'c'],
    columns=['foo', 'bar'],
)

Unnamed: 0,foo,bar
a,0.431035,0.685901
b,0.99105,0.613099
c,0.058607,0.223692


## Index

In [23]:
# Create an Index directly from a list of integers, then display it.
ind = pd.Index([2, 3, 5, 6, 7])
ind

Int64Index([2, 3, 5, 6, 7], dtype='int64')

In [26]:
# An Index behaves like an immutable array: slicing it works as usual ...
ind[::2]
# ... but item assignment such as `ind[1] = 0` fails because it is immutable.
# Index as an ordered set: pandas performs many intersection-style operations
# on data, which rely on set arithmetic, and Index supports these set
# operations.
# The explicit methods are used instead of the &, | and ^ operators because
# on pandas 2.x those operators act element-wise rather than as set operations.
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([1, 2, 5, 7, 9])
# (intersection, union, symmetric difference)
set_ops = (
    indA.intersection(indB),
    indA.union(indB),
    indA.symmetric_difference(indB),
)
set_ops

(Int64Index([1, 5, 7, 9], dtype='int64'),
 Int64Index([1, 2, 3, 5, 7, 9], dtype='int64'),
 Int64Index([2, 3], dtype='int64'))

# Data Indexing and Selection

In [None]:
#indexing (arr[2, 1]), slicing (arr[:, 1:5]), masking (arr[arr > 0]), fancy indexing (arr[0, [1, 5]]), and combinations thereof (arr[:, [1, 5]])


In [29]:
# Keep in mind that a Series acts like both a NumPy array and a dictionary,
# so the operations known from either can be applied to it.
# --- dictionary-like behaviour ---
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)
# Lookup by key, membership test, key listing and (key, value) pairs.
(data['b'], 'a' in data, data.keys(), list(data.items()))
# Assigning to a key that does not exist yet adds a new entry.
data['e'] = 1.25

# --- array-like behaviour ---
# slicing by implicit index

(0.5,
 True,
 Index(['a', 'b', 'c', 'd'], dtype='object'),
 [('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)])