### Pandas Series

In [1]:
import pandas as pd

In [43]:
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['eggs', 'apples', 'milk', 'bread'])

In [44]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [5]:
groceries.shape

(4,)

In [6]:
groceries.size

4

In [7]:
groceries.ndim

1

In [45]:
groceries.index

Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')

In [9]:
groceries.values

array([30, 6, 'Yes', 'No'], dtype=object)

In [10]:
'banana' in groceries

False

In [11]:
'apples' in groceries

True

Access data

In [25]:
groceries.index = ['eggs', 'apples', 'milk', 'bread']

In [26]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [16]:
groceries['eggs']

30

In [19]:
groceries[['eggs', 'apples']]

eggs      30
apples     6
dtype: object

In [20]:
groceries[1]

6

In [22]:
groceries[[1,0]]

apples     6
eggs      30
dtype: object

In [23]:
groceries.loc[['eggs', 'bread']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


eggs      30
bread    NaN
dtype: object

In [28]:
groceries.iloc[[1,3]]

apples     6
bread     No
dtype: object

 Delete

In [29]:
groceries.drop('bread')

eggs       30
apples      6
milk      Yes
dtype: object

In [30]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [31]:
groceries.drop('bread', inplace=True)
groceries

eggs       30
apples      6
milk      Yes
dtype: object

算数运算

In [33]:
fruits = pd.Series(data=[10, 6, 3], index=['apples', 'oranges', 'bananas'])
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [35]:
fruits + 2

apples     12
oranges     8
bananas     5
dtype: int64

In [36]:
fruits -2

apples     8
oranges    4
bananas    1
dtype: int64

In [37]:
fruits * 2

apples     20
oranges    12
bananas     6
dtype: int64

In [38]:
import numpy as np
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [39]:
np.exp(fruits)

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

In [40]:
np.power(fruits, 2)

apples     100
oranges     36
bananas      9
dtype: int64

In [41]:
fruits['bananas'] * 2

6

In [42]:
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [46]:
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['eggs', 'apples', 'milk', 'bread'])

In [47]:
groceries * 2

eggs          60
apples        12
milk      YesYes
bread       NoNo
dtype: object

### Pandas DataFrame

In [50]:
items = {'Bob': pd.Series([245, 35, 45], index=['bike', 'pants', 'watch']),
       'Alice': pd.Series([40, 110, 500, 35], index=['book', 'glasses', 'bike', 'pants'])}

In [51]:
type(items)

dict

In [52]:
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,35.0,35.0
watch,45.0,


In [53]:
type(shopping_carts)

pandas.core.frame.DataFrame

In [54]:
shopping_carts.values

array([[245., 500.],
       [ nan,  40.],
       [ nan, 110.],
       [ 35.,  35.],
       [ 45.,  nan]])

In [55]:
shopping_carts.shapes

(5, 2)

In [56]:
shopping_carts.ndim

2

In [57]:
shopping_carts.size

10

In [59]:
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,35
watch,45


In [60]:
sel_shopping_cart = pd.DataFrame(items, index=['pants', 'book'])

In [61]:
sel_shopping_cart

Unnamed: 0,Bob,Alice
pants,35.0,35
book,,40


In [62]:
alice_shopping_cart = pd.DataFrame(items, index=['glasses', 'bike'], columns=['Alice'])
alice_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


In [63]:
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,35.0,35.0
watch,45.0,


In [80]:
items = [{'bikes': 20, 'pants':30, 'watches': 35}, {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5}]
store_items = pd.DataFrame(items, index=['store1', 'store2'])
store_items

Unnamed: 0,bikes,glasses,pants,watches
store1,20,,30,35
store2,15,50.0,5,10


注意：先写列标签，后写行标签

In [81]:
store_items['bikes']['store2']

15

In [82]:
store_items['shirts'] = [15, 2]
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts
store1,20,,30,35,15
store2,15,50.0,5,10,2


In [83]:
store_items['suits'] = store_items['shirts'] + store_items['pants']
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts,suits
store1,20,,30,35,15,45
store2,15,50.0,5,10,2,7


In [84]:
new_items = [{'bikes':20, 'pants': 30, 'watches': 35, 'glasses': 4}]

new_store = pd.DataFrame(new_items, index=['store3'])
new_store

Unnamed: 0,bikes,glasses,pants,watches
store3,20,4,30,35


In [85]:
store_items = store_items.append(new_store)
store_items

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,bikes,glasses,pants,shirts,suits,watches
store1,20,,30,15.0,45.0,35
store2,15,50.0,5,2.0,7.0,10
store3,20,4.0,30,,,35


In [86]:
store_items.insert(5, 'shoes', [8, 5, 0])
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,,,0,35


In [87]:
store_items['new_watch'] = store_items['watches'][1:]
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches,new_watch
store1,20,,30,15.0,45.0,8,35,
store2,15,50.0,5,2.0,7.0,5,10,10.0
store3,20,4.0,30,,,0,35,35.0


In [88]:
store_items.pop('new_watch')
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,,,0,35


In [76]:
store_items.drop(['watches', 'shoes'], axis=1, inplace=True)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store1,20,,30,15.0,45.0
store2,15,50.0,5,2.0,7.0
store3,20,4.0,30,,


In [77]:
store_items.drop(['store3'], axis=0, inplace=True)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store1,20,,30,15.0,45.0
store2,15,50.0,5,2.0,7.0


In [78]:
store_items.rename(columns={'bikes': 'hats'}, inplace=True)
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store1,20,,30,15.0,45.0
store2,15,50.0,5,2.0,7.0


In [79]:
store_items.rename(index={'store2': 'last_store'}, inplace=True)
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store1,20,,30,15.0,45.0
last_store,15,50.0,5,2.0,7.0


#### 数据清理

In [89]:
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,,,0,35


In [90]:
null_num = store_items.isnull().sum().sum()
print(null_num)

3


In [91]:
null_num = store_items.isnull()
print(null_num)

        bikes  glasses  pants  shirts  suits  shoes  watches
store1  False     True  False   False  False  False    False
store2  False    False  False   False  False  False    False
store3  False    False  False    True   True  False    False


In [92]:
store_items.dropna(axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store2,15,50.0,5,2.0,7.0,5,10


In [93]:
store_items.dropna(axis=1)

Unnamed: 0,bikes,pants,shoes,watches
store1,20,30,8,35
store2,15,5,5,10
store3,20,30,0,35


In [94]:
store_items.fillna(0)

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,0.0,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,0.0,0.0,0,35


In [95]:
store_items.fillna(method='ffill', axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,2.0,7.0,0,35


In [96]:
store_items.fillna(method='backfill', axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,50.0,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,,,0,35


In [98]:
store_items.interpolate(method='linear', axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,suits,shoes,watches
store1,20,,30,15.0,45.0,8,35
store2,15,50.0,5,2.0,7.0,5,10
store3,20,4.0,30,2.0,7.0,0,35


#### 从数据库加载

In [99]:
titanic = pd.read_csv('titanic-data.csv')
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [105]:
titanic.groupby(['Survived','Pclass'])['PassengerId'].count()

Survived  Pclass
0         1          80
          2          97
          3         372
1         1         136
          2          87
          3         119
Name: PassengerId, dtype: int64