## 数据组织和呈现的方式

In [2]:
from pandas import DataFrame, Series

In [3]:
# Build a three-row frame from keyword columns; no index is given, so the
# rows are labelled 0..2.
df_ = DataFrame(dict(
    age=[21, 22, 23],
    name=['jack', 'tom', 'mary'],
))
print(df_)
# NOTE: `items` is printed without being called, so this shows the bound
# method's repr rather than the (column, Series) pairs.
print(df_.items)

   age  name
0   21  jack
1   22   tom
2   23  mary
<bound method DataFrame.iteritems of    age  name
0   21  jack
1   22   tom
2   23  mary>


In [5]:
# Same kind of frame, but with explicit string row labels instead of the
# default integer index.
df = DataFrame(
    data=dict(
        age=[21, 22, 23, 24, 25],
        name=['jack', 'tom', 'mary', 'tim', 'kal'],
    ),
    index=['first', 'second', 'third', 'four', 'five'],
)
print(df)

        age  name
first    21  jack
second   22   tom
third    23  mary
four     24   tim
five     25   kal


## 访问

In [6]:
# Column access: indexing the frame with a column label returns that column
# as a Series that keeps the row labels.
df['age']

first     21
second    22
third     23
four      24
five      25
Name: age, dtype: int64

In [8]:
# Row access: a plain slice on the frame selects rows by position
# (here positions 1 and 2; the stop position 3 is excluded).
df[1:3]

Unnamed: 0,age,name
second,22,tom
third,23,mary


In [12]:
# Access by integer row/column position: the first two rows of the first column.
df.iloc[0:2, 0:1]

Unnamed: 0,age
first,21
second,22


In [14]:
# Access a single value by row label and column name.
df.at['first', 'name']

'jack'

In [17]:
# Rename the columns by assigning a new list of labels to df.columns.
# The assignment changes df itself; the prints show the labels before and after.
print(df.columns, '原列名')
df.columns = ['age_', 'name_']
print(df.columns, '修改后')

Index(['age', 'name'], dtype='object') 原列名
Index(['age_', 'name_'], dtype='object') 修改后


In [21]:
# Replace the row index.
# Assigning to df.index changes df itself, so running this cell a second time
# prints the already-replaced RangeIndex under the "original" label.
print(df.index, '原行索引, 参考上面，修改后直接修改原始数据')
df.index = range(1, 6)
print(df.index, '修改后')
print(df)

RangeIndex(start=1, stop=6, step=1) 原行索引
RangeIndex(start=1, stop=6, step=1) 修改后
   age_ name_
1    21  jack
2    22   tom
3    23  mary
4    24   tim
5    25   kal


### 删除：`drop` 按行/列删除时返回新对象，不修改原始数据；`del` 会直接修改原始数据

In [23]:
# Delete a row by its index label.
# axis=0 (the default) means the label refers to a row.
# The result is a new frame; df is not reassigned here.
df.drop(1, axis=0)

Unnamed: 0,age_,name_
2,22,tom
3,23,mary
4,24,tim
5,25,kal


In [24]:
# drop did not modify the original frame: the row labelled 1 is still there.
print(df)

   age_ name_
1    21  jack
2    22   tom
3    23  mary
4    24   tim
5    25   kal


In [26]:
# Delete a column by name.
# axis=1 means the label refers to a column.
df.drop('age_', axis=1)

Unnamed: 0,name_
1,jack
2,tom
3,mary
4,tim
5,kal


In [27]:
# The original frame is printed again to show that drop left it unchanged.
print(df)

   age_ name_
1    21  jack
2    22   tom
3    23  mary
4    24   tim
5    25   kal


In [31]:
# Second way to delete a column: the del statement.
del df['age_']
# Unlike drop, this removes the column from df itself.
print(df)

  name_
1  jack
2   tom
3  mary
4   tim
5   kal


### 增加

In [37]:
# Recreate the labelled frame before demonstrating row insertion.
df = DataFrame(
    data=dict(
        age=[21, 22, 23, 24, 25],
        name=['jack', 'tom', 'mary', 'tim', 'kal'],
    ),
    index=['first', 'second', 'third', 'four', 'five'],
)
# Append a row: assigning to a label that does not exist yet enlarges the frame.
# len(df) is 5 here, so the new row gets the integer label 5 next to the
# existing string labels.
df.loc[len(df)] = [26, 'lop']
print(df)

        age  name
first    21  jack
second   22   tom
third    23  mary
four     24   tim
five     25   kal
5        26   lop


In [38]:
# Add a column: assign a list with one value per existing row (six rows here)
# to a new column name.
df['gender'] = [1, 0, 0, 0, 1, 0]
print(df)

        age  name  gender
first    21  jack       1
second   22   tom       0
third    23  mary       0
four     24   tim       0
five     25   kal       1
5        26   lop       0


In [42]:
# A Series built from a plain list gets the default integer labels 0, 1, 2.
Series([2, 3, 4])

0    2
1    3
2    4
dtype: int64

## 遍历

In [43]:
# Build the frame used by the iteration examples; each column is supplied
# as a Series with the default integer index.
df = DataFrame(dict(
    age=Series([21, 22, 23]),
    name=Series(['a', 'b', 'c']),
))
df

Unnamed: 0,age,name
0,21,a
1,22,b
2,23,c


In [48]:
# Iterate over columns: walk the column labels and look each column up by label.
for column_label in df.columns:
    print('列：', column_label)
    print('值：\n', df[column_label])
    print('------------')

列： age
值：
 0    21
1    22
2    23
Name: age, dtype: int64
------------
列： name
值：
 0    a
1    b
2    c
Name: name, dtype: object
------------


In [58]:
# Iterate over rows, approach 1: positional lookup with iloc.
# iloc expects integer positions, so walk positions 0..len(df)-1 rather than
# the index labels; labels only coincide with positions for a default
# RangeIndex, and the lookup would break for any other index.
for row in range(len(df)):
    print('现在是第: ', row, '行')
    print(df.iloc[row])

现在是第:  0 行
age     21
name     a
Name: 0, dtype: object
现在是第:  1 行
age     22
name     b
Name: 1, dtype: object
现在是第:  2 行
age     23
name     c
Name: 2, dtype: object


In [63]:
# Iterate over rows, approach 2: loop over the frame's underlying array.
# Each item is one row's values; element 0 is the first column's value.
for row_values in df.values:
    print(row_values)
    print(row_values[0])

[21 'a']
21
[22 'b']
22
[23 'c']
23


In [66]:
# Iterate over rows, approach 3: iterrows yields (row label, row as Series) pairs.
for label, record in df.iterrows():
    print(label)
    print(record)

0
age     21
name     a
Name: 0, dtype: object
1
age     22
name     b
Name: 1, dtype: object
2
age     23
name     c
Name: 2, dtype: object


In [67]:
import numpy as np

In [68]:
# Draw five random floats (standard normal samples); no seed is set, so the
# values differ on every run.
np.random.randn(5)

array([-1.2457956 ,  1.3244505 ,  0.91201535,  0.60403867,  1.03929585])

## 判断

In [69]:
# Frame with two columns of five random floats each; the values change on
# every run because no seed is set.
df = DataFrame(dict(
    data_=np.random.randn(5),
    data_01=np.random.randn(5),
))
df

Unnamed: 0,data_,data_01
0,-0.363531,0.491511
1,-0.999652,1.16587
2,-2.151753,1.191461
3,-1.652179,-0.167977
4,-1.122669,0.056055
