In [1]:
import pandas as pd
import numpy as np
from pandas import Series

In [2]:
# Build a daily DatetimeIndex of six consecutive dates starting 2013-01-01.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Build a 6x4 DataFrame of standard-normal samples, indexed by `dates`,
# with columns A-D.
# A locally seeded RandomState makes the values identical on every
# Restart & Run All without touching NumPy's global random state.
# NOTE: outputs saved before the seed was introduced will differ until the
# notebook is re-run.
SEED = 42
df = pd.DataFrame(
    np.random.RandomState(SEED).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.105899,1.358544,0.503598,0.411258
2013-01-02,-0.462173,0.489251,1.296628,-1.155972
2013-01-03,-0.3545,-0.035041,-0.779967,0.235855
2013-01-04,0.434285,0.480654,-1.126446,0.864055
2013-01-05,-0.187896,2.590489,-0.340271,0.661748
2013-01-06,-0.180472,-0.855244,-0.94411,0.429397


In [4]:
# Show the last three rows of the DataFrame.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,0.434285,0.480654,-1.126446,0.864055
2013-01-05,-0.187896,2.590489,-0.340271,0.661748
2013-01-06,-0.180472,-0.855244,-0.94411,0.429397


In [5]:
# Show the first two rows of the DataFrame.
df.head(n=2)

Unnamed: 0,A,B,C,D
2013-01-01,-1.105899,1.358544,0.503598,0.411258
2013-01-02,-0.462173,0.489251,1.296628,-1.155972


In [6]:
# Slice rows by position: the second and third rows (positions 1 and 2; the
# stop position 3 is excluded). `.iloc` makes it explicit that the slice is
# positional and selects rows, which a bare `df[1:3]` leaves to the reader.
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2013-01-02,-0.462173,0.489251,1.296628,-1.155972
2013-01-03,-0.3545,-0.035041,-0.779967,0.235855


In [7]:
# Lower-case every column label using the vectorized string accessor.
df.columns = df.columns.str.lower()
df

Unnamed: 0,a,b,c,d
2013-01-01,-1.105899,1.358544,0.503598,0.411258
2013-01-02,-0.462173,0.489251,1.296628,-1.155972
2013-01-03,-0.3545,-0.035041,-0.779967,0.235855
2013-01-04,0.434285,0.480654,-1.126446,0.864055
2013-01-05,-0.187896,2.590489,-0.340271,0.661748
2013-01-06,-0.180472,-0.855244,-0.94411,0.429397


In [8]:
# Selecting a single column with df['a'] already returns a pandas Series,
# so no explicit Series(...) conversion is needed.
b = df['a']

In [9]:
# Copy the Series values into a new NumPy array and print it.
np_data = np.array(b, copy=True)
print(np_data)

[-1.10589851 -0.46217261 -0.35450048  0.43428483 -0.18789609 -0.18047187]


In [10]:
# Print the array one element per line.
for value in np_data:
    print(value)

-1.1058985112387523
-0.46217260855210274
-0.35450048143106905
0.4342848327121899
-0.1878960902559143
-0.18047187116799976


In [11]:
# Get the row index (the row labels) as an array
df.index.array

<DatetimeArray>
['2013-01-01 00:00:00', '2013-01-02 00:00:00', '2013-01-03 00:00:00',
 '2013-01-04 00:00:00', '2013-01-05 00:00:00', '2013-01-06 00:00:00']
Length: 6, dtype: datetime64[ns]

In [12]:
# Get the column index (the column labels) as an array
df.columns.array

<PandasArray>
['a', 'b', 'c', 'd']
Length: 4, dtype: object

In [13]:
# Replace the column labels with 1-4 and the row labels with 1-6.
df.columns = list(range(1, 5))
df.index = list(range(1, 7))
df

Unnamed: 0,1,2,3,4
1,-1.105899,1.358544,0.503598,0.411258
2,-0.462173,0.489251,1.296628,-1.155972
3,-0.3545,-0.035041,-0.779967,0.235855
4,0.434285,0.480654,-1.126446,0.864055
5,-0.187896,2.590489,-0.340271,0.661748
6,-0.180472,-0.855244,-0.94411,0.429397


In [14]:
# Get the first row. The index holds integers at this point, so a bare
# `df[:1]` could be read as either "up to label 1" or "first position";
# `.iloc` states outright that this is a positional slice.
df.iloc[:1]

Unnamed: 0,1,2,3,4
1,-1.105899,1.358544,0.503598,0.411258


In [15]:
# Select the column labeled 1; the result is a Series.
df.loc[:, 1]

1   -1.105899
2   -0.462173
3   -0.354500
4    0.434285
5   -0.187896
6   -0.180472
Name: 1, dtype: float64

In [16]:
# Convert the column labeled 1 into a NumPy array.
arr = df.loc[:, 1].to_numpy()
arr

array([-1.10589851, -0.46217261, -0.35450048,  0.43428483, -0.18789609,
       -0.18047187])

In [26]:
# Sort the values of column 1 in ascending order.
df[1].sort_values(ascending=True)

1   -1.105899
2   -0.462173
3   -0.354500
5   -0.187896
6   -0.180472
4    0.434285
Name: 1, dtype: float64