In [186]:
import pandas as pd
import numpy as np
from pandas import Series
import copy

In [127]:
# Build a daily DatetimeIndex covering six consecutive days from 2013-01-01.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [128]:
# Build a 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# The generator is seeded so the table is identical on every Restart & Run All;
# an unseeded draw would give different numbers on each run.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.702829,0.751522,-0.287243,-1.905485
2013-01-02,-1.096133,-1.256141,-0.325016,-0.733267
2013-01-03,-0.133441,-1.863581,-0.053794,-0.373477
2013-01-04,0.521455,-0.435632,0.352471,-1.052765
2013-01-05,-0.845169,-1.972147,-0.42609,0.679916
2013-01-06,-2.064644,0.192161,-0.316165,-1.482615


In [129]:
# Show the last 3 rows of the DataFrame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.521455,-0.435632,0.352471,-1.052765
2013-01-05,-0.845169,-1.972147,-0.42609,0.679916
2013-01-06,-2.064644,0.192161,-0.316165,-1.482615


In [130]:
# Show the first 2 rows of the DataFrame.
df.head(2)

Unnamed: 0,A,B,C,D
2013-01-01,-0.702829,0.751522,-0.287243,-1.905485
2013-01-02,-1.096133,-1.256141,-0.325016,-0.733267


In [131]:
# Select a range of rows by position: positions 1 and 2 (the stop position 3
# is excluded, as with any Python slice).
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2013-01-02,-1.096133,-1.256141,-0.325016,-0.733267
2013-01-03,-0.133441,-1.863581,-0.053794,-0.373477


In [132]:
# Convert every column label from upper case to lower case using the
# vectorised string accessor on the column Index.
df.columns = df.columns.str.lower()
df

Unnamed: 0,a,b,c,d
2013-01-01,-0.702829,0.751522,-0.287243,-1.905485
2013-01-02,-1.096133,-1.256141,-0.325016,-0.733267
2013-01-03,-0.133441,-1.863581,-0.053794,-0.373477
2013-01-04,0.521455,-0.435632,0.352471,-1.052765
2013-01-05,-0.845169,-1.972147,-0.42609,0.679916
2013-01-06,-2.064644,0.192161,-0.316165,-1.482615


In [133]:
# Take column 'a' of the frame and wrap it in a Series named `b`.
b = Series(df['a'])

In [134]:
# Convert the Series to a NumPy array and print it.
np_data = np.array(b)
print(np_data)

[-0.70282934 -1.09613345 -0.13344082  0.52145458 -0.8451689  -2.06464413]


In [135]:
# Iterate over the array and print each element on its own line.
for each in np_data:
    print(each)

-0.7028293367120573
-1.0961334481838043
-0.13344081744135514
0.5214545771096588
-0.8451689024798704
-2.0646441288658934


In [136]:
# Get the row labels: df.index holds the row index (here the dates),
# and .array exposes them as an array object.
df.index.array

<DatetimeArray>
['2013-01-01 00:00:00', '2013-01-02 00:00:00', '2013-01-03 00:00:00',
 '2013-01-04 00:00:00', '2013-01-05 00:00:00', '2013-01-06 00:00:00']
Length: 6, dtype: datetime64[ns]

In [137]:
# Get the column labels: df.columns holds the column index ('a'..'d'),
# and .array exposes them as an array object.
df.columns.array

<PandasArray>
['a', 'b', 'c', 'd']
Length: 4, dtype: object

In [138]:
# Replace both sets of labels: columns become the integers 1-4 and
# rows become the integers 1-6 (the data itself is unchanged).
df.columns = [1,2,3,4]
df.index = [1,2,3,4,5,6]
df

Unnamed: 0,1,2,3,4
1,-0.702829,0.751522,-0.287243,-1.905485
2,-1.096133,-1.256141,-0.325016,-0.733267
3,-0.133441,-1.863581,-0.053794,-0.373477
4,0.521455,-0.435632,0.352471,-1.052765
5,-0.845169,-1.972147,-0.42609,0.679916
6,-2.064644,0.192161,-0.316165,-1.482615


In [139]:
# Get a row: a positional slice up to (not including) position 1 yields a
# one-row DataFrame containing the first row.
df.iloc[:1]

Unnamed: 0,1,2,3,4
1,-0.702829,0.751522,-0.287243,-1.905485


In [140]:
# Get a column: df[1] looks up the column whose label is 1 and returns it as a Series.
df[1]

1   -0.702829
2   -1.096133
3   -0.133441
4    0.521455
5   -0.845169
6   -2.064644
Name: 1, dtype: float64

In [141]:
# Convert column 1 to a NumPy array.
arr = df[1].to_numpy()
arr

array([-0.70282934, -1.09613345, -0.13344082,  0.52145458, -0.8451689 ,
       -2.06464413])

In [142]:
# Sort the values of column 1 in ascending order; the result is a new Series
# that keeps each value's original row label.
df[1].sort_values()

6   -2.064644
2   -1.096133
5   -0.845169
1   -0.702829
3   -0.133441
4    0.521455
Name: 1, dtype: float64

In [143]:
# Return a range of rows by label. .loc slices are label-based and include
# BOTH endpoints. The row labels are 1-6, so slice on labels that actually
# exist (1 through 2) instead of starting at the non-existent label 0, which
# only works while the index happens to be sorted.
df.loc[1:2]

Unnamed: 0,1,2,3,4
1,-0.702829,0.751522,-0.287243,-1.905485
2,-1.096133,-1.256141,-0.325016,-0.733267


In [144]:
# Select exactly one row: the row labelled 2, returned as a Series keyed by column label.
df.loc[2]

1   -1.096133
2   -1.256141
3   -0.325016
4   -0.733267
Name: 2, dtype: float64

In [145]:
# Inspect the data type of the values in column 1.
df[1].dtype

dtype('float64')

In [146]:
# Force column 1 to int64. The fractional part is dropped (truncated toward
# zero, not rounded), so e.g. -0.845 becomes 0 and -2.06 becomes -2.
df = df.astype({1:'int64'}) 
df[1].dtype

dtype('int64')

In [147]:
# Get the transpose: row labels become column labels and vice versa.
df.T

Unnamed: 0,1,2,3,4,5,6
1,0.0,-1.0,0.0,0.0,0.0,-2.0
2,0.751522,-1.256141,-1.863581,-0.435632,-1.972147,0.192161
3,-0.287243,-0.325016,-0.053794,0.352471,-0.42609,-0.316165
4,-1.905485,-0.733267,-0.373477,-1.052765,0.679916,-1.482615


In [148]:
# Multiply the frame by its own transpose. The product is computed on the
# underlying NumPy arrays, so the result carries no labels and the wrapping
# DataFrame gets a default 0..n-1 index on both axes.
multi_matrix = np.matmul(df.to_numpy(), df.T.to_numpy())
df_matrix = pd.DataFrame(data=multi_matrix)
df_matrix

Unnamed: 0,0,1,2,3,4,5
0,4.278169,0.546571,-0.673415,1.577397,-2.655289,3.060331
1,0.546571,3.221206,2.632263,1.204615,2.117222,2.94853
2,-0.673415,2.632263,3.615313,1.186058,3.444245,0.212622
3,1.577397,1.204615,1.186058,1.422326,-0.006846,1.365695
4,-2.655289,2.117222,3.444245,-0.006846,4.533204,-1.252308
5,3.060331,2.94853,0.212622,1.365695,-1.252308,6.335033


In [149]:
# Duplicate a column: add a new column labelled 5 holding the same values as column 4.
df[5] = df[4]
df

Unnamed: 0,1,2,3,4,5
1,0,0.751522,-0.287243,-1.905485,-1.905485
2,-1,-1.256141,-0.325016,-0.733267,-0.733267
3,0,-1.863581,-0.053794,-0.373477,-0.373477
4,0,-0.435632,0.352471,-1.052765,-1.052765
5,0,-1.972147,-0.42609,0.679916,0.679916
6,-2,0.192161,-0.316165,-1.482615,-1.482615


In [160]:
# Duplicate a row: take the row at position 3 and append a copy of it to the
# end of the frame.
# DataFrame.append was deprecated and then removed in pandas 2.0, so the rows
# are joined with pd.concat instead.
# NOTE: this cell rebinds `df`, so every re-run appends one more copy; the
# recorded output below appears to come from running it several times.
list2 = df.iloc[3:4]
# Work on an explicit copy so relabelling it cannot touch the original frame.
df_list2 = list2.copy()
df_list2.index = [7]
# ignore_index=True renumbers the combined rows 0..n-1 (the label 7 set above
# is therefore not preserved in the result).
df = pd.concat([df, df_list2], ignore_index=True)
df

Unnamed: 0,1,2,3,4,5
0,0,0.751522,-0.287243,-1.905485,-1.905485
1,-1,-1.256141,-0.325016,-0.733267,-0.733267
2,0,-1.863581,-0.053794,-0.373477,-0.373477
3,0,-0.435632,0.352471,-1.052765,-1.052765
4,0,-1.972147,-0.42609,0.679916,0.679916
5,-2,0.192161,-0.316165,-1.482615,-1.482615
6,0,-0.435632,0.352471,-1.052765,-1.052765
7,0,-0.435632,0.352471,-1.052765,-1.052765
8,0,-0.435632,0.352471,-1.052765,-1.052765
9,0,-0.435632,0.352471,-1.052765,-1.052765


In [167]:
# Drop the last row by keeping every position except the final one.
# The cell rebinds `df`, so each run removes one more row.
df = df.iloc[:-1]
df

Unnamed: 0,1,2,3,4,5
0,0,0.751522,-0.287243,-1.905485,-1.905485
1,-1,-1.256141,-0.325016,-0.733267,-0.733267
2,0,-1.863581,-0.053794,-0.373477,-0.373477
3,0,-0.435632,0.352471,-1.052765,-1.052765
4,0,-1.972147,-0.42609,0.679916,0.679916
5,-2,0.192161,-0.316165,-1.482615,-1.482615
6,0,-0.435632,0.352471,-1.052765,-1.052765


In [199]:
# Replace a single value: .loc[row_label, column_label] addresses exactly one
# cell, here row 0 / column 1, which is set to 11.
# The assignment is restored (it had been commented out) so that a fresh
# Restart & Run All reproduces the 11 shown in the recorded output.
# NOTE(review): the recorded output also shows a `score` column and 5.0 in
# column 2 that no cell visible here creates — confirm which cell adds them.
df.loc[0, 1] = 11
df

Unnamed: 0,1,2,3,4,5,score
0,11,5.0,-0.287243,-1.905485,-1.905485,80
1,-1,-1.256141,-0.325016,-0.733267,-0.733267,98
2,0,-1.863581,-0.053794,-0.373477,-0.373477,67
3,0,-0.435632,0.352471,-1.052765,-1.052765,90
4,0,-1.972147,-0.42609,0.679916,0.679916,100
5,-2,0.192161,-0.316165,-1.482615,-1.482615,25
6,0,-0.435632,0.352471,-1.052765,-1.052765,81


In [201]:
# Iterate over the rows: the frame is viewed as a 2-D NumPy array and each
# row is printed as a 1-D array of its values.
for row in df.to_numpy():
    print(row)

[11.          5.         -0.28724323 -1.90548548 -1.90548548 80.        ]
[-1.         -1.25614097 -0.32501584 -0.73326713 -0.73326713 98.        ]
[ 0.00000000e+00 -1.86358111e+00 -5.37937487e-02 -3.73476972e-01
 -3.73476972e-01  6.70000000e+01]
[ 0.         -0.43563186  0.3524711  -1.0527653  -1.0527653  90.        ]
[  0.          -1.97214743  -0.42609042   0.67991553   0.67991553
 100.        ]
[-2.          0.19216108 -0.31616486 -1.48261492 -1.48261492 25.        ]
[ 0.         -0.43563186  0.3524711  -1.0527653  -1.0527653  81.        ]


In [217]:
# Iterate over the columns: walk the column labels and print each column
# (a Series) looked up by its label.
df_columns = df.columns
for i in df_columns:
    print(df[i])

0    11
1    -1
2     0
3     0
4     0
5    -2
6     0
Name: 1, dtype: int64
0    5.000000
1   -1.256141
2   -1.863581
3   -0.435632
4   -1.972147
5    0.192161
6   -0.435632
Name: 2, dtype: float64
0   -0.287243
1   -0.325016
2   -0.053794
3    0.352471
4   -0.426090
5   -0.316165
6    0.352471
Name: 3, dtype: float64
0   -1.905485
1   -0.733267
2   -0.373477
3   -1.052765
4    0.679916
5   -1.482615
6   -1.052765
Name: 4, dtype: float64
0   -1.905485
1   -0.733267
2   -0.373477
3   -1.052765
4    0.679916
5   -1.482615
6   -1.052765
Name: 5, dtype: float64
0     80
1     98
2     67
3     90
4    100
5     25
6     81
Name: score, dtype: int64


In [220]:
# Change the dtype of the whole frame: cast every column to the platform
# integer type. Float values lose their fractional part in the process.
df2 = df.astype('int')
df2

Unnamed: 0,1,2,3,4,5,score
0,11,5,0,-1,-1,80
1,-1,-1,0,0,0,98
2,0,-1,0,0,0,67
3,0,0,0,-1,-1,90
4,0,-1,0,0,0,100
5,-2,0,0,-1,-1,25
6,0,0,0,-1,-1,81
