In [1]:
import pandas as pd
import numpy as np
# 创建Series
s1 = pd.Series(data=['1', 'a', '3', 'c'], index=['a', 'b', 'c', 'd'])
print(s1['c'])

3


In [2]:
date = pd.date_range('20210101', periods=6)
print(date)

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06'],
              dtype='datetime64[ns]', freq='D')


In [3]:
df = pd.DataFrame(np.random.randn(6, 4), index=date, columns=list('abcd'))
print(df)

                   a         b         c         d
2021-01-01 -0.584476  0.737530  1.894501  0.076558
2021-01-02  1.938962  0.127700 -0.102457  0.642288
2021-01-03  0.831839  0.568034 -1.325083  0.047391
2021-01-04  1.784525 -1.289013  1.293223  0.447922
2021-01-05 -0.292791 -0.078850 -0.768216 -0.527572
2021-01-06 -0.362196  0.664273  0.290401 -0.078072


In [4]:
# 数据完整性分析
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2021-01-01 to 2021-01-06
Freq: D
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   a       6 non-null      float64
 1   b       6 non-null      float64
 2   c       6 non-null      float64
 3   d       6 non-null      float64
dtypes: float64(4)
memory usage: 240.0 bytes


In [5]:
df.tail(3)

Unnamed: 0,a,b,c,d
2021-01-04,1.784525,-1.289013,1.293223,0.447922
2021-01-05,-0.292791,-0.07885,-0.768216,-0.527572
2021-01-06,-0.362196,0.664273,0.290401,-0.078072


In [6]:
df[0:2] 

Unnamed: 0,a,b,c,d
2021-01-01,-0.584476,0.73753,1.894501,0.076558
2021-01-02,1.938962,0.1277,-0.102457,0.642288


In [7]:
df2 = df.loc['2021-01-01':'2021-01-04', ['a', 'b']]
df2

Unnamed: 0,a,b
2021-01-01,-0.584476,0.73753
2021-01-02,1.938962,0.1277
2021-01-03,0.831839,0.568034
2021-01-04,1.784525,-1.289013


In [8]:
df3 = df.iloc[0:4, [0, 1]]
df3

Unnamed: 0,a,b
2021-01-01,-0.584476,0.73753
2021-01-02,1.938962,0.1277
2021-01-03,0.831839,0.568034
2021-01-04,1.784525,-1.289013


In [9]:
df4 = df.loc[df.index < '2021-01-04', ['a', 'b']]
df4

Unnamed: 0,a,b
2021-01-01,-0.584476,0.73753
2021-01-02,1.938962,0.1277
2021-01-03,0.831839,0.568034


In [10]:
# 删除a列
df1 = df.drop(labels='a', axis=1)
df1

Unnamed: 0,b,c,d
2021-01-01,0.73753,1.894501,0.076558
2021-01-02,0.1277,-0.102457,0.642288
2021-01-03,0.568034,-1.325083,0.047391
2021-01-04,-1.289013,1.293223,0.447922
2021-01-05,-0.07885,-0.768216,-0.527572
2021-01-06,0.664273,0.290401,-0.078072


In [11]:
# 删除第一行
df2 = df.drop(labels=date[0], axis=0)
df2

Unnamed: 0,a,b,c,d
2021-01-02,1.938962,0.1277,-0.102457,0.642288
2021-01-03,0.831839,0.568034,-1.325083,0.047391
2021-01-04,1.784525,-1.289013,1.293223,0.447922
2021-01-05,-0.292791,-0.07885,-0.768216,-0.527572
2021-01-06,-0.362196,0.664273,0.290401,-0.078072


In [12]:
# 排序
df_sorted_by_index = df.sort_index(axis=0, ascending=False)
df_sorted_by_index

Unnamed: 0,a,b,c,d
2021-01-06,-0.362196,0.664273,0.290401,-0.078072
2021-01-05,-0.292791,-0.07885,-0.768216,-0.527572
2021-01-04,1.784525,-1.289013,1.293223,0.447922
2021-01-03,0.831839,0.568034,-1.325083,0.047391
2021-01-02,1.938962,0.1277,-0.102457,0.642288
2021-01-01,-0.584476,0.73753,1.894501,0.076558


In [13]:
# 按列排序
df_sorted_by_values = df.sort_values(by='a', ascending=True)
df_sorted_by_values

Unnamed: 0,a,b,c,d
2021-01-01,-0.584476,0.73753,1.894501,0.076558
2021-01-06,-0.362196,0.664273,0.290401,-0.078072
2021-01-05,-0.292791,-0.07885,-0.768216,-0.527572
2021-01-03,0.831839,0.568034,-1.325083,0.047391
2021-01-04,1.784525,-1.289013,1.293223,0.447922
2021-01-02,1.938962,0.1277,-0.102457,0.642288


In [18]:
df.to_csv('df.csv', index=True, columns=['a', 'c'])

In [19]:
df_csv = pd.read_csv('df.csv')
df_csv

Unnamed: 0.1,Unnamed: 0,a,c
0,2021-01-01,-0.584476,1.894501
1,2021-01-02,1.938962,-0.102457
2,2021-01-03,0.831839,-1.325083
3,2021-01-04,1.784525,1.293223
4,2021-01-05,-0.292791,-0.768216
5,2021-01-06,-0.362196,0.290401


In [21]:
df.var()

a    1.272419
b    0.580643
c    1.485662
d    0.168797
dtype: float64

In [20]:
df.describe()

Unnamed: 0,a,b,c,d
count,6.0,6.0,6.0,6.0
mean,0.552644,0.121612,0.213728,0.101419
std,1.128015,0.761999,1.218878,0.41085
min,-0.584476,-1.289013,-1.325083,-0.527572
25%,-0.344845,-0.027212,-0.601776,-0.046706
50%,0.269524,0.347867,0.093972,0.061975
75%,1.546354,0.640213,1.042518,0.355081
max,1.938962,0.73753,1.894501,0.642288
