In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list; the np.nan entry makes the result float64
# (see the dtype in the output).
s = pd.Series(data=[1, 2, 3, np.nan, 5, 6])
s

0    1.0
1    2.0
2    3.0
3    NaN
4    5.0
5    6.0
dtype: float64

In [3]:
# String values with an explicit, non-contiguous integer index.
s = pd.Series(data=['D', '5.0', 'str'], index=[4, 5, 7])
s

4      D
5    5.0
7    str
dtype: object

In [4]:
# Five draws from np.random.randn, labelled 'a' through 'e'.
s = pd.Series(np.random.randn(5), index=list('abcde'))
s

a    0.129558
b   -0.862111
c    1.837074
d    0.776796
e    0.867427
dtype: float64

In [5]:
# The index object holds the labels of the Series.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
# A dict converts to a Series whose index is the dict's keys.
d = dict(a=1, b=2, c=3)
s = pd.Series(d)
s

a    1
b    2
c    3
dtype: int64

In [7]:
# With an explicit index the values are looked up by key. 'd' is not a key of
# the dict, so that entry is NaN and the result is float64 (see output).
d = dict(a=1, b=2, c=3)
pd.Series(data=d, index=['a', 'b', 'd', 'c'])

a    1.0
b    2.0
d    NaN
c    3.0
dtype: float64

In [8]:
# A scalar value is repeated for every label in the given index.
pd.Series(6,index=[1,2,3,4,5])


1    6
2    6
3    6
4    6
5    6
dtype: int64

In [9]:
# Fresh random Series labelled 'a'..'e', used by the indexing cells that follow.
s = pd.Series(np.random.randn(5), index=list('abcde'))

In [10]:
s

a   -0.683760
b    0.514295
c   -0.852886
d   -0.933489
e    0.631434
dtype: float64

In [11]:
# Select the first element by position. `.iloc` makes that explicit: plain
# `s[0]` on a string-labelled Series only works through an integer-position
# fallback in `Series.__getitem__` that pandas has deprecated.
s.iloc[0]

-0.6837599038452168

In [12]:
# Positional slice: everything from the fourth element onwards.
s.iloc[3:]

d   -0.933489
e    0.631434
dtype: float64

In [13]:
# Boolean-mask selection: keep only the entries greater than the median.
s[s > s.median()]

b    0.514295
e    0.631434
dtype: float64

In [14]:
# NumPy functions apply element-wise and return a Series with the same index.
np.exp(s)

a    0.504716
b    1.672460
c    0.426183
d    0.393180
e    1.880305
dtype: float64

In [15]:
# Element type of the Series.
s.dtype

dtype('float64')

In [16]:
# `.array` exposes the underlying values as a pandas array object
# (a PandasArray in the output below).
s.array

<PandasArray>
[-0.6837599038452168,  0.5142954856015259, -0.8528862196756851,
 -0.9334885081134715,  0.6314337448353512]
Length: 5, dtype: float64

In [17]:
# Convert the values to a plain NumPy ndarray (the index is not included).
s.to_numpy()

array([-0.6837599 ,  0.51429549, -0.85288622, -0.93348851,  0.63143374])

In [18]:
# Dict-style lookup by index label.
s['a']

-0.6837599038452168

In [19]:
# `in` tests membership among the index labels.
'e' in s

True

In [20]:
# 'f' is not one of the index labels, so this is False.
'f' in s

False

In [21]:
# get() returns None for a missing label instead of raising, so this cell
# displays nothing.
s.get('f')


In [22]:
# The second argument is the default returned when the label is missing.
s.get('f',np.nan)

nan

In [23]:
# Four random values labelled 'a'..'d' for the arithmetic examples below.
s = pd.Series(np.random.randn(4), index=list('abcd'))
s

a    1.939763
b   -1.411094
c    1.247095
d    0.558225
dtype: float64

In [24]:
# Element-wise addition of two Series with identical labels.
s+s

a    3.879525
b   -2.822188
c    2.494189
d    1.116450
dtype: float64

In [25]:
# Scalar multiplication is applied to every element; same result as s + s.
s * 2

a    3.879525
b   -2.822188
c    2.494189
d    1.116450
dtype: float64

In [26]:
# Arithmetic aligns on labels, not positions: only 'b' is present in both
# slices, so every other label in the result is NaN.
s.iloc[:2] + s.iloc[1:]

a         NaN
b   -2.822188
c         NaN
d         NaN
dtype: float64

In [27]:
# A Series can carry a name, readable through the `.name` attribute.
s = pd.Series(data=np.random.randn(3), name='一个Series对象')
s.name

'一个Series对象'

In [28]:
# rename() with a string argument yields a Series carrying that name;
# the result is bound to s1.
s1 = s.rename('一个新的名字')
s1.name

'一个新的名字'

In [29]:
# Two Series with different indexes: 'two' has an extra label 'd'.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([4, 5, 6, 7], index=list('abcd')),
}

In [30]:
# Build a frame from the dict of Series: rows are the union of the Series
# indexes, so column 'one' has no value for row 'd' (shown as missing).
df = pd.DataFrame(d)

In [31]:
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,5
c,3.0,6
d,,7


In [32]:
# Explicit index/columns select from the dict: 'one' is left out, and 'three'
# is not a key of d, so that column is entirely missing.
df = pd.DataFrame(
    data=d,
    index=list('abcd'),
    columns=['two', 'three'],
)
df

Unnamed: 0,two,three
a,4,
b,5,
c,6,
d,7,


In [33]:
# Row labels of the frame.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [34]:
# Column labels of the frame.
df.columns

Index(['two', 'three'], dtype='object')

In [35]:
# A dict of equal-length lists: each key becomes a column, rows get 0..n-1.
d = dict(first=[1, 2, 3], second=[4, 5, 6])
df = pd.DataFrame(d)
df

Unnamed: 0,first,second
0,1,4
1,2,5
2,3,6


In [36]:
# A list of dicts: each dict is one row, and keys absent from a row are
# shown as missing in that row.
data = [dict(a=1, b=2), dict(c=3, d=4)]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c,d
0,1.0,2.0,,
1,,,3.0,4.0


In [37]:
# Two independent random Series (default integer index) combined as columns.
s1 = pd.Series(data=np.random.randn(4))
s2 = pd.Series(data=np.random.randn(4))
pd.DataFrame(dict(one=s1, two=s2))

Unnamed: 0,one,two
0,1.083107,0.153142
1,0.493324,0.838513
2,-0.589221,-0.589488
3,0.300849,-0.923145


In [38]:
# Three records sharing the keys 'a' and 'b', with explicit row labels.
df = pd.DataFrame(
    [dict(a=1, b=2), dict(a=3, b=4), dict(a=5, b=6)],
    index=['one', 'two', 'three'],
)
df

Unnamed: 0,a,b
one,1,2
two,3,4
three,5,6


In [39]:
# Selecting a single column returns a Series named after that column.
df['a']

one      1
two      3
three    5
Name: a, dtype: int64

In [40]:
# Add a derived column in place: element-wise product of 'a' and 'b'.
df['c'] = df['a'] * df['b']
df

Unnamed: 0,a,b,c
one,1,2,2
two,3,4,12
three,5,6,30


In [41]:
# Add a boolean column in place: True where 'c' exceeds 10.
df['flag'] = df['c']>10
df

Unnamed: 0,a,b,c,flag
one,1,2,2,False
two,3,4,12,True
three,5,6,30,True


In [42]:
# Two ways to remove a column in place: `del` and `pop`.
# pop() also returns the removed column; that return value is discarded here.
# NOTE(review): both statements mutate df, so this cell cannot be re-run
# without first re-running the cells that create 'flag' and 'c'.
del df['flag']
df.pop('c')
df

Unnamed: 0,a,b
one,1,2
two,3,4
three,5,6


In [43]:
# Assigning a scalar fills the whole new column with that value.
df['new'] = 'Hello'
df

Unnamed: 0,a,b,new
one,1,2,Hello
two,3,4,Hello
three,5,6,Hello


In [44]:
# Assigning a Series aligns it on df's index: the one-element slice only has
# the label 'one', so the other rows of 'new' become missing.
df['new'] = df['b'].iloc[:1]
df

Unnamed: 0,a,b,new
one,1,2,2.0
two,3,4,
three,5,6,


In [45]:
# insert() places a new column at a given position (here index 1, right after
# 'a') and modifies df in place.
# NOTE(review): re-running this cell fails because a column named 'insert'
# already exists after the first run.
df.insert(1,'insert',df['b'])
df

Unnamed: 0,a,insert,b,new
one,1,2,2,2.0
two,3,4,4,
three,5,6,6,


In [46]:
# Select one row by label; the result is a Series indexed by column names.
df.loc['one']


a         1.0
insert    2.0
b         2.0
new       2.0
Name: one, dtype: float64

In [47]:
# Select one row by integer position (the third row, labelled 'three').
df.iloc[2]

a         5.0
insert    6.0
b         6.0
new       NaN
Name: three, dtype: float64

In [48]:
# Transpose: rows become columns and vice versa.
df.T

Unnamed: 0,one,two,three
a,1.0,3.0,5.0
insert,2.0,4.0,6.0
b,2.0,4.0,6.0
new,2.0,,


In [49]:
# Attribute-style column access; gives the same Series as df['a'].
df.a

one      1
two      3
three    5
Name: a, dtype: int64

In [50]:
# Eight consecutive daily timestamps starting at 2022-01-01.
date = pd.date_range(start='2022/1/1', periods=8)
date

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08'],
              dtype='datetime64[ns]', freq='D')

In [51]:
# Random Series labelled 'a'..'e', inspected later via .array / .to_numpy().
s = pd.Series(np.random.randn(5), index=list('abcde'))


In [52]:
# 8x3 frame of random values, indexed by the dates built above.
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=date,
    columns=list('ABC'),
)

In [53]:
# A 1000-element Series; head() shows only the first five rows.
long_series = pd.Series(data=np.random.randn(1000))
long_series.head()

0   -0.100896
1   -0.361613
2   -1.679536
3   -0.040938
4    0.182217
dtype: float64

In [54]:
# tail(n) shows the last n rows.
long_series.tail(3)

997   -1.101965
998   -0.721716
999    0.113706
dtype: float64

In [55]:
# (number of rows, number of columns)
df.shape

(8, 3)

In [56]:
# Rename the columns of df to their lower-case form (assigned in place).
df.columns = list(map(str.lower, df.columns))
df.head()

Unnamed: 0,a,b,c
2022-01-01,-0.129687,0.823438,0.617742
2022-01-02,-0.664699,-0.162348,1.049455
2022-01-03,1.000771,-0.906787,0.119878
2022-01-04,0.527656,-0.461703,1.145813
2022-01-05,0.044587,-0.515621,0.986171


In [57]:
# The index labels exposed as a pandas array object.
s.index.array

<PandasArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

In [58]:
# The Series values exposed as a pandas array object.
s.array

<PandasArray>
[ 0.17410873903124857, -0.38568922491618923,   2.3294114141955866,
 -0.03546977212595958,  -1.7411408024119992]
Length: 5, dtype: float64

In [59]:
# The Series values as a NumPy ndarray.
s.to_numpy()

array([ 0.17410874, -0.38568922,  2.32941141, -0.03546977, -1.7411408 ])

In [60]:
# np.asarray() produces the same ndarray as s.to_numpy() (compare outputs).
np.asarray(s)

array([ 0.17410874, -0.38568922,  2.32941141, -0.03546977, -1.7411408 ])

In [61]:
# Three random columns with partially overlapping indexes; labels a column
# does not have show up as missing values.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=list('abc')),
    'two': pd.Series(np.random.randn(4), index=list('abcd')),
    'three': pd.Series(np.random.randn(2), index=list('bc')),
})
df

Unnamed: 0,one,two,three
a,-0.226778,0.205719,
b,-1.080915,-0.573301,-0.74357
c,-1.937514,0.931099,-0.170354
d,,0.882277,


In [62]:
# Take the second row (label 'b') as a Series indexed by column names.
row = df.iloc[1]
row

one     -1.080915
two     -0.573301
three   -0.743570
Name: b, dtype: float64

In [63]:
# axis=1 matches row's labels against the column labels, so row 'b' is added
# to every row of df. Missing values stay missing.
df.add(row,axis=1)

Unnamed: 0,one,two,three
a,-1.307694,-0.367582,
b,-2.161831,-1.146602,-1.487141
c,-3.018429,0.357798,-0.913925
d,,0.308975,


In [64]:
# Despite the plural name, `columns` holds the single column 'two' as a Series.
columns = df['two']
columns

a    0.205719
b   -0.573301
c    0.931099
d    0.882277
Name: two, dtype: float64

In [65]:
# axis=0 aligns on the row index: column 'two' is subtracted from every
# column, which is why 'two' itself becomes all zeros.
df.sub(columns,axis=0)

Unnamed: 0,one,two,three
a,-0.432497,0.0,
b,-0.507614,0.0,-0.170269
c,-2.868613,0.0,-1.101453
d,,0.0,


In [66]:
df

Unnamed: 0,one,two,three
a,-0.226778,0.205719,
b,-1.080915,-0.573301,-0.74357
c,-1.937514,0.931099,-0.170354
d,,0.882277,


In [67]:
# Fully populated integer frame with the same labels as df.
df2 = pd.DataFrame(
    dict(one=[1, 2, 3, 4], two=[2, 5, 4, 8], three=[5, 6, 1, 2]),
    index=list('abcd'),
)
df2

Unnamed: 0,one,two,three
a,1,2,5
b,2,5,6
c,3,4,1
d,4,8,2


In [68]:
# fill_value=0 stands in for a value missing on one side, so cells that are
# missing in df simply take df2's value instead of staying missing.
df.add(df2, fill_value=0)

Unnamed: 0,one,two,three
a,0.773222,2.205719,5.0
b,0.919085,4.426699,5.25643
c,1.062486,4.931099,0.829646
d,4.0,8.882277,2.0


In [69]:
df

Unnamed: 0,one,two,three
a,-0.226778,0.205719,
b,-1.080915,-0.573301,-0.74357
c,-1.937514,0.931099,-0.170354
d,,0.882277,


In [70]:
# Per column: True only if every value in that column is greater than 0.
(df>0).all()

one      False
two      False
three    False
dtype: bool

In [71]:
# False here: the frame has rows, even though some entries are missing.
df.empty

False

In [72]:
# Chaining a second all() reduces the per-column result to a single boolean.
(df>0).all().all()

False

In [73]:
# A frame that has a column but no rows counts as empty.
pd.DataFrame(columns=['ABC']).empty

True

In [74]:
# Per column: True if at least one value in that column is greater than 0.
(df>0).any()

one      False
two       True
three    False
dtype: bool

In [75]:
# equals() compares whole frames and treats missing values at the same
# positions as equal, so the result is True even though df contains NaN.
(df +df).equals(2 * df)

True

In [76]:
# The one-element array is broadcast against the three-element array, giving
# an element-wise comparison.
np.array([1, 2, 3]) == np.array([2])

array([False,  True, False])

In [77]:
df

Unnamed: 0,one,two,three
a,-0.226778,0.205719,
b,-1.080915,-0.573301,-0.74357
c,-1.937514,0.931099,-0.170354
d,,0.882277,


In [78]:
# Column means; missing values are skipped rather than propagated.
df.mean()

one     -1.081736
two      0.361448
three   -0.456962
dtype: float64

In [79]:
# axis=1 reduces across the columns, giving one median per row.
df.median(axis=1)

a   -0.010530
b   -0.743570
c   -0.170354
d    0.882277
dtype: float64

In [80]:
# Standard deviation of each column (axis=0), ignoring missing values.
df.std(axis=0,skipna=True)

one      0.855368
two      0.705637
three    0.405325
dtype: float64

In [81]:
# Summary statistics (count, mean, std, min, quartiles, max) of 1000 draws.
s = pd.Series(data=np.random.randn(1000))
s.describe()

count    1000.000000
mean        0.021978
std         0.987783
min        -3.207217
25%        -0.675628
50%         0.018961
75%         0.734849
max         3.335776
dtype: float64

In [82]:
# 5x3 frame of random values with columns A, B, C and default row labels.
df = pd.DataFrame(data=np.random.randn(5, 3), columns=list('ABC'))
df

Unnamed: 0,A,B,C
0,-0.567549,-0.338204,-0.144193
1,-0.992694,-0.701576,-0.366568
2,-0.107124,-0.056439,0.678802
3,-0.629553,1.076433,-1.252925
4,-0.7533,-2.340144,-0.469283


In [83]:
# For each row, the column label holding that row's maximum.
df.idxmax(axis=1)

0    C
1    C
2    C
3    B
4    C
dtype: object

In [84]:
# For each column, the row label holding that column's minimum.
df.idxmin(axis=0)

A    1
B    4
C    3
dtype: int64

In [85]:
# Fifty random integers drawn from 0..9 (the upper bound 10 is exclusive).
d = np.random.randint(low=0, high=10, size=50)
d

array([6, 7, 2, 4, 9, 2, 0, 2, 2, 6, 9, 2, 9, 0, 3, 9, 5, 7, 6, 1, 7, 8,
       6, 9, 6, 2, 8, 9, 5, 0, 3, 0, 0, 0, 8, 7, 9, 4, 8, 6, 5, 7, 0, 0,
       1, 3, 5, 6, 7, 0])

In [86]:
# Count how often each distinct value occurs, most frequent first.
s = pd.Series(d)
s.value_counts()

0    9
6    7
9    7
7    6
2    6
5    4
8    4
3    3
4    2
1    2
dtype: int64

In [87]:
# Random Series labelled 'a'..'e' for the reindexing examples.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s

a    1.584789
b    0.996338
c    0.484087
d    0.496449
e   -1.393620
dtype: float64

In [89]:
# reindex() returns the values in the requested label order; labels that do
# not exist in s ('g' and 'i') come back as NaN.
s.reindex(['e','b','g','a','i'])

e   -1.393620
b    0.996338
g         NaN
a    1.584789
i         NaN
dtype: float64

In [90]:
# reindex_like() conforms s1 to the index of s.
# NOTE(review): s1 was last bound to an integer-indexed Series in an earlier
# cell, so none of its labels match 'a'..'e' and every value below is NaN.
# Confirm this is the intended demonstration.
s1.reindex_like(s)


a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
dtype: float64

In [91]:
# Daily index for 2022-01-01 .. 2022-01-08 with one random value per day.
date = pd.date_range('2022/1/1', periods=8)
s = pd.Series(np.random.randn(8), index=date)
# Take every third day by position. `.iloc` is the explicit positional
# indexer: `s[[0, 3, 6]]` on a DatetimeIndex-labelled Series relies on an
# integer-position fallback in `Series.__getitem__` that pandas has deprecated.
s2 = s.iloc[[0, 3, 6]]

In [97]:
s

2022-01-01   -0.579916
2022-01-02    2.261039
2022-01-03    2.422621
2022-01-04   -0.350369
2022-01-05   -0.400870
2022-01-06   -1.017559
2022-01-07   -1.343754
2022-01-08    0.411179
Freq: D, dtype: float64

In [98]:
s2

2022-01-01   -0.579916
2022-01-04   -0.350369
2022-01-07   -1.343754
Freq: 3D, dtype: float64

In [99]:
# Expand s2 onto the full daily index; dates missing from s2 become NaN.
s2.reindex(s.index)

2022-01-01   -0.579916
2022-01-02         NaN
2022-01-03         NaN
2022-01-04   -0.350369
2022-01-05         NaN
2022-01-06         NaN
2022-01-07   -1.343754
2022-01-08         NaN
Freq: D, dtype: float64

In [100]:
# Forward fill: each gap takes the most recent earlier value.
s2.reindex(s.index,method = 'ffill')

2022-01-01   -0.579916
2022-01-02   -0.579916
2022-01-03   -0.579916
2022-01-04   -0.350369
2022-01-05   -0.350369
2022-01-06   -0.350369
2022-01-07   -1.343754
2022-01-08   -1.343754
Freq: D, dtype: float64

In [101]:
# Backward fill: each gap takes the next later value; the final date has no
# later value and stays NaN.
s2.reindex(s.index,method = 'bfill')

2022-01-01   -0.579916
2022-01-02   -0.350369
2022-01-03   -0.350369
2022-01-04   -0.350369
2022-01-05   -1.343754
2022-01-06   -1.343754
2022-01-07   -1.343754
2022-01-08         NaN
Freq: D, dtype: float64

In [102]:
# Nearest fill: each gap takes the value of the closest existing date.
s2.reindex(s.index,method = 'nearest')

2022-01-01   -0.579916
2022-01-02   -0.579916
2022-01-03   -0.350369
2022-01-04   -0.350369
2022-01-05   -0.350369
2022-01-06   -1.343754
2022-01-07   -1.343754
2022-01-08   -1.343754
Freq: D, dtype: float64

In [103]:
# limit=1 forward-fills at most one consecutive gap; further gaps stay NaN.
s2.reindex(s.index,method = 'ffill',limit = 1)

2022-01-01   -0.579916
2022-01-02   -0.579916
2022-01-03         NaN
2022-01-04   -0.350369
2022-01-05   -0.350369
2022-01-06         NaN
2022-01-07   -1.343754
2022-01-08   -1.343754
Freq: D, dtype: float64

In [104]:
df

Unnamed: 0,A,B,C
0,-0.567549,-0.338204,-0.144193
1,-0.992694,-0.701576,-0.366568
2,-0.107124,-0.056439,0.678802
3,-0.629553,1.076433,-1.252925
4,-0.7533,-2.340144,-0.469283


In [106]:
# Drop column 'A' (axis=1). The result is a new frame; df itself still has
# 'A', as the next cell's output shows.
df.drop('A',axis=1)

Unnamed: 0,B,C
0,-0.338204,-0.144193
1,-0.701576,-0.366568
2,-0.056439,0.678802
3,1.076433,-1.252925
4,-2.340144,-0.469283


In [108]:
# Drop the rows labelled 1 and 3 (axis=0); returns a new frame.
df.drop([1,3],axis=0)

Unnamed: 0,A,B,C
0,-0.567549,-0.338204,-0.144193
2,-0.107124,-0.056439,0.678802
4,-0.7533,-2.340144,-0.469283


In [111]:
# Random Series labelled 'a'..'e' for the label-renaming examples.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s

a   -0.326658
b    1.032068
c    1.408185
d   -0.498249
e   -0.780058
dtype: float64

In [113]:
# Passing a function to rename() applies it to every index label.
s.rename(str.upper)

A   -0.326658
B    1.032068
C    1.408185
D   -0.498249
E   -0.780058
dtype: float64

In [116]:
# Passing a dict to rename() maps old index labels to new ones.
s.rename({'a':'o','b':'p','c':'q','d':'r','e':'s'})

o   -0.326658
p    1.032068
q    1.408185
r   -0.498249
s   -0.780058
dtype: float64

In [117]:
# Small two-column frame used by the iteration examples below.
df = pd.DataFrame(
    dict(col1=np.random.randn(3), col2=np.random.randn(3)),
    index=list('abc'),
)

# Iterating over a DataFrame walks its column labels.
for col in df.columns:
    print(col)

col1
col2


In [118]:
# Despite the variable name, iterating a DataFrame directly yields column
# labels, not rows. The output is identical to the previous loop.
for row in df:
    print(row)

col1
col2


In [119]:
# items() yields (column label, column Series) pairs.
for label, ser in df.items():
    print(label, ser, sep='\n')

col1
a   -0.752748
b    0.355623
c   -1.396863
Name: col1, dtype: float64
col2
a    0.170046
b    2.115181
c    1.130240
Name: col2, dtype: float64


In [120]:
# iterrows() yields (row label, row Series) pairs.
for index, row in df.iterrows():
    print(index)
    print(row)

a
col1   -0.752748
col2    0.170046
Name: a, dtype: float64
b
col1    0.355623
col2    2.115181
Name: b, dtype: float64
c
col1   -1.396863
col2    1.130240
Name: c, dtype: float64


In [121]:
# itertuples() yields one named tuple per row; its first field, `Index`,
# is the row label and the remaining fields are the column values.
for row in df.itertuples():
    print(row)

Pandas(Index='a', col1=-0.7527479960512652, col2=0.17004562449070096)
Pandas(Index='b', col1=0.3556232583123519, col2=2.1151810120272803)
Pandas(Index='c', col1=-1.3968625542650128, col2=1.1302402391429855)


In [122]:
# Three random columns with partially overlapping row labels.
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=list("abc")),
        "two": pd.Series(np.random.randn(4), index=list("abcd")),
        "three": pd.Series(np.random.randn(3), index=list("bcd")),
    }
)


In [123]:
# Shuffle both the row order and the column order to get an unsorted frame.
unsorted_df = df.reindex(
    index=list("adcb"),
    columns=["three", "two", "one"],
)

In [124]:
unsorted_df

Unnamed: 0,three,two,one
a,,0.663213,0.33488
d,0.002582,0.722183,
c,-0.063257,-2.603502,0.459381
b,-0.926569,0.244567,-0.16542


In [125]:
# Sort the rows by index label, ascending by default.
unsorted_df.sort_index()

Unnamed: 0,three,two,one
a,,0.663213,0.33488
b,-0.926569,0.244567,-0.16542
c,-0.063257,-2.603502,0.459381
d,0.002582,0.722183,


In [126]:
# Sort the rows by index label in descending order.
unsorted_df.sort_index(ascending=False)

Unnamed: 0,three,two,one
d,0.002582,0.722183,
c,-0.063257,-2.603502,0.459381
b,-0.926569,0.244567,-0.16542
a,,0.663213,0.33488


In [127]:
# axis=1 sorts the columns by label; the row order is left unchanged.
unsorted_df.sort_index(axis=1)

Unnamed: 0,one,three,two
a,0.33488,,0.663213
d,,0.002582,0.722183
c,0.459381,-0.063257,-2.603502
b,-0.16542,-0.926569,0.244567


In [130]:
# Small frame with repeated values in 'one' and one missing value in 'three',
# used by the sort_values examples.
df1 = pd.DataFrame(
    {
        "one": [2, 1, 1, 1],
        "two": [1, 3, 2, 4],
        "three": [5, np.nan, 3, 2],
    }
)

In [131]:
# Sort the rows by the values in column 'two', ascending.
df1.sort_values(by='two')

Unnamed: 0,one,two,three
0,2,1,5.0
2,1,2,3.0
1,1,3,
3,1,4,2.0


In [132]:
# Sort by 'three' with the row holding the missing value placed first.
df1.sort_values('three',na_position='first')

Unnamed: 0,one,two,three
1,1,3,
3,1,4,2.0
2,1,2,3.0
0,2,1,5.0


In [134]:
# Sort by 'three', then by 'two' to break ties; the row with the missing
# 'three' value ends up last.
df1.sort_values(by=['three','two'])

Unnamed: 0,one,two,three
3,1,4,2.0
2,1,2,3.0
0,2,1,5.0
1,1,3,
