In [69]:
import numpy as np
import pandas as pd
# Six consecutive calendar days starting 2023-01-01; used as the row index below.
dates = pd.date_range(start="20230101", periods=6)
# 6x4 frame of standard-normal draws, one row per date and one column per letter.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)

In [70]:
# Mean of each column (reduce down the rows); yields one value per column label.
df.mean(axis=0)

A    0.252611
B    0.687240
C    0.117209
D   -0.222441
dtype: float64

In [71]:
# Mean of each row (reduce across the columns); yields one value per date.
df.mean(axis="columns")

2023-01-01    1.278740
2023-01-02    0.303985
2023-01-03   -0.328766
2023-01-04    0.071403
2023-01-05    0.150550
2023-01-06   -0.223983
Freq: D, dtype: float64

In [72]:
# Series on the shared date index, pushed two positions later: the first two
# slots become NaN and the last two original values (6 and 8) fall off the end.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)
s

2023-01-01    NaN
2023-01-02    NaN
2023-01-03    1.0
2023-01-04    3.0
2023-01-05    5.0
2023-01-06    NaN
Freq: D, dtype: float64

In [73]:
# Subtract s from every column, matching on the row index; rows where s is NaN
# come out as NaN in all columns.
df.sub(s, axis=0)

Unnamed: 0,A,B,C,D
2023-01-01,,,,
2023-01-02,,,,
2023-01-03,-0.070261,-3.033275,-0.776479,-1.43505
2023-01-04,-3.267929,-2.063689,-2.831171,-3.5516
2023-01-05,-6.266439,-2.567667,-5.669501,-4.894194
2023-01-06,,,,


User-Defined Functions

In [74]:
# Reduce each column to a single number: its mean scaled by 5.6.
df.agg(lambda col: 5.6 * np.mean(col))

A    1.414622
B    3.848542
C    0.656368
D   -1.245671
dtype: float64

In [75]:
# Element-wise transform: add 101.2 to every value, keeping the frame's shape and labels.
df.transform(lambda col: col + 101.2)

Unnamed: 0,A,B,C,D
2023-01-01,103.187824,103.232767,103.029817,100.464551
2023-01-02,100.717003,102.287215,101.752846,101.258875
2023-01-03,102.129739,99.166725,101.423521,100.76495
2023-01-04,100.932071,102.136311,101.368829,100.6484
2023-01-05,99.933561,103.632333,100.530499,101.305806
2023-01-06,101.815469,100.868087,99.79774,101.42277


Value Counts

In [76]:
# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(low=0, high=7, size=10))
s

0    5
1    5
2    4
3    2
4    3
5    4
6    1
7    0
8    0
9    0
dtype: int64

In [77]:
# Frequency of each distinct value, most common first.
s.value_counts(sort=True, ascending=False)

0    3
5    2
4    2
2    1
3    1
1    1
Name: count, dtype: int64

String Methods

In [78]:
# Mixed-case strings with one missing entry; the .str accessor lowercases each
# string and leaves the missing value as NaN.
s = pd.Series(
    ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

Merge

In [79]:
# 10x4 frame of standard-normal draws with default integer row and column labels.
df = pd.DataFrame(data=np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.78001,0.022093,-0.038235,0.615825
1,-1.158147,0.255082,0.82265,-0.884656
2,0.160678,0.868828,0.912406,1.05283
3,0.031918,-1.754132,2.021289,2.126747
4,-0.557363,-0.18966,-1.575806,-0.843577
5,-0.301226,1.374698,-1.678042,0.868055
6,-0.711295,0.557393,0.720694,0.154846
7,0.080941,-0.668766,-0.520756,-0.5526
8,-0.684362,-0.854491,-0.397223,0.366134
9,0.544889,1.206529,-0.190811,1.160006


In [80]:
# Split the frame into three consecutive row groups by position: rows 0-2, 3-6 and 7-9.
pieces = [
    df.iloc[:3],
    df.iloc[3:7],
    df.iloc[7:],
]
pieces

[          0         1         2         3
 0  0.780010  0.022093 -0.038235  0.615825
 1 -1.158147  0.255082  0.822650 -0.884656
 2  0.160678  0.868828  0.912406  1.052830,
           0         1         2         3
 3  0.031918 -1.754132  2.021289  2.126747
 4 -0.557363 -0.189660 -1.575806 -0.843577
 5 -0.301226  1.374698 -1.678042  0.868055
 6 -0.711295  0.557393  0.720694  0.154846,
           0         1         2         3
 7  0.080941 -0.668766 -0.520756 -0.552600
 8 -0.684362 -0.854491 -0.397223  0.366134
 9  0.544889  1.206529 -0.190811  1.160006]

In [81]:
# Stack the row groups back on top of each other, restoring the original frame.
pd.concat(pieces, axis=0)

Unnamed: 0,0,1,2,3
0,0.78001,0.022093,-0.038235,0.615825
1,-1.158147,0.255082,0.82265,-0.884656
2,0.160678,0.868828,0.912406,1.05283
3,0.031918,-1.754132,2.021289,2.126747
4,-0.557363,-0.18966,-1.575806,-0.843577
5,-0.301226,1.374698,-1.678042,0.868055
6,-0.711295,0.557393,0.720694,0.154846
7,0.080941,-0.668766,-0.520756,-0.5526
8,-0.684362,-0.854491,-0.397223,0.366134
9,0.544889,1.206529,-0.190811,1.160006


Join

In [82]:
# Two small frames sharing a "key" column; the key value "foo" appears twice in each.
left = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "lval": [1, 2],
    }
)
right = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "rval": [4, 5],
    }
)

In [83]:
# Display the left-hand frame (columns: key, lval).
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [84]:
# Display the right-hand frame (columns: key, rval).
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [85]:
# Inner join on "key". Each frame holds two "foo" rows, so every left row pairs
# with every right row and the result has four rows.
pd.merge(left, right, how="inner", on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [86]:
# Frames whose "key" values are distinct within each frame ("foo", "bar").
left = pd.DataFrame(
    {
        "key": ["foo", "bar"],
        "lval": [1, 2],
    }
)
right = pd.DataFrame(
    {
        "key": ["foo", "bar"],
        "rval": [4, 5],
    }
)

# With unique keys the inner join pairs rows one-to-one: two rows in, two rows out.
pd.merge(left, right, how="inner", on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5
