In [1]:
import pandas as pd
import numpy as np

In [2]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
dates = pd.date_range("20240919", periods=10)

In [5]:
dates

DatetimeIndex(['2024-09-19', '2024-09-20', '2024-09-21', '2024-09-22',
               '2024-09-23', '2024-09-24', '2024-09-25', '2024-09-26',
               '2024-09-27', '2024-09-28'],
              dtype='datetime64[ns]', freq='D')

In [9]:
df = pd.DataFrame(np.random.randn(10, 4), index=dates, columns=list("ABCD"))

In [10]:
df

Unnamed: 0,A,B,C,D
2024-09-19,-2.568066,0.788232,-0.915825,-2.818292
2024-09-20,-0.699118,0.635301,0.785961,0.103549
2024-09-21,0.938266,-0.537379,-0.135296,-0.375633
2024-09-22,1.487447,-0.418853,-1.443539,-1.032403
2024-09-23,-0.662187,-0.265087,-1.469091,0.254237
2024-09-24,-1.121695,1.958646,-0.326305,-1.517174
2024-09-25,0.418751,-0.06361,-1.860928,1.033652
2024-09-26,0.946772,0.120705,1.773967,0.103898
2024-09-27,-0.921028,1.464708,0.000993,-0.010987
2024-09-28,-1.568773,0.796082,-0.961733,0.481026


In [11]:
df2 = pd.DataFrame(
    {
        "A": 1.5,
        "B": pd.Timestamp("20240919"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([4] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [12]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.5,2024-09-19,1.0,4,test,foo
1,1.5,2024-09-19,1.0,4,train,foo
2,1.5,2024-09-19,1.0,4,test,foo
3,1.5,2024-09-19,1.0,4,train,foo


In [13]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [14]:
df.head()

Unnamed: 0,A,B,C,D
2024-09-19,-2.568066,0.788232,-0.915825,-2.818292
2024-09-20,-0.699118,0.635301,0.785961,0.103549
2024-09-21,0.938266,-0.537379,-0.135296,-0.375633
2024-09-22,1.487447,-0.418853,-1.443539,-1.032403
2024-09-23,-0.662187,-0.265087,-1.469091,0.254237


In [15]:
df.tail()

Unnamed: 0,A,B,C,D
2024-09-24,-1.121695,1.958646,-0.326305,-1.517174
2024-09-25,0.418751,-0.06361,-1.860928,1.033652
2024-09-26,0.946772,0.120705,1.773967,0.103898
2024-09-27,-0.921028,1.464708,0.000993,-0.010987
2024-09-28,-1.568773,0.796082,-0.961733,0.481026


In [16]:
df.describe()

Unnamed: 0,A,B,C,D
count,10.0,10.0,10.0,10.0
mean,-0.374963,0.447875,-0.45518,-0.377813
std,1.284452,0.829157,1.119838,1.125967
min,-2.568066,-0.537379,-1.860928,-2.818292
25%,-1.071528,-0.214717,-1.323088,-0.868211
50%,-0.680653,0.378003,-0.621065,0.046281
75%,0.808388,0.79412,-0.033079,0.216652
max,1.487447,1.958646,1.773967,1.033652


In [17]:
df.info

<bound method DataFrame.info of                    A         B         C         D
2024-09-19 -2.568066  0.788232 -0.915825 -2.818292
2024-09-20 -0.699118  0.635301  0.785961  0.103549
2024-09-21  0.938266 -0.537379 -0.135296 -0.375633
2024-09-22  1.487447 -0.418853 -1.443539 -1.032403
2024-09-23 -0.662187 -0.265087 -1.469091  0.254237
2024-09-24 -1.121695  1.958646 -0.326305 -1.517174
2024-09-25  0.418751 -0.063610 -1.860928  1.033652
2024-09-26  0.946772  0.120705  1.773967  0.103898
2024-09-27 -0.921028  1.464708  0.000993 -0.010987
2024-09-28 -1.568773  0.796082 -0.961733  0.481026>

In [18]:
df.T

Unnamed: 0,2024-09-19,2024-09-20,2024-09-21,2024-09-22,2024-09-23,2024-09-24,2024-09-25,2024-09-26,2024-09-27,2024-09-28
A,-2.568066,-0.699118,0.938266,1.487447,-0.662187,-1.121695,0.418751,0.946772,-0.921028,-1.568773
B,0.788232,0.635301,-0.537379,-0.418853,-0.265087,1.958646,-0.06361,0.120705,1.464708,0.796082
C,-0.915825,0.785961,-0.135296,-1.443539,-1.469091,-0.326305,-1.860928,1.773967,0.000993,-0.961733
D,-2.818292,0.103549,-0.375633,-1.032403,0.254237,-1.517174,1.033652,0.103898,-0.010987,0.481026


In [19]:
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2024-09-19,-2.568066,0.788232,-0.915825,-2.818292
2024-09-20,-0.699118,0.635301,0.785961,0.103549
2024-09-21,0.938266,-0.537379,-0.135296,-0.375633
2024-09-22,1.487447,-0.418853,-1.443539,-1.032403
2024-09-23,-0.662187,-0.265087,-1.469091,0.254237
2024-09-24,-1.121695,1.958646,-0.326305,-1.517174
2024-09-25,0.418751,-0.06361,-1.860928,1.033652
2024-09-26,0.946772,0.120705,1.773967,0.103898
2024-09-27,-0.921028,1.464708,0.000993,-0.010987
2024-09-28,-1.568773,0.796082,-0.961733,0.481026


In [21]:
df.sort_values(by="C")

Unnamed: 0,A,B,C,D
2024-09-25,0.418751,-0.06361,-1.860928,1.033652
2024-09-23,-0.662187,-0.265087,-1.469091,0.254237
2024-09-22,1.487447,-0.418853,-1.443539,-1.032403
2024-09-28,-1.568773,0.796082,-0.961733,0.481026
2024-09-19,-2.568066,0.788232,-0.915825,-2.818292
2024-09-24,-1.121695,1.958646,-0.326305,-1.517174
2024-09-21,0.938266,-0.537379,-0.135296,-0.375633
2024-09-27,-0.921028,1.464708,0.000993,-0.010987
2024-09-20,-0.699118,0.635301,0.785961,0.103549
2024-09-26,0.946772,0.120705,1.773967,0.103898


In [22]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2024-09-22,1.487447,-0.418853
2024-09-23,-0.662187,-0.265087


In [23]:
df[df > 0]

Unnamed: 0,A,B,C,D
2024-09-19,,0.788232,,
2024-09-20,,0.635301,0.785961,0.103549
2024-09-21,0.938266,,,
2024-09-22,1.487447,,,
2024-09-23,,,,0.254237
2024-09-24,,1.958646,,
2024-09-25,0.418751,,,1.033652
2024-09-26,0.946772,0.120705,1.773967,0.103898
2024-09-27,,1.464708,0.000993,
2024-09-28,,0.796082,,0.481026


In [24]:
df.mean()

A   -0.374963
B    0.447875
C   -0.455180
D   -0.377813
dtype: float64

In [25]:
df.median()

A   -0.680653
B    0.378003
C   -0.621065
D    0.046281
dtype: float64

In [26]:
df.mode()

Unnamed: 0,A,B,C,D
0,-2.568066,-0.537379,-1.860928,-2.818292
1,-1.568773,-0.418853,-1.469091,-1.517174
2,-1.121695,-0.265087,-1.443539,-1.032403
3,-0.921028,-0.06361,-0.961733,-0.375633
4,-0.699118,0.120705,-0.915825,-0.010987
5,-0.662187,0.635301,-0.326305,0.103549
6,0.418751,0.788232,-0.135296,0.103898
7,0.938266,0.796082,0.000993,0.254237
8,0.946772,1.464708,0.785961,0.481026
9,1.487447,1.958646,1.773967,1.033652


In [27]:
df.transform(lambda x: x * 101.2)

Unnamed: 0,A,B,C,D
2024-09-19,-259.888282,79.769059,-92.681523,-285.211139
2024-09-20,-70.750752,64.292416,79.539285,10.479143
2024-09-21,94.952555,-54.382708,-13.691932,-38.014089
2024-09-22,150.529668,-42.387957,-146.086154,-104.479174
2024-09-23,-67.013335,-26.826756,-148.672036,25.728774
2024-09-24,-113.51552,198.214949,-33.022059,-153.538054
2024-09-25,42.377615,-6.437308,-188.325915,104.605589
2024-09-26,95.81328,12.215317,179.525442,10.514521
2024-09-27,-93.207989,148.22849,0.100487,-1.111905
2024-09-28,-158.759862,80.563509,-97.327365,48.679799


In [28]:
s = pd.Series(["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"])

s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [29]:
left = pd.DataFrame({"key": ["foo", "foo"], "lval": [1, 2]})

right = pd.DataFrame({"key": ["foo", "foo"], "rval": [4, 5]})

left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [31]:
df.groupby(["A", "B"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
-2.568066,0.788232,-0.915825,-2.818292
-1.568773,0.796082,-0.961733,0.481026
-1.121695,1.958646,-0.326305,-1.517174
-0.921028,1.464708,0.000993,-0.010987
-0.699118,0.635301,0.785961,0.103549
-0.662187,-0.265087,-1.469091,0.254237
0.418751,-0.06361,-1.860928,1.033652
0.938266,-0.537379,-0.135296,-0.375633
0.946772,0.120705,1.773967,0.103898
1.487447,-0.418853,-1.443539,-1.032403
