## Pandas

In [30]:
import pandas as pd
import numpy as np

In [31]:
# object creation
s = pd.Series([1,3,np.nan,4,5,68])
s

0     1.0
1     3.0
2     NaN
3     4.0
4     5.0
5    68.0
dtype: float64

In [32]:
dates = pd.date_range("20220608", periods=11)
dates

DatetimeIndex(['2022-06-08', '2022-06-09', '2022-06-10', '2022-06-11',
               '2022-06-12', '2022-06-13', '2022-06-14', '2022-06-15',
               '2022-06-16', '2022-06-17', '2022-06-18'],
              dtype='datetime64[ns]', freq='D')

In [33]:
df = pd.DataFrame(np.random.randn(11,4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2022-06-08,-1.699183,0.838797,1.162041,0.237765
2022-06-09,-1.239197,1.005451,0.701044,0.671601
2022-06-10,-1.292122,0.497907,0.536964,1.439719
2022-06-11,-0.725872,-1.003095,-0.770441,0.032851
2022-06-12,-0.767621,-0.178055,-0.436837,-0.053709
2022-06-13,0.562427,-1.863264,0.179113,0.718221
2022-06-14,0.08973,0.794085,-1.624011,-0.428469
2022-06-15,-0.313202,-0.41007,-0.03953,0.756623
2022-06-16,-0.090382,-0.445668,0.132789,0.402681
2022-06-17,-0.160508,0.472233,-0.233078,1.031168


In [34]:
df2 = pd.DataFrame(
    {
        "A": 2.0,
        "B": pd.Timestamp("20220806"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["girl","woman","test","train"]),
        "F": "females"
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,2.0,2022-08-06,1.0,3,girl,females
1,2.0,2022-08-06,1.0,3,woman,females
2,2.0,2022-08-06,1.0,3,test,females
3,2.0,2022-08-06,1.0,3,train,females


In [35]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [36]:
df2.head()

Unnamed: 0,A,B,C,D,E,F
0,2.0,2022-08-06,1.0,3,girl,females
1,2.0,2022-08-06,1.0,3,woman,females
2,2.0,2022-08-06,1.0,3,test,females
3,2.0,2022-08-06,1.0,3,train,females


In [37]:
df2.tail(3)

Unnamed: 0,A,B,C,D,E,F
1,2.0,2022-08-06,1.0,3,woman,females
2,2.0,2022-08-06,1.0,3,test,females
3,2.0,2022-08-06,1.0,3,train,females


In [38]:
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [39]:
df.to_numpy

<bound method DataFrame.to_numpy of                    A         B         C         D
2022-06-08 -1.699183  0.838797  1.162041  0.237765
2022-06-09 -1.239197  1.005451  0.701044  0.671601
2022-06-10 -1.292122  0.497907  0.536964  1.439719
2022-06-11 -0.725872 -1.003095 -0.770441  0.032851
2022-06-12 -0.767621 -0.178055 -0.436837 -0.053709
2022-06-13  0.562427 -1.863264  0.179113  0.718221
2022-06-14  0.089730  0.794085 -1.624011 -0.428469
2022-06-15 -0.313202 -0.410070 -0.039530  0.756623
2022-06-16 -0.090382 -0.445668  0.132789  0.402681
2022-06-17 -0.160508  0.472233 -0.233078  1.031168
2022-06-18 -0.204682  0.310331 -0.396163 -0.201806>

In [40]:
df.describe()

Unnamed: 0,A,B,C,D
count,11.0,11.0,11.0,11.0
mean,-0.530965,0.001696,-0.071646,0.418786
std,0.678652,0.882536,0.758817,0.565086
min,-1.699183,-1.863264,-1.624011,-0.428469
25%,-1.003409,-0.427869,-0.4165,-0.010429
50%,-0.313202,0.310331,-0.03953,0.402681
75%,-0.125445,0.645996,0.358038,0.737422
max,0.562427,1.005451,1.162041,1.439719


In [41]:
df2.T

Unnamed: 0,0,1,2,3
A,2.0,2.0,2.0,2.0
B,2022-08-06 00:00:00,2022-08-06 00:00:00,2022-08-06 00:00:00,2022-08-06 00:00:00
C,1.0,1.0,1.0,1.0
D,3,3,3,3
E,girl,woman,test,train
F,females,females,females,females


In [42]:
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2022-06-08,-1.699183,0.838797,1.162041,0.237765
2022-06-09,-1.239197,1.005451,0.701044,0.671601
2022-06-10,-1.292122,0.497907,0.536964,1.439719
2022-06-11,-0.725872,-1.003095,-0.770441,0.032851
2022-06-12,-0.767621,-0.178055,-0.436837,-0.053709
2022-06-13,0.562427,-1.863264,0.179113,0.718221
2022-06-14,0.08973,0.794085,-1.624011,-0.428469
2022-06-15,-0.313202,-0.41007,-0.03953,0.756623
2022-06-16,-0.090382,-0.445668,0.132789,0.402681
2022-06-17,-0.160508,0.472233,-0.233078,1.031168


In [46]:
df.loc[dates[3]]

A   -0.725872
B   -1.003095
C   -0.770441
D    0.032851
Name: 2022-06-11 00:00:00, dtype: float64

In [49]:
df.iloc[3:10]

Unnamed: 0,A,B,C,D
2022-06-11,-0.725872,-1.003095,-0.770441,0.032851
2022-06-12,-0.767621,-0.178055,-0.436837,-0.053709
2022-06-13,0.562427,-1.863264,0.179113,0.718221
2022-06-14,0.08973,0.794085,-1.624011,-0.428469
2022-06-15,-0.313202,-0.41007,-0.03953,0.756623
2022-06-16,-0.090382,-0.445668,0.132789,0.402681
2022-06-17,-0.160508,0.472233,-0.233078,1.031168


In [52]:
df[df["A"] > 0.003]

Unnamed: 0,A,B,C,D
2022-06-13,0.562427,-1.863264,0.179113,0.718221
2022-06-14,0.08973,0.794085,-1.624011,-0.428469
