In [1]:
import numpy as np
import pandas as pd

# Object Creation
<hr>
<b>Creating a Series object, which is like a list whose elements each carry an index label</b>

In [2]:
# One-dimensional labelled array; the NaN entry makes the whole Series float64,
# and with no index given the labels default to 0..5.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

<hr>
<b>Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns</b>

In [4]:
# Four consecutive calendar days starting 2022-02-26; used as the row index of df.
dates = pd.date_range(start="20220226", periods=4, freq="D")

In [5]:
dates

DatetimeIndex(['2022-02-26', '2022-02-27', '2022-02-28', '2022-03-01'], dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the 4x3 standard-normal sample from a seeded generator so that
# Restart & Run All rebuilds the same frame every time (the unseeded
# np.random.randn call produced different numbers on each run).
# Rows are labelled by `dates`, columns are P, Q and R.
df = pd.DataFrame(
    np.random.default_rng(seed=42).standard_normal((4, 3)),
    index=dates,
    columns=list("PQR"),
)

In [86]:
df

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


<hr/>
<b>Creating a DataFrame by passing a dictionary of objects that can be converted into a series-like structure</b>

In [19]:
# Build a frame from a mapping of column name -> value. Scalars (A, B, F) are
# repeated on every row; the Series, array and Categorical (C, D, E) each
# supply one entry per row, giving four rows and a different dtype per column.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20220226"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["Hello", "world", "I'm", "palli"]),
        F="cp",
    )
)

In [20]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-02-26,1.0,3,Hello,cp
1,1.0,2022-02-26,1.0,3,world,cp
2,1.0,2022-02-26,1.0,3,I'm,cp
3,1.0,2022-02-26,1.0,3,palli,cp


<hr>
<b>The columns of the resulting DataFrame have different dtypes:</b>

In [22]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

<hr>
<b>Exploring a few of the public attributes offered by TAB completion</b>

In [24]:
df2.A

0    1.0
1    1.0
2    1.0
3    1.0
Name: A, dtype: float64

In [25]:
# No parentheses, so the method is not called; the cell only echoes the bound-method repr.
df2.abs

<bound method NDFrame.abs of      A          B    C  D      E   F
0  1.0 2022-02-26  1.0  3  Hello  cp
1  1.0 2022-02-26  1.0  3  world  cp
2  1.0 2022-02-26  1.0  3    I'm  cp
3  1.0 2022-02-26  1.0  3  palli  cp>

In [28]:
# Attribute access only (not a call): the output is the bound-method repr, not a filtered frame.
df2.at_time

<bound method NDFrame.at_time of      A          B    C  D      E   F
0  1.0 2022-02-26  1.0  3  Hello  cp
1  1.0 2022-02-26  1.0  3  world  cp
2  1.0 2022-02-26  1.0  3    I'm  cp
3  1.0 2022-02-26  1.0  3  palli  cp>

# Viewing Data

In [29]:
df.head()

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


In [31]:
df.tail(2)

Unnamed: 0,P,Q,R
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


In [32]:
df.index

DatetimeIndex(['2022-02-26', '2022-02-27', '2022-02-28', '2022-03-01'], dtype='datetime64[ns]', freq='D')

In [34]:
df.columns

Index(['P', 'Q', 'R'], dtype='object')

<hr><b>For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data:</b>

In [36]:
df.to_numpy()

array([[ 1.578501  ,  0.01711906,  1.60940335],
       [ 0.02606583, -0.04255878,  0.63370167],
       [ 1.01566551, -1.02310767,  1.99121758],
       [-0.00542942, -0.57472235, -1.05189879]])

In [37]:
df2.to_numpy()

array([[1.0, Timestamp('2022-02-26 00:00:00'), 1.0, 3, 'Hello', 'cp'],
       [1.0, Timestamp('2022-02-26 00:00:00'), 1.0, 3, 'world', 'cp'],
       [1.0, Timestamp('2022-02-26 00:00:00'), 1.0, 3, "I'm", 'cp'],
       [1.0, Timestamp('2022-02-26 00:00:00'), 1.0, 3, 'palli', 'cp']],
      dtype=object)

In [38]:
df.describe()

Unnamed: 0,P,Q,R
count,4.0,4.0,4.0
mean,0.653701,-0.405817,0.795606
std,0.777743,0.490037,1.357846
min,-0.005429,-1.023108,-1.051899
25%,0.018192,-0.686819,0.212302
50%,0.520866,-0.308641,1.121553
75%,1.156374,-0.027639,1.704857
max,1.578501,0.017119,1.991218


In [39]:
df.T

Unnamed: 0,2022-02-26,2022-02-27,2022-02-28,2022-03-01
P,1.578501,0.026066,1.015666,-0.005429
Q,0.017119,-0.042559,-1.023108,-0.574722
R,1.609403,0.633702,1.991218,-1.051899


In [40]:
df.sort_index(axis=0)

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


In [42]:
# Order the columns by label, descending (R, Q, P); the rows are left as they are.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,R,Q,P
2022-02-26,1.609403,0.017119,1.578501
2022-02-27,0.633702,-0.042559,0.026066
2022-02-28,1.991218,-1.023108,1.015666
2022-03-01,-1.051899,-0.574722,-0.005429


In [43]:
df.sort_values(by="Q")

Unnamed: 0,P,Q,R
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899
2022-02-27,0.026066,-0.042559,0.633702
2022-02-26,1.578501,0.017119,1.609403


# Selection

In [50]:
df["P"]

2022-02-26    1.578501
2022-02-27    0.026066
2022-02-28    1.015666
2022-03-01   -0.005429
Freq: D, Name: P, dtype: float64

In [51]:
# An integer slice inside [] picks rows by position; here the first two rows.
df[0:2]

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702


In [52]:
# A string slice inside [] selects rows by index label, and both endpoints are included.
df["20220227":"20220301"]

Unnamed: 0,P,Q,R
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


In [53]:
df.loc[dates[0]]

P    1.578501
Q    0.017119
R    1.609403
Name: 2022-02-26 00:00:00, dtype: float64

In [54]:
df.loc[:,["P","Q"]]

Unnamed: 0,P,Q
2022-02-26,1.578501,0.017119
2022-02-27,0.026066,-0.042559
2022-02-28,1.015666,-1.023108
2022-03-01,-0.005429,-0.574722


In [55]:
df.loc["20220228",["Q","R"]]

Q   -1.023108
R    1.991218
Name: 2022-02-28 00:00:00, dtype: float64

In [56]:
df.loc[dates[0],"P"]

1.5785009991935166

In [57]:
df.at[dates[0],"P"]

1.5785009991935166

In [58]:
df.iloc[2]

P    1.015666
Q   -1.023108
R    1.991218
Name: 2022-02-28 00:00:00, dtype: float64

In [59]:
df.iloc[0:1,1:2]

Unnamed: 0,Q
2022-02-26,0.017119


In [60]:
df.iloc[[0,2],[1]]

Unnamed: 0,Q
2022-02-26,0.017119
2022-02-28,-1.023108


In [61]:
df.iloc[1:2,:]

Unnamed: 0,P,Q,R
2022-02-27,0.026066,-0.042559,0.633702


In [62]:
df.iloc[:,0:2]

Unnamed: 0,P,Q
2022-02-26,1.578501,0.017119
2022-02-27,0.026066,-0.042559
2022-02-28,1.015666,-1.023108
2022-03-01,-0.005429,-0.574722


In [63]:
df.iloc[2,2]

1.9912175773452279

In [66]:
df.iat[2,2]

1.9912175773452279

In [67]:
df[df["P"]>0]

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218


In [68]:
df[df>0]

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,,0.633702
2022-02-28,1.015666,,1.991218
2022-03-01,,,


In [69]:
df3 = df.copy()

In [70]:
df3

Unnamed: 0,P,Q,R
2022-02-26,1.578501,0.017119,1.609403
2022-02-27,0.026066,-0.042559,0.633702
2022-02-28,1.015666,-1.023108,1.991218
2022-03-01,-0.005429,-0.574722,-1.051899


In [71]:
# Overwrite column Q of the copy with one string label per row; df itself is not touched.
df3["Q"] = ["one", "two", "three", "four"]

In [72]:
df3

Unnamed: 0,P,Q,R
2022-02-26,1.578501,one,1.609403
2022-02-27,0.026066,two,0.633702
2022-02-28,1.015666,three,1.991218
2022-03-01,-0.005429,four,-1.051899


In [73]:
# Keep only the rows whose Q label is one of the listed values.
df3.loc[df3["Q"].isin(["two", "four"])]

Unnamed: 0,P,Q,R
2022-02-27,0.026066,two,0.633702
2022-03-01,-0.005429,four,-1.051899


In [74]:
# Six integers labelled with six consecutive days starting 2022-02-26.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20220226", periods=6),
)

In [75]:
s1

2022-02-26    1
2022-02-27    2
2022-02-28    3
2022-03-01    4
2022-03-02    5
2022-03-03    6
Freq: D, dtype: int64

In [76]:
# Assignment aligns on the index: s1 holds six dates, but only the four that exist in df are used.
df["F"] = s1

In [77]:
# Label-based scalar write: set column P on the first date to 0.
df.at[dates[0], "P"] = 0

In [78]:
df

Unnamed: 0,P,Q,R,F
2022-02-26,0.0,0.017119,1.609403,1
2022-02-27,0.026066,-0.042559,0.633702,2
2022-02-28,1.015666,-1.023108,1.991218,3
2022-03-01,-0.005429,-0.574722,-1.051899,4


In [79]:
# Position-based scalar write: row 0, column 1 (Q) is set to 0.
df.iat[0, 1] = 0

In [80]:
df

Unnamed: 0,P,Q,R,F
2022-02-26,0.0,0.0,1.609403,1
2022-02-27,0.026066,-0.042559,0.633702,2
2022-02-28,1.015666,-1.023108,1.991218,3
2022-03-01,-0.005429,-0.574722,-1.051899,4


In [81]:
# Replace column R wholesale with an integer array. Plain column assignment
# swaps in the new array (and its integer dtype) on every pandas version,
# whereas `df.loc[:, "R"] = ...` writes into the existing float column on
# pandas >= 2.0 and would leave R displayed as 5.0 instead of 5.
df["R"] = np.array([5] * len(df))

In [82]:
df

Unnamed: 0,P,Q,R,F
2022-02-26,0.0,0.0,5,1
2022-02-27,0.026066,-0.042559,5,2
2022-02-28,1.015666,-1.023108,5,3
2022-03-01,-0.005429,-0.574722,5,4


In [83]:
df4 = df.copy()

In [84]:
# Boolean-mask assignment: every strictly positive cell is overwritten with its negation,
# so no positive values remain; zero and negative cells keep their values.
df4[df4>0] = -df4

In [85]:
df4

Unnamed: 0,P,Q,R,F
2022-02-26,0.0,0.0,-5,-1
2022-02-27,-0.026066,-0.042559,-5,-2
2022-02-28,-1.015666,-1.023108,-5,-3
2022-03-01,-0.005429,-0.574722,-5,-4


In [88]:
df

Unnamed: 0,P,Q,R,F
2022-02-26,0.0,0.0,5,1
2022-02-27,0.026066,-0.042559,5,2
2022-02-28,1.015666,-1.023108,5,3
2022-03-01,-0.005429,-0.574722,5,4


# Missing Data