# *Pandas Tutorial (day-11)*
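*This notebook walks through the basics of pandas: creating Series and DataFrames, viewing and describing data, sorting, selecting rows and columns, and filtering with boolean conditions.*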

# **Install libraries**

In [76]:
# Uncomment to install the dependencies into the running kernel's environment.
# %pip install pandas
# %pip install numpy

# **Import libraries**

In [77]:
import pandas as pd
import numpy as np

# **Object Creation**

In [78]:
# Build a Series from a plain list; np.nan marks a missing value, so the
# integers are stored as float64.
s = pd.Series(data=[1, 3, np.nan, 5, 7, 8, 9])
s

0    1.0
1    3.0
2    NaN
3    5.0
4    7.0
5    8.0
6    9.0
dtype: float64

In [79]:
# Six consecutive calendar days starting on 2 Aug 2022 (daily frequency).
dates = pd.date_range(start="20220802", periods=6, freq="D")
dates

DatetimeIndex(['2022-08-02', '2022-08-03', '2022-08-04', '2022-08-05',
               '2022-08-06', '2022-08-07'],
              dtype='datetime64[ns]', freq='D')

In [80]:
# Draw the 6x4 standard-normal sample from a seeded generator so that every
# Restart & Run All produces the same frame (the unseeded global RNG returned
# different values on each run).
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [81]:
from pandas import Categorical, array

# One column per kind of input: the scalars (A, B, F) are broadcast to the
# four rows defined by the array-like columns (C, D, E).
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20220802"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["girl", "woman", "girl", "woman"]),
        "F": "females",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-08-02,1.0,3,girl,females
1,1.0,2022-08-02,1.0,3,woman,females
2,1.0,2022-08-02,1.0,3,girl,females
3,1.0,2022-08-02,1.0,3,woman,females


In [82]:
# Each column keeps the dtype of the value it was built from.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# **Data View**

In [83]:
# Preview the first two rows.
df.head(2)

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002


In [84]:
# Preview the last two rows.
df.tail(2)

Unnamed: 0,A,B,C,D
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [85]:
# Row labels of df2: the integers 0-3 supplied by the index of column C's Series.
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

# **Convert to Numpy**

In [86]:
# Values only: the date index and the column labels are not part of the array.
df.to_numpy()

array([[-0.67361137, -0.10849114, -0.05480498, -0.9867682 ],
       [-0.88093121,  0.37324031,  0.88236356,  0.14500215],
       [-0.44686102, -0.0713687 , -0.99142121,  0.94468392],
       [ 0.7814025 , -0.52617266,  0.74432759,  0.56257555],
       [ 0.30446906,  0.690623  , -0.10017459,  0.89218817],
       [-1.25862487,  0.04801167,  0.5222181 , -0.55380428]])

# **Describe Data**

In [87]:
# Per-column summary: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.362359,0.06764,0.167085,0.167313
std,0.765386,0.420677,0.697675,0.792262
min,-1.258625,-0.526173,-0.991421,-0.986768
25%,-0.829101,-0.099211,-0.088832,-0.379103
50%,-0.560236,-0.011679,0.233707,0.353789
75%,0.116637,0.291933,0.6888,0.809785
max,0.781402,0.690623,0.882364,0.944684


# **To Transpose Data**

In [88]:
# Transpose: the dates become the columns and A-D become the row labels.
df.T

Unnamed: 0,2022-08-02,2022-08-03,2022-08-04,2022-08-05,2022-08-06,2022-08-07
A,-0.673611,-0.880931,-0.446861,0.781402,0.304469,-1.258625
B,-0.108491,0.37324,-0.071369,-0.526173,0.690623,0.048012
C,-0.054805,0.882364,-0.991421,0.744328,-0.100175,0.522218
D,-0.986768,0.145002,0.944684,0.562576,0.892188,-0.553804


# **Data Sorting**

In [89]:
# axis=1 sorts by column label, not by the row index; the columns are already
# in A-D order, so the result looks the same as df.
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [90]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-07,-1.258625,0.048012,0.522218,-0.553804
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-06,0.304469,0.690623,-0.100175,0.892188


In [91]:
# Selecting a single column by label returns a Series.
df["A"]

2022-08-02   -0.673611
2022-08-03   -0.880931
2022-08-04   -0.446861
2022-08-05    0.781402
2022-08-06    0.304469
2022-08-07   -1.258625
Freq: D, Name: A, dtype: float64

In [92]:
# Select column B by label.
df["B"]

2022-08-02   -0.108491
2022-08-03    0.373240
2022-08-04   -0.071369
2022-08-05   -0.526173
2022-08-06    0.690623
2022-08-07    0.048012
Freq: D, Name: B, dtype: float64

In [93]:
# Select column C by label.
df["C"]

2022-08-02   -0.054805
2022-08-03    0.882364
2022-08-04   -0.991421
2022-08-05    0.744328
2022-08-06   -0.100175
2022-08-07    0.522218
Freq: D, Name: C, dtype: float64

# **Row-wise Selection**

In [94]:
# A slice inside [] selects rows; 0: starts at the first row and has no stop,
# so every row is returned.
df[0:]

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


# **To Locate Specific Data**

In [95]:
# Label-based selection: rows from 2 Aug through 3 Aug 2022 (both ends
# included), restricted to columns A and B.
df.loc["20220802":"20220803", ["A", "B"]]

Unnamed: 0,A,B
2022-08-02,-0.673611,-0.108491
2022-08-03,-0.880931,0.37324


In [96]:
# Position-based selection: :2 takes the first two rows with all columns.
df.iloc[:2]

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002


# **Boolean Indexing**

In [97]:
# Show the full frame again as a reference for the filters that follow.
df

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [98]:
# Element-wise mask: values that are not greater than 0 are shown as NaN;
# the shape of the frame is unchanged.
df[df > 0]

Unnamed: 0,A,B,C,D
2022-08-02,,,,
2022-08-03,,0.37324,0.882364,0.145002
2022-08-04,,,,0.944684
2022-08-05,0.781402,,0.744328,0.562576
2022-08-06,0.304469,0.690623,,0.892188
2022-08-07,,0.048012,0.522218,


In [99]:
# Row filter: keep only the rows whose value in column A is negative.
df[df["A"] < 0]

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [100]:
# Work on a copy so that the cells below can add columns without changing df.
# Note: this rebinds the name df2, replacing the mixed-dtype frame built earlier.
df2 = df.copy()
df2

Unnamed: 0,A,B,C,D
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768
2022-08-03,-0.880931,0.37324,0.882364,0.145002
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684
2022-08-05,0.781402,-0.526173,0.744328,0.562576
2022-08-06,0.304469,0.690623,-0.100175,0.892188
2022-08-07,-1.258625,0.048012,0.522218,-0.553804


In [101]:
# Add a string column E: six labels, one for each of the six rows.
df2["E"] = ["one", "one", "two", "three", "one", "three"]
df2

Unnamed: 0,A,B,C,D,E
2022-08-02,-0.673611,-0.108491,-0.054805,-0.986768,one
2022-08-03,-0.880931,0.37324,0.882364,0.145002,one
2022-08-04,-0.446861,-0.071369,-0.991421,0.944684,two
2022-08-05,0.781402,-0.526173,0.744328,0.562576,three
2022-08-06,0.304469,0.690623,-0.100175,0.892188,one
2022-08-07,-1.258625,0.048012,0.522218,-0.553804,three


In [107]:
from statistics import mean


('df2', 'pd.concat([A, B, C, D])')

('df2', 'pd.concat([A, B, C, D])')

In [109]:
# Keep every row but only the first two columns (A and B).
df2 = df2.iloc[:, :2]
df2

Unnamed: 0,A,B
2022-08-02,-0.673611,-0.108491
2022-08-03,-0.880931,0.37324
2022-08-04,-0.446861,-0.071369
2022-08-05,0.781402,-0.526173
2022-08-06,0.304469,0.690623
2022-08-07,-1.258625,0.048012
