##  📚 Essential Basic Functionality

> Pandas is a foundational library in Python for working with structured data. It provides fast, flexible, and expressive tools designed to make data analysis and manipulation easy and intuitive. There are several essential functionalities that are fundamental to using Pandas effectively.

In [1]:
import pandas as pd
import numpy as np

### Head and tail

- Head shows the first 5 rows by default.
- Tail shows the last 5 rows by default.

In [2]:
long_series = pd.Series(np.random.randn(10))
long_series

0    0.588388
1   -0.382770
2   -0.510204
3   -1.571094
4   -0.685633
5    0.938993
6   -0.578108
7    0.710409
8    1.832939
9    1.394466
dtype: float64

In [3]:
# Head
long_series.head()

0    0.588388
1   -0.382770
2   -0.510204
3   -1.571094
4   -0.685633
dtype: float64

In [4]:
# Tail
long_series.tail()

5    0.938993
6   -0.578108
7    0.710409
8    1.832939
9    1.394466
dtype: float64

### Attributes and underlying data

- shape: gives the axis dimensions of the object, consistent with ndarray
- Axis labels:
    - Series: index (only axis)
    - DataFrame: index and columns


In [5]:
index = pd.date_range('20230101', periods=10)
df = pd.DataFrame(np.random.randn(10, 4), index=index, columns=list ('ABCD'))
df

Unnamed: 0,A,B,C,D
2023-01-01,-0.046698,-1.364044,0.984077,-0.186613
2023-01-02,-0.218112,0.479442,1.179653,1.151475
2023-01-03,1.619945,1.942223,-2.079273,-0.272685
2023-01-04,-0.359457,-0.605702,-0.068462,-0.385253
2023-01-05,0.242861,-0.593055,1.664027,0.444621
2023-01-06,0.656136,-0.903173,2.242868,-0.013155
2023-01-07,-1.266649,-0.201024,1.076361,0.704522
2023-01-08,0.143678,1.476587,0.088989,-0.043516
2023-01-09,0.502309,0.723638,-0.069563,-1.02225
2023-01-10,0.837307,1.551518,0.539915,-1.271965


In [6]:
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,a,b,c,d
2023-01-01,-0.046698,-1.364044,0.984077,-0.186613
2023-01-02,-0.218112,0.479442,1.179653,1.151475
2023-01-03,1.619945,1.942223,-2.079273,-0.272685
2023-01-04,-0.359457,-0.605702,-0.068462,-0.385253
2023-01-05,0.242861,-0.593055,1.664027,0.444621
2023-01-06,0.656136,-0.903173,2.242868,-0.013155
2023-01-07,-1.266649,-0.201024,1.076361,0.704522
2023-01-08,0.143678,1.476587,0.088989,-0.043516
2023-01-09,0.502309,0.723638,-0.069563,-1.02225
2023-01-10,0.837307,1.551518,0.539915,-1.271965


#### Numpy

- It is a reliable and consistent method to convert pandas objects to NumPy arrays, offering better control over data types and compatibility with extension types compared other methods.

In [7]:
df_numpy = df.to_numpy()
df_numpy

array([[-0.04669848, -1.36404443,  0.98407709, -0.18661307],
       [-0.21811214,  0.47944185,  1.17965282,  1.15147538],
       [ 1.61994491,  1.94222279, -2.07927264, -0.27268548],
       [-0.35945687, -0.60570155, -0.06846157, -0.38525266],
       [ 0.24286072, -0.59305452,  1.664027  ,  0.44462088],
       [ 0.65613583, -0.90317341,  2.24286757, -0.01315494],
       [-1.26664936, -0.20102415,  1.07636137,  0.70452185],
       [ 0.14367807,  1.47658705,  0.08898858, -0.04351575],
       [ 0.50230853,  0.72363787, -0.0695631 , -1.02225035],
       [ 0.83730723,  1.5515178 ,  0.53991482, -1.27196511]])

## Matching / broadcasting behavior

In [8]:
df_mathing_broadcasting = pd.DataFrame(
    {
        "one": pd.Series(np.random.randint(0, 10, 3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randint(0, 10, 3), index=["a", "b", "c"]),
        "three": pd.Series(np.random.randint(0, 10, 4), index=["a", "b", "c", "d"]),

    }
)
df_mathing_broadcasting

Unnamed: 0,one,two,three
a,3.0,8.0,0
b,7.0,4.0,3
c,2.0,1.0,7
d,,,5


### Sub

- It is a method used to perform subtraction between Series or DataFrames.

In [9]:
df_sub_l1 = df_mathing_broadcasting.iloc[2]
df_sub_l1

one      2.0
two      1.0
three    7.0
Name: c, dtype: float64

In [10]:
df_mathing_broadcasting.sub(df_sub_l1 , axis="columns")

Unnamed: 0,one,two,three
a,1.0,7.0,-7.0
b,5.0,3.0,-4.0
c,0.0,0.0,0.0
d,,,-2.0


## Missing data / operations with fill values

In [11]:
df_filldata = df.copy()
df_filldata.iloc[2, 2] = np.nan
df_filldata.head()

Unnamed: 0,a,b,c,d
2023-01-01,-0.046698,-1.364044,0.984077,-0.186613
2023-01-02,-0.218112,0.479442,1.179653,1.151475
2023-01-03,1.619945,1.942223,,-0.272685
2023-01-04,-0.359457,-0.605702,-0.068462,-0.385253
2023-01-05,0.242861,-0.593055,1.664027,0.444621


In [12]:
df_filldata.fillna(np.random.rand())
df_filldata.head()

Unnamed: 0,a,b,c,d
2023-01-01,-0.046698,-1.364044,0.984077,-0.186613
2023-01-02,-0.218112,0.479442,1.179653,1.151475
2023-01-03,1.619945,1.942223,,-0.272685
2023-01-04,-0.359457,-0.605702,-0.068462,-0.385253
2023-01-05,0.242861,-0.593055,1.664027,0.444621


## Boolean reductions

- It is a way to summarize a boolean result.
-  <code>empty, any(), all(), and bool()</code>

In [41]:
df_bool_reduction = pd.DataFrame({
    "A": [1, 2, 3],
    "B": [0, 0, 5],
    "C": [9, 9, 9]
})

df_bool_reduction

Unnamed: 0,A,B,C
0,1,0,9
1,2,0,9
2,3,5,9


In [42]:
(df_bool_reduction > 0).all()

A     True
B    False
C     True
dtype: bool

In [43]:
(df_bool_reduction > 0).any()

A    True
B    True
C    True
dtype: bool