## What is Pandas?
Pandas is the Python Data Analysis Library; its name comes from "panel data" and doubles as a play on "Python data analysis".
It is used to analyze, clean, explore, and manipulate data easily and efficiently.
You can think of it as a tool that allows you to handle data tables (rows and columns) — just like an Excel sheet — but with Python power.
It is built on top of NumPy, so it works very efficiently with numerical data.

Pandas = Excel + Python + Speed + Automation
You can use it to load data, filter, clean, calculate, and visualize it easily.

In [2]:
import pandas as pd
import numpy as np

In [6]:
# Build the integers 1..30 and arrange them into a 5-row x 6-column array
np.arange(1,31).reshape(5,6)

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29, 30]])

In [85]:
# Build a 5x6 DataFrame holding the integers 1..30, with labelled rows
# (Row1..Row5) and labelled columns (Column1..Column6).
df = pd.DataFrame(
    np.arange(1, 31).reshape(5, 6),
    index=[f"Row{n}" for n in range(1, 6)],
    columns=[f"Column{n}" for n in range(1, 7)],
)

In [9]:
# Preview the first rows of the frame (all 5 rows are shown here)
df.head()

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [10]:
# Preview the last rows of the frame (all 5 rows are shown here)
df.tail()

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [11]:
# Confirm the object is a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [12]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Column1  5 non-null      int64
 1   Column2  5 non-null      int64
 2   Column3  5 non-null      int64
 3   Column4  5 non-null      int64
 4   Column5  5 non-null      int64
 5   Column6  5 non-null      int64
dtypes: int64(6)
memory usage: 280.0+ bytes


In [13]:
# Per-column summary statistics: count, mean, std, min, quartiles (25/50/75%) and max
df.describe()

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
count,5.0,5.0,5.0,5.0,5.0,5.0
mean,13.0,14.0,15.0,16.0,17.0,18.0
std,9.486833,9.486833,9.486833,9.486833,9.486833,9.486833
min,1.0,2.0,3.0,4.0,5.0,6.0
25%,7.0,8.0,9.0,10.0,11.0,12.0
50%,13.0,14.0,15.0,16.0,17.0,18.0
75%,19.0,20.0,21.0,22.0,23.0,24.0
max,25.0,26.0,27.0,28.0,29.0,30.0


In [14]:
# Indexing options demonstrated in the cells that follow:
#   df["col"] / df[["a", "b"]]  -> select columns by name
#   df.loc[row_label]            -> select rows by index label
#   df.iloc[row_pos, col_pos]    -> select by integer position ("index location")
df.head()

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [15]:
# Select a single column by name
df['Column1']

Row1     1
Row2     7
Row3    13
Row4    19
Row5    25
Name: Column1, dtype: int64

In [16]:
# Select several columns at once by passing a list of column names
df[['Column1', 'Column2', 'Column3', 'Column4']]

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,1,2,3,4
Row2,7,8,9,10
Row3,13,14,15,16
Row4,19,20,21,22
Row5,25,26,27,28


In [17]:
# Selecting with a list of column names yields a DataFrame
type(df[['Column1', 'Column2', 'Column3', 'Column4']])

pandas.core.frame.DataFrame

In [18]:
# Selecting a single column name yields a Series
type(df['Column1'])

pandas.core.series.Series

In [19]:
# Label-based row lookup: .loc with one index label returns that row's values
df.loc['Row2']

Column1     7
Column2     8
Column3     9
Column4    10
Column5    11
Column6    12
Name: Row2, dtype: int64

In [20]:
# A single row selected via .loc comes back as a Series
type(df.loc['Row2'])

pandas.core.series.Series

In [21]:
# Select several rows by index label: pass a list of labels to .loc
df.loc[['Row1','Row2', 'Row3', 'Row4']]

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24


In [22]:
# Show the full frame again as a reference for the positional (.iloc) slices below
df.head()

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [23]:
# Positional slice (0-based, stop excluded): row positions 1-3 and column positions 1-3
# -> Row2..Row4 x Column2..Column4
df.iloc[1:4,1:4]

Unnamed: 0,Column2,Column3,Column4
Row2,8,9,10
Row3,14,15,16
Row4,20,21,22


In [24]:
# Row positions 2-3 and column positions 0-1 -> Row3..Row4 x Column1..Column2
df.iloc[2:4,0:2]

Unnamed: 0,Column1,Column2
Row3,13,14
Row4,19,20


In [26]:
# Top-left 3x3 corner: the first three rows and the first three columns
df.iloc[0:3,0:3]

Unnamed: 0,Column1,Column2,Column3
Row1,1,2,3
Row2,7,8,9
Row3,13,14,15


In [28]:
# Open-ended slices run to the end: rows from position 2 on, columns from position 4 on
df.iloc[2:,4:]

Unnamed: 0,Column5,Column6
Row3,17,18
Row4,23,24
Row5,29,30


In [35]:
# All rows, but only column positions 0 and 5; a list picks non-adjacent columns
df.iloc[:,[0,5]]

Unnamed: 0,Column1,Column6
Row1,1,6
Row2,7,12
Row3,13,18
Row4,19,24
Row5,25,30


In [36]:
# All rows, every column except the first (column positions 1 onward)
df.iloc[:,1:]

Unnamed: 0,Column2,Column3,Column4,Column5,Column6
Row1,2,3,4,5,6
Row2,8,9,10,11,12
Row3,14,15,16,17,18
Row4,20,21,22,23,24
Row5,26,27,28,29,30


In [39]:
# Convert part of the DataFrame (all rows, columns from position 2 on) into a NumPy array.
# to_numpy() is the accessor the pandas documentation recommends over the older
# .values attribute; for this all-integer frame the resulting array is the same.
df.iloc[:, 2:].to_numpy()

array([[ 3,  4,  5,  6],
       [ 9, 10, 11, 12],
       [15, 16, 17, 18],
       [21, 22, 23, 24],
       [27, 28, 29, 30]])

In [41]:
# Operations: count missing values per column.
# isnull() flags each cell, and sum() then adds up the flags column by column.
df.isnull().sum()

Column1    0
Column2    0
Column3    0
Column4    0
Column5    0
Column6    0
dtype: int64

In [80]:
# Small 3x4 example frame with gaps: np.nan (NaN, "Not a Number") marks a missing value.
# NOTE: this rebinds `df`, replacing the 5x6 integer frame used in the earlier cells.
df = pd.DataFrame(
    data=[
        [2, np.nan, 3, np.nan],
        [1, 3, 4, 7],
        [np.nan, 1, 5, 8],
    ],
    index=["Row1", "Row2", "Row3"],
    columns=["Column1", "Column2", "Column3", "Column4"],
)

In [48]:
# Display the frame; columns that contain a missing value show as floats (2.0, 3.0, ...),
# while the fully populated Column3 keeps its integer values
df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,2.0,,3,
Row2,1.0,3.0,4,7.0
Row3,,1.0,5,8.0


In [49]:
# Missing values per column: Column1, Column2 and Column4 each have one, Column3 has none
df.isnull().sum()

Column1    1
Column2    1
Column3    0
Column4    1
dtype: int64

In [51]:
# Boolean per column: True where the column has no missing values at all
df.isnull().sum() == 0

Column1    False
Column2    False
Column3     True
Column4    False
dtype: bool

In [52]:
# Show the frame again as a reference for the value_counts / unique examples below
df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,2.0,,3,
Row2,1.0,3.0,4,7.0
Row3,,1.0,5,8.0


In [57]:
# Frequency of each distinct value in Column2; the NaN in Row1 is left out of the counts
df["Column2"].value_counts()

Column2
3.0    1
1.0    1
Name: count, dtype: int64

In [58]:
# Frequency of each distinct value in Column4; again the NaN entry is not counted
df["Column4"].value_counts()

Column4
7.0    1
8.0    1
Name: count, dtype: int64

In [59]:
# Distinct values of Column3, returned as a NumPy array (integers, since nothing is missing)
df["Column3"].unique()

array([3, 4, 5])

In [60]:
# Distinct values of Column1; unlike value_counts(), unique() keeps nan in the result
df["Column1"].unique()

array([ 2.,  1., nan])

In [63]:
# Element-wise comparison produces a boolean Series (a mask).
# Row1 holds NaN in Column2, and the comparison evaluates to False for it.
df["Column2"]>2

Row1    False
Row2     True
Row3    False
Name: Column2, dtype: bool

In [81]:
# Boolean-mask filtering: keep only the rows where Column3 is greater than 3
df[df["Column3"]>3]

Unnamed: 0,Column1,Column2,Column3,Column4
Row2,1.0,3.0,4,7.0
Row3,,1.0,5,8.0


In [84]:
# Rows where Column2 > 1: Row1 (NaN) and Row3 (exactly 1.0) both fail the test,
# so only Row2 remains
df[df["Column2"]>1]

Unnamed: 0,Column1,Column2,Column3,Column4
Row2,1.0,3.0,4,7.0


In [76]:
# Rebuild the 5x6 integer frame before the filters below. `df` was rebound to the
# small 3x4 NaN example earlier in the notebook, so on a fresh top-to-bottom run the
# following cells would otherwise hit the wrong frame (and fail on Column5).
df = pd.DataFrame(
    data=np.arange(1, 31).reshape(5, 6),
    index=["Row1", "Row2", "Row3", "Row4", "Row5"],
    columns=["Column1", "Column2", "Column3", "Column4", "Column5", "Column6"],
)
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row1,1,2,3,4,5,6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [88]:
# Keep rows whose Column1 value exceeds 5 (every row except Row1)
df[df["Column1"]>5]

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row2,7,8,9,10,11,12
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [86]:
# Keep rows whose Column2 value exceeds 10 (Row3..Row5)
df[df["Column2"]>10]

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [90]:
# Keep rows whose Column5 value exceeds 15 (Row3..Row5)
df[df["Column5"]>15]

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row3,13,14,15,16,17,18
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30


In [92]:
# Strict inequality: Row3 has exactly 15 in Column3, so it is excluded; Row4 and Row5 remain
df[df["Column3"]>15]

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
Row4,19,20,21,22,23,24
Row5,25,26,27,28,29,30
