### Pandas

Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

In [1]:
# import libraries
import numpy as np
import pandas as pd

In [2]:
# Build a 5x4 DataFrame holding the integers 0..19 (filled row by row),
# with labelled rows ("Row1".."Row5") and columns ("Column1".."Column4").
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=[f"Row{i}" for i in range(1, 6)],
    columns=[f"Column{j}" for j in range(1, 5)],
)

In [3]:
# display the whole frame; it has only 5 rows and 4 columns
df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [4]:
# select a single row by its index label with .loc;
# the result is a Series indexed by the column names
df.loc["Row1"]

Column1    0
Column2    1
Column3    2
Column4    3
Name: Row1, dtype: int32

In [5]:
# confirm the type: selecting a single row (or a single column)
# yields a one-dimensional Series
type(df.loc["Row1"])

pandas.core.series.Series

In [7]:
# positional selection with .iloc: all rows, every column from
# position 1 onward (i.e. everything except the first column)
df.iloc[:,1:]

Unnamed: 0,Column2,Column3,Column4
Row1,1,2,3
Row2,5,6,7
Row3,9,10,11
Row4,13,14,15
Row5,17,18,19


In [8]:
# confirm the type: a selection spanning several rows and columns
# stays two-dimensional, so it is a DataFrame
type(df.iloc[:,1:])

pandas.core.frame.DataFrame

In [12]:
# all rows of the column at position 0 (Column1);
# a scalar column position yields a Series
df.iloc[:,0]

Row1     0
Row2     4
Row3     8
Row4    12
Row5    16
Name: Column1, dtype: int32

In [13]:
# confirm the type: a single column picked by integer position is a Series
type(df.iloc[:,0])

pandas.core.series.Series

In [14]:
# all columns, rows from position 1 onward (i.e. skip the first row)
df.iloc[1:,:]

Unnamed: 0,Column1,Column2,Column3,Column4
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [16]:
# confirm the type: slicing rows keeps the result two-dimensional,
# so it is a DataFrame
type(df.iloc[1:,:])

pandas.core.frame.DataFrame

In [17]:
# convert the selected columns to a NumPy array; .to_numpy() is the method
# the pandas documentation recommends over the older .values attribute
df.iloc[:,1:].to_numpy()

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

In [18]:
# count how often each distinct value occurs in Column1
df["Column1"].value_counts()

0     1
4     1
8     1
12    1
16    1
Name: Column1, dtype: int64

In [19]:
# count the missing entries in each column
# (isna() is the canonical name; isnull() is its alias)
df.isna().sum()

Column1    0
Column2    0
Column3    0
Column4    0
dtype: int64

In [20]:
# count rows that are exact duplicates of an earlier row
# (duplicated() compares all columns of each row, not individual values)
df.duplicated().sum()

0