### Pandas is an open-source library that is made mainly for working with relational or labeled data both easily and intuitively. It provides various data structures and operations for manipulating numerical data and time series. This library is built on top of the NumPy library. Pandas is fast and it has high performance & productivity for users.

### Advantages
1. Fast and efficient for manipulating and analyzing data.

2. Data from different file objects can be loaded.

3. Easy handling of missing data (represented as NaN) in floating point as well as non-floating point data.

4. Size mutability: columns can be inserted and deleted from DataFrame and higher dimensional objects.

5. Data set merging and joining.

6. Flexible reshaping and pivoting of data sets.

7. Provides time-series functionality.

8. Powerful group by functionality for performing split-apply-combine operations on data sets.

Pandas is about processing the data, manipulating the data, and understanding the data.

In [1]:
# Installation code for pandas
!pip install pandas



In [2]:
# Importing required libraries
import pandas as pd
import numpy as np

### Pandas provides two data structures for processing data:
1. Series and

2. DataFrame

1.Series

It is defined as a one-dimensional array that is capable of storing various data types. The row labels of a Series are called the index. We can easily convert a list, tuple, or dictionary into a Series using the `pd.Series()` constructor. A Series cannot contain multiple columns.

2.DataFrame

DataFrame is defined as a standard way to store data and has two different indexes, i.e., row index and column index.

In [None]:
# Build a Series from a plain Python list, giving every element its own label.
index_label = list('abcdef')
l1 = list(range(1, 7))
series = pd.Series(l1, index=index_label)
print(series)

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64


In [3]:
pd.Series([1,2,3,4,5,6])

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [None]:
# Build a Series from a tuple; the values are paired with the labels in order.
index_label = list('abcde')
t1 = (1, 2, 3, 5, 6)
series2 = pd.Series(t1, index=index_label)
print(series2)

a    1
b    2
c    3
d    5
e    6
dtype: int64


In [4]:
pd.Series((1,2,3,5,6))

0    1
1    2
2    3
3    5
4    6
dtype: int64

In [None]:
# Build a Series from a dictionary: the keys become the index labels
# and the values become the data.
dict1 = dict(a=10, b=20, c=30, d=40)
series3 = pd.Series(dict1)
print(series3)

a    10
b    20
c    30
d    40
dtype: int64


In [5]:
pd.Series({'a': 10, 'b':20, 'c':30,'d':40})

a    10
b    20
c    30
d    40
dtype: int64

In [6]:
np.arange(0,5)

array([0, 1, 2, 3, 4])

In [10]:
pd.Series(np.arange(0,10,2))

0    0
1    2
2    4
3    6
4    8
dtype: int32

In [9]:
a=pd.Series(np.arange(0,10,2))

In [11]:
# Build a Series from NumPy arrays: the even numbers 0..8 are the data
# and the integers 0..4 (array1) are used as the index.
array1 = np.arange(5)
series4 = pd.Series(index=array1, data=np.arange(0, 10, 2))
print(series4)

0    0
1    2
2    4
3    6
4    8
dtype: int32


In [12]:
# Series of the even numbers 0..8 held in a NumPy array, labelled 'a'..'e'.
i = list('abcde')
array1 = np.arange(0, 10, 2)
series11 = pd.Series(array1, index=i)
print(series11)

a    0
b    2
c    4
d    6
e    8
dtype: int32


In [16]:
pd.DataFrame([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]])

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16


In [17]:
# Ingredients for a 4x4 DataFrame: column labels, row labels and the
# values 1..16 laid out row by row as a list of lists.
columns = list("wxyz")
index = list("ABCD")
data = [list(range(first, first + 4)) for first in range(1, 17, 4)]

In [18]:
# Assemble the DataFrame from the values, row labels and column labels
# prepared in the previous cell.
df = pd.DataFrame(data=data, columns=columns, index=index)

In [19]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16


In [23]:
df.z

A     4
B     8
C    12
D    16
Name: z, dtype: int64

In [24]:
df['z']

A     4
B     8
C    12
D    16
Name: z, dtype: int64

In [7]:
# grab/select a specific column of the DataFrame by its label
df["w"]

A     1
B     5
C     9
D    13
Name: w, dtype: int64

In [8]:
df["z"]

A     4
B     8
C    12
D    16
Name: z, dtype: int64

In [26]:
df.columns

Index(['w', 'x', 'y', 'z'], dtype='object')

In [27]:
df[['x', 'y', 'z']]

Unnamed: 0,x,y,z
A,2,3,4
B,6,7,8
C,10,11,12
D,14,15,16


In [30]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16


In [29]:
df.loc['C']

w     9
x    10
y    11
z    12
Name: C, dtype: int64

In [25]:
# retrieve/select two columns at once by passing a list of column labels
df[["w","x"]]

Unnamed: 0,w,x
A,1,2
B,5,6
C,9,10
D,13,14


In [10]:
# a column can also be accessed as an attribute, using the dot operator
df.w

A     1
B     5
C     9
D    13
Name: w, dtype: int64

In [11]:
df.z

A     4
B     8
C    12
D    16
Name: z, dtype: int64

In [None]:
df.x

A     2
B     6
C    10
D    14
Name: x, dtype: int64

In [None]:
# trying to select/grab a row with plain square brackets
df["A"]   # df[label] looks up columns only, not rows; "A" is a row label here, so this is expected to raise KeyError

In [14]:
# To select a specific row by its label, use the loc indexer
df.loc["A"]

w    1
x    2
y    3
z    4
Name: A, dtype: int64

In [None]:
df.loc[['A','B']]

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8


In [33]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16


In [37]:
df.iloc[2:4,2:4]

Unnamed: 0,y,z
C,11,12
D,15,16


In [38]:
df.iloc[2:,2:]

Unnamed: 0,y,z
C,11,12
D,15,16


In [31]:
# selecting by integer position requires iloc (index location);
# here: the first two rows and the first two columns
df.iloc[0:2,0:2]

Unnamed: 0,w,x
A,1,2
B,5,6


In [None]:
# selecting a single column by position: position 1 is the second column ("x"),
# because iloc positions start at 0
df.iloc[:,1]

A     2
B     6
C    10
D    14
Name: x, dtype: int64

In [None]:
# Adding a new column to the DataFrame: assign a list with one value per row
df["new"] = [1,2,3,4]
df

Unnamed: 0,w,x,y,z,new
A,1,2,3,4,1
B,5,6,7,8,2
C,9,10,11,12,3
D,13,14,15,16,4


In [39]:
df['Moses']=[2,3,4,5]

In [40]:
df

Unnamed: 0,w,x,y,z,Moses
A,1,2,3,4,2
B,5,6,7,8,3
C,9,10,11,12,4
D,13,14,15,16,5


In [41]:
df.loc["Mo"]=[1,2,3,4,5]

In [42]:
df

Unnamed: 0,w,x,y,z,Moses
A,1,2,3,4,2
B,5,6,7,8,3
C,9,10,11,12,4
D,13,14,15,16,5
Mo,1,2,3,4,5


In [43]:
# in order to add a new row to the DataFrame, assign to a new label through loc,
# supplying one value per column
df.loc["row"] = [1,2,3,4,5]
df

Unnamed: 0,w,x,y,z,Moses
A,1,2,3,4,2
B,5,6,7,8,3
C,9,10,11,12,4
D,13,14,15,16,5
Mo,1,2,3,4,5
row,1,2,3,4,5


In [47]:
# delete the "Moses" column: axis=1 targets columns, and inplace=True
# applies the change to df itself
df.drop("Moses",axis=1,inplace=True)

In [48]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16
Mo,1,2,3,4
row,1,2,3,4


In [None]:
# without inplace=True, drop only returns a copy without the "new" column;
# df itself keeps the column
df.drop("new",axis = 1)

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16
row,1,2,3,4


In [None]:
df  # the "new" column was not deleted permanently. To delete it permanently
    # we need to use inplace = True

Unnamed: 0,w,x,y,z,new
A,1,2,3,4,1
B,5,6,7,8,2
C,9,10,11,12,3
D,13,14,15,16,4
row,1,2,3,4,5


In [None]:
df.drop("new",axis = 1, inplace = True)

In [None]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16
row,1,2,3,4


In [49]:
df

Unnamed: 0,w,x,y,z
A,1,2,3,4
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16
Mo,1,2,3,4
row,1,2,3,4


In [52]:
# if you want to delete a row instead of a column, just change to axis = 0
df.drop("A",axis = 0,inplace=True)

In [53]:
df

Unnamed: 0,w,x,y,z
B,5,6,7,8
C,9,10,11,12
D,13,14,15,16
Mo,1,2,3,4
row,1,2,3,4


In [54]:
# Resetting the index: the old row labels move into an "index" column and the
# rows are numbered 0, 1, 2, ... Called without inplace=True, this only returns
# the result; df itself keeps its labels.
df.reset_index()

Unnamed: 0,index,w,x,y,z
0,B,5,6,7,8
1,C,9,10,11,12
2,D,13,14,15,16
3,Mo,1,2,3,4
4,row,1,2,3,4


In [58]:
# add a new column "Nnames" holding the labels that will become the new index

df["Nnames"] = ["F","G","H","I","M"]
df

Unnamed: 0,w,x,y,z,Nnames
B,5,6,7,8,F
C,9,10,11,12,G
D,13,14,15,16,H
Mo,1,2,3,4,I
row,1,2,3,4,M


In [61]:
df.set_index("Nnames",inplace=True) # setting the "Nnames" column as the new index

In [62]:
df # without inplace=True, set_index would only be a temporary transformation;
    # inplace=True makes the transformation permanent

Unnamed: 0_level_0,w,x,y,z
Nnames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,5,6,7,8
G,9,10,11,12
H,13,14,15,16
I,1,2,3,4
M,1,2,3,4
