# Pandas

### Pandas (the name is derived from "panel data") is a core Python library for data manipulation and data analysis

### It provides one-dimensional and multi-dimensional data structures for data manipulation

### Pandas Data-Structures

In [1]:
# 1. The one-dimensional structure is called a Series
# 2. The two-dimensional structure is called a DataFrame

In [2]:
import pandas as pd

### Series Object

In [3]:
s1 = pd.Series([1,2,3,4,5])

In [4]:
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [5]:
type(s1)

pandas.core.series.Series

In [6]:
s2 = pd.Series([1,2,3,4,5], index=[1,2,3,4,5])

In [7]:
s2

1    1
2    2
3    3
4    4
5    5
dtype: int64

In [8]:
s3 = pd.Series([1,2,3,4,5], index=['a','b','c','d','e'])

In [9]:
s3

a    1
b    2
c    3
d    4
e    5
dtype: int64

### Series Object From Dictionary

In [10]:
s4 = pd.Series({"a":10,"b":20,"c":30})

In [11]:
s4

a    10
b    20
c    30
dtype: int64

In [12]:
s5 = pd.Series({"a":10,"b":20,"c":30}, index=["b","a","d","c"])

In [13]:
s5

b    20.0
a    10.0
d     NaN
c    30.0
dtype: float64

In [14]:
s5["a"]=54 # Changing Values 

In [15]:
s5

b    20.0
a    54.0
d     NaN
c    30.0
dtype: float64

### Extracting Individual Elements

In [16]:
# Extracting a single Element

In [17]:
l1 = [10,20,30,40,50,60,70,80,90]

In [18]:
s6 = pd.Series(l1)
s6

0    10
1    20
2    30
3    40
4    50
5    60
6    70
7    80
8    90
dtype: int64

In [19]:
s6[5]

60

In [20]:
# Extracting a sequence of Elements

In [21]:
s6[5:]

5    60
6    70
7    80
8    90
dtype: int64

In [22]:
s6[:5]

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [23]:
# Extracting elements from back

In [24]:
s6[-3:]

6    70
7    80
8    90
dtype: int64

In [25]:
s6[:-3]

0    10
1    20
2    30
3    40
4    50
5    60
dtype: int64

### Maths operations on Series

In [26]:
s7 = pd.Series([1,2,3,4,5,6,7])

In [27]:
s7

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

In [28]:
s7 + 20

0    21
1    22
2    23
3    24
4    25
5    26
6    27
dtype: int64

In [29]:
s7 - 25

0   -24
1   -23
2   -22
3   -21
4   -20
5   -19
6   -18
dtype: int64

In [30]:
s8 = pd.Series([10,20,30,40,50,60,70])

In [31]:
s8

0    10
1    20
2    30
3    40
4    50
5    60
6    70
dtype: int64

In [32]:
s7 + s8

0    11
1    22
2    33
3    44
4    55
5    66
6    77
dtype: int64

# Pandas DataFrame

## DataFrame is a 2-dimensional labelled data structure

### A DataFrame consists of rows and columns

In [33]:
d1 = pd.DataFrame({"Name":["Aditya","Sam","Jack"], "Marks": [100,20,30]})

In [34]:
d1

Unnamed: 0,Name,Marks
0,Aditya,100
1,Sam,20
2,Jack,30


In [35]:
d2 = pd.DataFrame({"Name":["Aditya","Sam","Jack"], "Marks": [100,20,30]},index=[1,2,3])

In [36]:
d2

Unnamed: 0,Name,Marks
1,Aditya,100
2,Sam,20
3,Jack,30


### DataFrame In-built Functions

In [37]:
iris = pd.read_csv("iris.csv") # Loading Dataset

In [38]:
iris.head() # This method is used to see the First Five Records

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [39]:
iris.tail() # This method is used to see the Last Five Records

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [40]:
iris.shape # To know Rows and Columns

(150, 5)

In [41]:
iris.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


### .iloc[ ]

In [42]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [43]:
iris.iloc[0:3,0:2] # .iloc selects by integer position: rows 0-2 and columns 0-1 (the end of each slice is excluded)

Unnamed: 0,sepal_length,sepal_width
0,5.1,3.5
1,4.9,3.0
2,4.7,3.2


In [44]:
iris.iloc[25:35,2:4]

Unnamed: 0,petal_length,petal_width
25,1.6,0.2
26,1.6,0.4
27,1.5,0.2
28,1.4,0.2
29,1.6,0.2
30,1.6,0.2
31,1.5,0.4
32,1.5,0.1
33,1.4,0.2
34,1.5,0.2


In [45]:
iris.iloc[25:35,2:]

Unnamed: 0,petal_length,petal_width,species
25,1.6,0.2,setosa
26,1.6,0.4,setosa
27,1.5,0.2,setosa
28,1.4,0.2,setosa
29,1.6,0.2,setosa
30,1.6,0.2,setosa
31,1.5,0.4,setosa
32,1.5,0.1,setosa
33,1.4,0.2,setosa
34,1.5,0.2,setosa


In [46]:
iris[0:10]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


### .loc[ ]

In [47]:
# .loc selects rows and columns by label rather than by integer position.
# Label slices include BOTH endpoints, so 0:10 returns the rows labelled 0 through 10.
# Several column labels are passed as a list; a tuple is the key form used for MultiIndex columns.
iris.loc[0:10, ["sepal_length", "petal_length"]]

Unnamed: 0,sepal_length,petal_length
0,5.1,1.4
1,4.9,1.4
2,4.7,1.3
3,4.6,1.5
4,5.0,1.4
5,5.4,1.7
6,4.6,1.4
7,5.0,1.5
8,4.4,1.4
9,4.9,1.5


### Dropping Columns

In [48]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [49]:
iris.drop("sepal_length",axis=1) # Axis = 0 means Rows
                                 # Axis = 1 means Columns
                                 # drop() returns a new DataFrame; iris itself is left unchanged

Unnamed: 0,sepal_width,petal_length,petal_width,species
0,3.5,1.4,0.2,setosa
1,3.0,1.4,0.2,setosa
2,3.2,1.3,0.2,setosa
3,3.1,1.5,0.2,setosa
4,3.6,1.4,0.2,setosa
...,...,...,...,...
145,3.0,5.2,2.3,virginica
146,2.5,5.0,1.9,virginica
147,3.0,5.2,2.0,virginica
148,3.4,5.4,2.3,virginica


### Dropping Rows

In [50]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [51]:
i1 = iris.drop([1,2,3], axis = 0)

In [52]:
i1

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [53]:
i2 = iris.drop(iris.index[1:10], axis = 0) # To Drop a Sequence

In [54]:
i2.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
10,5.4,3.7,1.5,0.2,setosa
11,4.8,3.4,1.6,0.2,setosa
12,4.8,3.0,1.4,0.1,setosa
13,4.3,3.0,1.1,0.1,setosa


### More Pandas Functions

In [55]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [56]:
iris.min()

sepal_length       4.3
sepal_width        2.0
petal_length       1.0
petal_width        0.1
species         setosa
dtype: object

In [57]:
iris.max()

sepal_length          7.9
sepal_width           4.4
petal_length          6.9
petal_width           2.5
species         virginica
dtype: object

In [58]:
# Aggregate the numeric columns only: "species" holds strings, and pandas 2.0+
# raises TypeError for such columns instead of silently skipping them.
iris.mean(numeric_only=True)

sepal_length    5.843333
sepal_width     3.057333
petal_length    3.758000
petal_width     1.199333
dtype: float64

In [59]:
# Aggregate the numeric columns only: "species" holds strings, and pandas 2.0+
# raises TypeError for such columns instead of silently skipping them.
iris.median(numeric_only=True)

sepal_length    5.80
sepal_width     3.00
petal_length    4.35
petal_width     1.30
dtype: float64

In [60]:
# Aggregate the numeric columns only: "species" holds strings, and pandas 2.0+
# raises TypeError for such columns instead of silently skipping them.
iris.std(numeric_only=True)

sepal_length    0.828066
sepal_width     0.435866
petal_length    1.765298
petal_width     0.762238
dtype: float64

In [61]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [62]:
def half(s):
    """Return ``s`` multiplied by 0.5.

    ``s`` can be any object that supports multiplication by a float;
    for a pandas Series the scaling is applied element-wise.
    """
    halved = s * 0.5
    return halved

In [63]:
iris[["sepal_length","petal_length"]].apply(half) # apply() calls half once per selected column, passing each column as a Series

Unnamed: 0,sepal_length,petal_length
0,2.55,0.70
1,2.45,0.70
2,2.35,0.65
3,2.30,0.75
4,2.50,0.70
...,...,...
145,3.35,2.60
146,3.15,2.50
147,3.25,2.60
148,3.10,2.70


In [64]:
def double(s):
    """Return ``s`` multiplied by 2.

    ``s`` can be any object that supports multiplication by an int;
    for a pandas Series the scaling is applied element-wise.
    """
    doubled = s * 2
    return doubled

In [65]:
iris[["sepal_length","petal_length"]].apply(double)

Unnamed: 0,sepal_length,petal_length
0,10.2,2.8
1,9.8,2.8
2,9.4,2.6
3,9.2,3.0
4,10.0,2.8
...,...,...
145,13.4,10.4
146,12.6,10.0
147,13.0,10.4
148,12.4,10.8


In [66]:
iris["species"].value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [67]:
iris["petal_length"].value_counts()

1.5    13
1.4    13
5.1     8
4.5     8
1.6     7
1.3     7
5.6     6
4.0     5
4.9     5
4.7     5
5.0     4
1.7     4
4.8     4
4.4     4
4.2     4
4.1     3
5.7     3
5.5     3
6.1     3
3.9     3
4.6     3
5.8     3
5.2     2
1.9     2
6.0     2
1.2     2
4.3     2
5.3     2
5.4     2
3.3     2
6.7     2
3.5     2
5.9     2
3.6     1
3.8     1
1.0     1
3.0     1
6.3     1
6.6     1
3.7     1
1.1     1
6.4     1
6.9     1
Name: petal_length, dtype: int64

In [68]:
iris.sort_values(by = "sepal_length")

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
...,...,...,...,...,...
122,7.7,2.8,6.7,2.0,virginica
118,7.7,2.6,6.9,2.3,virginica
117,7.7,3.8,6.7,2.2,virginica
135,7.7,3.0,6.1,2.3,virginica


In [69]:
iris.sort_values(by = ["sepal_length","petal_length",])

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
38,4.4,3.0,1.3,0.2,setosa
42,4.4,3.2,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
...,...,...,...,...,...
135,7.7,3.0,6.1,2.3,virginica
117,7.7,3.8,6.7,2.2,virginica
122,7.7,2.8,6.7,2.0,virginica
118,7.7,2.6,6.9,2.3,virginica


In [70]:
iris.sort_index(axis = 0)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [71]:
iris.sort_index(axis = 1)

Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,1.4,0.2,5.1,3.5,setosa
1,1.4,0.2,4.9,3.0,setosa
2,1.3,0.2,4.7,3.2,setosa
3,1.5,0.2,4.6,3.1,setosa
4,1.4,0.2,5.0,3.6,setosa
...,...,...,...,...,...
145,5.2,2.3,6.7,3.0,virginica
146,5.0,1.9,6.3,2.5,virginica
147,5.2,2.0,6.5,3.0,virginica
148,5.4,2.3,6.2,3.4,virginica


In [72]:
iris[1:10].isnull()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,False,False,False
6,False,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False


In [73]:
iris.notnull()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,True,True,True,True,True
1,True,True,True,True,True
2,True,True,True,True,True
3,True,True,True,True,True
4,True,True,True,True,True
...,...,...,...,...,...
145,True,True,True,True,True
146,True,True,True,True,True
147,True,True,True,True,True
148,True,True,True,True,True


In [74]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [75]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [76]:
# Raise the display limits so that frames with up to 150 columns / 150 rows
# are printed in full instead of being truncated with "...".
pd.set_option("display.max_columns",150)
pd.set_option("display.max_rows",150)

In [77]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [78]:
iris.sort_index(axis=1)

Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,1.4,0.2,5.1,3.5,setosa
1,1.4,0.2,4.9,3.0,setosa
2,1.3,0.2,4.7,3.2,setosa
3,1.5,0.2,4.6,3.1,setosa
4,1.4,0.2,5.0,3.6,setosa
5,1.7,0.4,5.4,3.9,setosa
6,1.4,0.3,4.6,3.4,setosa
7,1.5,0.2,5.0,3.4,setosa
8,1.4,0.2,4.4,2.9,setosa
9,1.5,0.1,4.9,3.1,setosa


In [79]:
# Passing None removes the row, column and column-width limits altogether;
# for display.width, None asks pandas to detect the available width itself.
# These options are global: they stay in effect for every later cell in this kernel.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

In [80]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa
