In [32]:
import pandas as pd # importing pandas as pd

**Pandas is a python library which is use to work with data in python. It stands for Panel data.**

**Like numpy have its own data structure namely "numpy arrays", pandas has two data structures namely "Series" and "Dataframe".**

Pandas Series is a one-dimensional labeled array capable of holding data of any type.
The axis labels are collectively called index. Pandas Series is nothing but a column in an excel sheet. Labels need not be unique.

Pandas dataframe is a multi dimensional data structure which is a preperation of rows and columns.

We can also load data in pandas from a csv sheet.

# Making a series

## List

In [33]:
s1 = pd.Series([1,2,3,4,5])
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [34]:
type(s1)

pandas.core.series.Series

![image.png](attachment:image.png)

## Dictionaries

In [35]:
# Adding custom indexes to the series
s1_with_custom_indexes = pd.Series([1,2,3,4,5], index=['a', 'b', 'c', 'd', 'e'])
s1_with_custom_indexes

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [36]:
# Making a series using dictionaries.
s2 = pd.Series({'a':'b', 'c':'d', 'e':'f', 'g':'h', 'i':'j'})
s2

# All the keys in dictionaries are converted into indexes and all the values are turned into normal values

a    b
c    d
e    f
g    h
i    j
dtype: object

In [37]:
s2_with_custom_indexes = pd.Series({'a':'b', 'c':'d', 'e':'f', 'g':'h', 'i':'j'}, index=['k', 'm', 'o', 'q'])
s2_with_custom_indexes


k    NaN
m    NaN
o    NaN
q    NaN
dtype: object

# Extracting elements

In [38]:
# Extracting a single element.
s3 = pd.Series([1,2,3,4,5,6,7,8,9]) # Making a series.
s3[3] # Fetching a particular value using index.

4

In [39]:
# Extracting elements from back.
s4 = pd.Series([1,2,3,4,5,6,7,8,9]) # Making a series.
s4[-3:] # Printing the last three objects from the series.

6    7
7    8
8    9
dtype: int64

In [40]:
# Extracting a sequence of element.
s5 = pd.Series([1,2,3,4,5,6,7,8,9]) # Making a series.
s5[:4] # Extracting the first four objects from that series.

0    1
1    2
2    3
3    4
dtype: int64

 ### Basic math operations on series.

In [41]:
s5 + 5 # Adding every value of a series by 5.
s5

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

In [42]:
s4 + s5 # Adding two series.

0     2
1     4
2     6
3     8
4    10
5    12
6    14
7    16
8    18
dtype: int64

In [43]:
s4-100 # Subtracting every value in s4 by 100.

0   -99
1   -98
2   -97
3   -96
4   -95
5   -94
6   -93
7   -92
8   -91
dtype: int64

In [44]:
s4/10 # Dividing every value in s4 by 10.

0    0.1
1    0.2
2    0.3
3    0.4
4    0.5
5    0.6
6    0.7
7    0.8
8    0.9
dtype: float64

In [45]:
s4*10 # Multiplying every value in s4 by 10.

0    10
1    20
2    30
3    40
4    50
5    60
6    70
7    80
8    90
dtype: int64

# Making a dataframe.

In [47]:
# This is how to create a basic dataframe.
pd.DataFrame({'Student_name':['Aviral', 'Abhineet', 'Seema'], "Score_in_maths":[100, 99, 98], 'Score in Science':[100, 99, 98], 'Total_score':[200, 198, 196]})

Unnamed: 0,Student_name,Score_in_maths,Score in Science,Total_score
0,Aviral,100,100,200
1,Abhineet,99,99,198
2,Seema,98,98,196


# Loading data from csv file.

In [51]:
dataset = pd.read_csv("Datasets/Iris.csv") # Loading the data from csv file.

In [52]:
dataset # Printing the data.

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


![image.png](attachment:image.png)

In [55]:
# Printing the first five records from this data set.
dataset.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [57]:
# Printing the last five records from this data set.
dataset.tail()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica
149,150,5.9,3.0,5.1,1.8,Iris-virginica


In [62]:
# Checking the shape (rows and colums count or dimensions) of the dataset.
dataset.shape
# Seeing the output, there are total 150 rows and 6 columns in that dataset.

(150, 6)

# Accessing particular rows and columns using "loc" and "iloc" functions in pandas.

### The difference between iloc and loc is that in iloc we have to give the index of the column while in loc we can just simply give the column name.

## iloc

In [65]:
dataset.iloc[5:11, 2:4]

Unnamed: 0,SepalWidthCm,PetalLengthCm
5,3.9,1.7
6,3.4,1.4
7,3.4,1.5
8,2.9,1.4
9,3.1,1.5
10,3.7,1.5


![image.png](attachment:image.png)

This image says that i want to access rows starting from index 5 to 11 as 11 is exclusive so, we will turn that into 10 and that I want to access columns starting from index 2 to 4.

If you want to access all the columns or rows then you don't need to give their values. But if you want to access particular columns but not rows or rows but not column then, you can put a colon in their place:

**dataset.iloc(:, start_column_index:end_column_index) # For you want to access particular columns but not rows**

**dataset.iloc(start_row_index:end_row_index,) # For you want to access particular rows but not columns**

## loc

In [70]:
dataset.loc[5:11, ("SepalWidthCm", "PetalLengthCm")]

Unnamed: 0,SepalWidthCm,PetalLengthCm
5,3.9,1.7
6,3.4,1.4
7,3.4,1.5
8,2.9,1.4
9,3.1,1.5
10,3.7,1.5
11,3.4,1.6


# How to drop rows and columns from a dataframe?

## Columns

In [75]:
dataset

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [71]:
dataset.drop('Id', axis=1)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


## Rows

In [74]:
dataset.drop(1, axis=0)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


![image.png](attachment:image.png)

![image.png](attachment:image.png)

# Additionals

In [76]:
# Function to find out mean.
dataset.mean()

Id               75.500000
SepalLengthCm     5.843333
SepalWidthCm      3.054000
PetalLengthCm     3.758667
PetalWidthCm      1.198667
dtype: float64

In [77]:
# Function to find out median.
dataset.median()

Id               75.50
SepalLengthCm     5.80
SepalWidthCm      3.00
PetalLengthCm     4.35
PetalWidthCm      1.30
dtype: float64

In [78]:
# Function to find out minimum value from each column.
dataset.min()

Id                         1
SepalLengthCm            4.3
SepalWidthCm               2
PetalLengthCm              1
PetalWidthCm             0.1
Species          Iris-setosa
dtype: object

In [79]:
# Function to find out maximum value from each column.
dataset.max()

Id                          150
SepalLengthCm               7.9
SepalWidthCm                4.4
PetalLengthCm               6.9
PetalWidthCm                2.5
Species          Iris-virginica
dtype: object

In [86]:
dataset['Species'].value_counts()

Iris-virginica     50
Iris-setosa        50
Iris-versicolor    50
Name: Species, dtype: int64