# **What is Pandas?**

Pandas is an open-source Python library that offers various data structures and operations for manipulating data.


In [None]:
!pip install pandas

In [1]:
import pandas as pd

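### What Can Pandas Do?
Pandas helps you answer questions about your data, such as:
1. Is there a correlation between two or more columns?
2. What is the average value of a column?
3. What is the maximum value?
4. What is the minimum value?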

### Why Pandas?
1. Manage and explore data
2. Handle missing data
3. Clean up data
4. Read and write multiple file formats
5. Visualize data

### The two main data structures in Pandas are
1. Series - A one-dimensional labelled array capable of holding data of any type (integers, strings, floats, Python objects).
2. DataFrame - A two-dimensional labelled data structure in which data is arranged in a table of rows and columns.
![Series_DataFrame.png](attachment:Series_DataFrame.png)



## Creating a Series from a list

In [2]:
# Plain Python list that the Series and DataFrame examples below are built from.
l = ["one", "two", "three"]
l

['one', 'two', 'three']

In [3]:
# The same three items as a tuple; this builds a new object and leaves `l` as it is.
tuple(l)

('one', 'two', 'three')

In [4]:
# Build a one-dimensional Series from the list. As the output shows, pandas
# supplies the labels 0..2 and stores the strings with dtype `object`.
ser = pd.Series(l)
ser

0      one
1      two
2    three
dtype: object

In [5]:
# Confirm the class of the object that pd.Series() returned.
type(ser)

pandas.core.series.Series

## Creating a DataFrame from a list

In [6]:
# The same flat list as a DataFrame: a single column (labelled 0) with one row
# per list item.
df = pd.DataFrame(l)
df

Unnamed: 0,0
0,one
1,two
2,three


In [7]:
# Confirm the class of the object that pd.DataFrame() returned.
type(df)

pandas.core.frame.DataFrame

In [8]:
# A list of lists: each inner list becomes one row, giving a 3x3 frame with
# default integer labels on both rows and columns.
pd.DataFrame([['one', 'two', 'three'], ['four', 'five', 'six'], ['seven', 'eight', 'nine']])

Unnamed: 0,0,1,2
0,one,two,three
1,four,five,six
2,seven,eight,nine


* Note: When no labels are supplied, pandas automatically assigns a default integer index (0, 1, 2, ...) to both the rows and the columns.

## Creating a DataFrame from a dictionary

In [9]:
# Column-oriented input: every key names a column and every list holds that
# column's values, top to bottom.
d = dict(
    n=[1, 2, 3],
    d=["Sunday", "Monday", "Tuesday"],
    c=["red", "blue", "green"],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,n,d,c
0,1,Sunday,red
1,2,Monday,blue
2,3,Tuesday,green


* Note: The dictionary keys become the column names.

In [10]:
# A dict of bare scalars carries no row information, so pandas cannot infer an
# index and raises ValueError. The error is the point of this cell; it is
# caught and printed so the notebook still runs top to bottom.
d1 = {"Fruits": 21, "Vegetables": 31}
try:
    pd.DataFrame(d1)
except ValueError as err:
    # Same text a traceback would end with, without aborting "Run All".
    print(f"{type(err).__name__}: {err}")

ValueError: If using all scalar values, you must pass an index

In [11]:
# Wrapping each value in a one-element list gives pandas a row to build, so no
# explicit index is required.
d1 = dict(Fruits=[21], Vegetables=[31])
pd.DataFrame(data=d1)

Unnamed: 0,Fruits,Vegetables
0,21,31


* Make sure the values are always specified as sequences (e.g. lists or tuples); if every value is a scalar, pandas cannot infer the rows and you must pass an `index`.

### Specifying custom row and column names

In [12]:
# Show the frame built from the dictionary, before any renaming.
df

Unnamed: 0,n,d,c
0,1,Sunday,red
1,2,Monday,blue
2,3,Tuesday,green


In [14]:
# Relabel the first two rows; the third keeps its integer label because it has
# no entry in the mapping. The renamed frame is only displayed, not stored.
new_row_labels = {0: "Row1", 1: "Row2"}
df.rename(index=new_row_labels)

Unnamed: 0,n,d,c
Row1,1,Sunday,red
Row2,2,Monday,blue
2,3,Tuesday,green


In [15]:
# The renamed frame was not assigned back, so df still carries the original
# 0/1/2 row labels.
df

Unnamed: 0,n,d,c
0,1,Sunday,red
1,2,Monday,blue
2,3,Tuesday,green


In [16]:
# Current column labels, exposed as a pandas Index.
df.columns

Index(['n', 'd', 'c'], dtype='object')

In [17]:
# Rename all three columns. rename() returns a new frame, which is rebound to
# `df` here rather than mutated with inplace=True; assignment keeps the step
# chainable and makes the change of state explicit.
df = df.rename(columns={'n': "Col1", 'd': "Col2", 'c': "Col3"})

In [18]:
# After the rename in the previous cell, df shows the new column names.
df

Unnamed: 0,Col1,Col2,Col3
0,1,Sunday,red
1,2,Monday,blue
2,3,Tuesday,green


In [19]:
# Per-column dtypes: the integer column is int64, the two string columns are object.
df.dtypes

Col1     int64
Col2    object
Col3    object
dtype: object

In [21]:
# Swap rows and columns: the column names become the row labels. The result is
# only displayed, not stored.
df.transpose()

Unnamed: 0,0,1,2
Col1,1,2,3
Col2,Sunday,Monday,Tuesday
Col3,red,blue,green


In [22]:
# .T is the shorthand accessor for transpose(); the output matches the previous cell.
df.T

Unnamed: 0,0,1,2
Col1,1,2,3
Col2,Sunday,Monday,Tuesday
Col3,red,blue,green


In [23]:
# Neither transposed result was assigned back, so df is unchanged.
df

Unnamed: 0,Col1,Col2,Col3
0,1,Sunday,red
1,2,Monday,blue
2,3,Tuesday,green


## Load data from CSV

In [24]:
# Read a comma-separated file from the current working directory.
# NOTE(review): the later local files are read from a Resource folder — confirm
# that airtravel.csv really lives next to the notebook.
# NOTE(review): the displayed year headers still contain quote characters;
# if the file has a space after each comma, skipinitialspace=True would let the
# parser strip them — confirm against the file.
pd.read_csv("airtravel.csv")

Unnamed: 0,Month,"""1958""","""1959""","""1960"""
0,JAN,340,360,417
1,FEB,318,342,391
2,MAR,362,406,419
3,APR,348,396,461
4,MAY,363,420,472
5,JUN,435,472,535
6,JUL,491,548,622
7,AUG,505,559,606
8,SEP,404,463,508
9,OCT,359,407,461


In [27]:
# Read a pipe-delimited file by naming the separator explicitly.
# The path uses a forward slash so it resolves on Windows, macOS and Linux
# alike; a backslash is a path separator on Windows only.
pd.read_csv("Resource/airtravel2.csv", delimiter='|')

Unnamed: 0,Month,"""1958""","""1959""","""1960"""
0,JAN,340,360,417
1,FEB,318,342,391
2,MAR,362,406,419
3,APR,348,396,461
4,MAY,363,420,472
5,JUN,435,472,535
6,JUL,491,548,622
7,AUG,505,559,606
8,SEP,404,463,508
9,OCT,359,407,461


In [28]:
# read_csv also accepts a URL in place of a file path (this cell needs network access).
# For this longer frame only the first and last rows are displayed; the "..." row
# marks the rows that were left out.
pd.read_csv("https://raw.githubusercontent.com/Opensourcefordatascience/Data-sets/master/blood_pressure.csv")

Unnamed: 0,patient,sex,agegrp,bp_before,bp_after
0,1,Male,30-45,143,153
1,2,Male,30-45,163,170
2,3,Male,30-45,153,168
3,4,Male,30-45,153,142
4,5,Male,30-45,146,141
...,...,...,...,...,...
115,116,Female,60+,152,152
116,117,Female,60+,161,152
117,118,Female,60+,165,174
118,119,Female,60+,149,151


In [29]:
# index_col=0 uses the first column of the file (patient) as the row index
# instead of the default 0..n-1 labels.
pd.read_csv("https://raw.githubusercontent.com/Opensourcefordatascience/Data-sets/master/blood_pressure.csv", index_col= 0)

Unnamed: 0_level_0,sex,agegrp,bp_before,bp_after
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Male,30-45,143,153
2,Male,30-45,163,170
3,Male,30-45,153,168
4,Male,30-45,153,142
5,Male,30-45,146,141
...,...,...,...,...
116,Female,60+,152,152
117,Female,60+,161,152
118,Female,60+,165,174
119,Female,60+,149,151


## Read data from Excel

In [30]:
# Read a workbook; with no sheet_name given, pandas loads the first sheet.
# Empty header cells show up as "Unnamed: N" columns.
# The path uses a forward slash so it resolves on Windows, macOS and Linux
# alike; a backslash is a path separator on Windows only.
pd.read_excel("Resource/ir211wk12sample.xls")

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total,Unnamed: 7,Bins,Frequency,Intervals,Unnamed: 11,Item Type,Frequency.1,Rel. Freq,% Freq.
0,2014-09-01,Central,Smith,Desk,2.0,125.0,250.0,,9.0,6.0,0-9,,Pencil,13.0,0.302326,0.302326
1,2015-06-17,Central,Kivell,Desk,5.0,125.0,625.0,,19.0,4.0,10-19,,Binder,15.0,0.348837,0.348837
2,2015-09-10,Central,Gill,Pencil,7.0,1.29,9.03,,29.0,4.0,20-29,,Pen,5.0,0.116279,0.116279
3,2015-11-17,Central,Jardine,Binder,11.0,4.99,54.89,,39.0,3.0,30-39,,Pen Set,7.0,0.162791,0.162791
4,2015-10-31,Central,Andrews,Pencil,14.0,1.29,18.06,,49.0,2.0,40-49,,Desk,3.0,0.069767,0.069767
5,2014-02-26,Central,Gill,Pen,27.0,19.99,539.73,,59.0,6.0,50-59,,,43.0,1.0,1.0
6,2014-10-05,Central,Morgan,Binder,28.0,8.99,251.72,,69.0,6.0,60-69,,,,,
7,2015-12-21,Central,Andrews,Binder,28.0,4.99,139.72,,79.0,3.0,70-79,,,,,
8,2014-02-09,Central,Jardine,Pencil,36.0,4.99,179.64,,89.0,3.0,80-89,,10-19,,,
9,2015-08-07,Central,Kivell,Pen Set,42.0,23.95,1005.9,,99.0,6.0,90-99,,,,,


In [31]:
# An integer sheet_name selects a sheet by zero-based position, so 1 is the
# second sheet of the workbook.
# The path uses a forward slash so it resolves on Windows, macOS and Linux
# alike; a backslash is a path separator on Windows only.
pd.read_excel("Resource/ir211wk12sample.xls", sheet_name=1)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2
0,,Contextures Products,
1,,,
2,,PivotPower Premium Add-in,Time-saving tools for pivot table power users
3,,UserForms for Data Entry ebook Kit,Step by step instructions and videos
4,,Contextures Excel Tools Add-in,"Make instant backups, sort sheets, and many mo..."
5,,Data Validation Multi-Select Premium Kit,"Select multiple items from a listbox, to enter..."
6,,30 Excel Functions in 30 Days eBook Kit,Sample workbook and easy to follow user guide ...
7,,,
8,,Contextures Recommends,
9,,,


In [32]:
# List the sheets in the workbook without loading any of them into a frame.
# The `with` block closes the underlying file handle as soon as the names have
# been read, and the forward-slash path works on every operating system.
with pd.ExcelFile("Resource/ir211wk12sample.xls") as file:
    sheet_names = file.sheet_names
sheet_names

['SalesOrders', 'MyLinks', 'Sheet1']

In [33]:
# usecols with a list of positions keeps only those columns; here the first
# four (OrderDate, Region, Rep, Item).
# The path uses a forward slash so it resolves on Windows, macOS and Linux
# alike; a backslash is a path separator on Windows only.
pd.read_excel("Resource/ir211wk12sample.xls", usecols=[0, 1, 2, 3])

Unnamed: 0,OrderDate,Region,Rep,Item
0,2014-09-01,Central,Smith,Desk
1,2015-06-17,Central,Kivell,Desk
2,2015-09-10,Central,Gill,Pencil
3,2015-11-17,Central,Jardine,Binder
4,2015-10-31,Central,Andrews,Pencil
5,2014-02-26,Central,Gill,Pen
6,2014-10-05,Central,Morgan,Binder
7,2015-12-21,Central,Andrews,Binder
8,2014-02-09,Central,Jardine,Pencil
9,2015-08-07,Central,Kivell,Pen Set


### Load data from an HTML page

In [34]:
# read_html parses the <table> elements of a page and returns them as a list of
# DataFrames.
# NOTE(review): this fetches a live web page, so the number of tables and the
# values shown below will presumably differ from run to run; it also needs
# network access and an HTML parser installed — confirm.
stock_Prices = pd.read_html("https://www.moneycontrol.com/india/stockpricequote/computers-software/larsentoubroinfotech/LI12")

In [35]:
# Display everything read_html returned: one DataFrame after another, printed
# as plain text because the container is a list.
stock_Prices

[    BUY    BUY.1     SELL SELL.1
 0   QTY    PRICE    PRICE    QTY
 1    12  5887.65  5893.20      3
 2     1  5887.10  5899.90      9
 3     1  5886.00  5899.95    148
 4     1  5881.10  5900.00      1
 5     2  5880.00  5900.50      3
 6  1078    Total    Total   1222,
     BUY    BUY.1     SELL SELL.1
 0   QTY    PRICE    PRICE    QTY
 1    12  5887.65  5893.20      3
 2     1  5887.10  5899.90      9
 3     1  5886.00  5899.95    148
 4     1  5881.10  5900.00      1
 5     2  5880.00  5900.50      3
 6  1078    Total    Total   1222,
                 0          1
 0            Open    6000.00
 1  Previous Close    6059.75
 2          Volume  450430.00
 3    Value (Lacs)   26622.89
 4         i  VWAP    5901.27
 5            Beta   0.880.87,
               0        1
 0          High  6019.85
 1           Low  5801.00
 2      UC Limit  6665.70
 3      LC Limit  5453.80
 4  52 Week High  7588.80
 5   52 Week Low  3525.00,
                       0       1
 0               TTM EPS  1

In [36]:
# The result is a plain Python list, not a single DataFrame.
type(stock_Prices)

list

In [37]:
# Number of tables parsed from the page (55 when this cell was last run).
len(stock_Prices)

55

In [38]:
# Pick one table by position. Note that the page's QTY/PRICE sub-header was read
# in as data row 0 and the totals as the last row.
stock_Prices[0]

Unnamed: 0,BUY,BUY.1,SELL,SELL.1
0,QTY,PRICE,PRICE,QTY
1,12,5887.65,5893.20,3
2,1,5887.10,5899.90,9
3,1,5886.00,5899.95,148
4,1,5881.10,5900.00,1
5,2,5880.00,5900.50,3
6,1078,Total,Total,1222


In [39]:
# Third table: label/value pairs under the default column names 0 and 1.
# NOTE(review): the Beta value "0.880.87" looks like two numbers fused together
# by the scrape — confirm before using it numerically.
stock_Prices[2]

Unnamed: 0,0,1
0,Open,6000.00
1,Previous Close,6059.75
2,Volume,450430.00
3,Value (Lacs),26622.89
4,i VWAP,5901.27
5,Beta,0.880.87


In [40]:
# Fourth table: price levels (high/low, circuit limits, 52-week range), again as
# label/value pairs.
stock_Prices[3]

Unnamed: 0,0,1
0,High,6019.85
1,Low,5801.0
2,UC Limit,6665.7
3,LC Limit,5453.8
4,52 Week High,7588.8
5,52 Week Low,3525.0


In [41]:
# Use the label column (named 0) as the row index so values can be looked up by
# name. The result is only displayed; stock_Prices[3] itself is not modified.
stock_Prices[3].set_index(0)

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
High,6019.85
Low,5801.0
UC Limit,6665.7
LC Limit,5453.8
52 Week High,7588.8
52 Week Low,3525.0


### Saving a DataFrame

In [42]:
# Load the blood-pressure data set again, this time keeping it in a variable.
# The first column (patient) becomes the row index.
source_url = "https://raw.githubusercontent.com/Opensourcefordatascience/Data-sets/master/blood_pressure.csv"
anotherdf = pd.read_csv(filepath_or_buffer=source_url, index_col=0)

In [43]:
# Write the frame to a CSV file in the current working directory. to_csv
# includes the index by default, so the patient ids are written as the first column.
anotherdf.to_csv("bloodpressure.csv")

In [44]:
# orient='index' produces one entry per row: the index label (patient id) maps
# to a {column: value} dict for that row.
anotherdf.to_dict(orient='index')

{1: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 143, 'bp_after': 153},
 2: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 163, 'bp_after': 170},
 3: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 153, 'bp_after': 168},
 4: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 153, 'bp_after': 142},
 5: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 146, 'bp_after': 141},
 6: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 150, 'bp_after': 147},
 7: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 148, 'bp_after': 133},
 8: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 153, 'bp_after': 141},
 9: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 153, 'bp_after': 131},
 10: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 158, 'bp_after': 125},
 11: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 149, 'bp_after': 164},
 12: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 173, 'bp_after': 159},
 13: {'sex': 'Male', 'agegrp': '30-45', 'bp_before': 165, 'bp_after': 135},
 14: {'sex': 'Male', 

In [45]:
# Render the frame as an HTML table and write it to bloodpressure.html in the
# current working directory.
anotherdf.to_html("bloodpressure.html")