# Pandas Examples

In [1]:
import pandas as pd
import numpy as np

# Column-oriented input: each key becomes a column name, each list holds that column's values.
d = dict(col1=[1, 2], col2=[3, 4])
print(d)

# Build the 2-D table from the mapping; the last expression is rendered as the cell output.
df = pd.DataFrame(d)
df

{'col1': [1, 2], 'col2': [3, 4]}


Unnamed: 0,col1,col2
0,1,3
1,2,4


# Importing an Excel file into a pandas DataFrame

In [2]:
# Load the work-hours sheet and discard every column that is entirely empty.
data = (
    pd.read_excel('HiWi_Work_Hours.xlsx')
    .dropna(axis='columns', how='all')
)
data

Unnamed: 0,Date,Timing,Duration (Hrs)
0,20.10.2020,16.30 - 18.30,2.0
1,21.10.2020,15.30 - 17.00,1.5
2,22.10.2020,16.30 - 18.30,2.0
3,23.10.2020,16.30 - 18.30,2.0
4,24.10.2020,10.00 - 11.00,1.0
5,25.10..2020,10.00 - 11.00,1.0
6,26.10.2020,16.30 - 18.00,1.5
7,27.10.2020,10.15 - 12.15,2.0


The following cells demonstrate how to get the index and column names of the dataset, and how to access a particular column of the dataset.

In [3]:
# display() renders each argument in turn: first the row index, then the column names.
display(data.index, data.columns)
# The trailing expression shows all values of all columns as a NumPy array.
data.values

RangeIndex(start=0, stop=8, step=1)

Index(['Date', 'Timing ', 'Duration (Hrs)'], dtype='object')

array([['20.10.2020', '16.30 - 18.30 ', 2.0],
       ['21.10.2020', '15.30 - 17.00', 1.5],
       ['22.10.2020', '16.30 - 18.30', 2.0],
       ['23.10.2020', '16.30 - 18.30', 2.0],
       ['24.10.2020', '10.00 - 11.00', 1.0],
       ['25.10..2020', '10.00 - 11.00', 1.0],
       ['26.10.2020', '16.30 - 18.00', 1.5],
       ['27.10.2020', '10.15 - 12.15', 2.0]], dtype=object)

In [4]:
# Select one column by its label (all rows); the result is a Series named after the column.
data.loc[:, 'Date']

0     20.10.2020
1     21.10.2020
2     22.10.2020
3     23.10.2020
4     24.10.2020
5    25.10..2020
6     26.10.2020
7     27.10.2020
Name: Date, dtype: object

* A `for` loop over a single column goes through every value of that column, one row of the dataset at a time
* To access the index, use `data.index`
* To access the column names, use `data.columns`
* To access the values, use `data.values`

In [5]:
# Add one to every duration in a single vectorised step, then print each result on its own line.
durations_plus_one = data['Duration (Hrs)'] + 1
for value in durations_plus_one:
    print(value)

3.0
2.5
3.0
3.0
2.0
2.0
2.5
3.0


# Pandas interval index

The following cell demonstrates how to construct an interval index.

Immutable index of intervals that are closed on the same side.



In [6]:
# Equal-length intervals from 0 to 10; with no freq/periods given, each interval spans one unit.
pd.interval_range(start=0, end=10)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5], (5, 6], (6, 7], (7, 8], (8, 9], (9, 10]],
              closed='right',
              dtype='interval[int64]')

In [7]:
# 3x3 matrix with 2, 1, 2 on the main diagonal and zeros everywhere else.
ar = np.diag([2, 1, 2])

# Consecutive break points bound adjacent intervals, so four breaks yield three intervals.
breaks_a = [1, 2.333, 3.666, 5]
breaks_b = [6, 7.333, 8.67, 10]
a = pd.IntervalIndex.from_breaks(breaks_a)
b = pd.IntervalIndex.from_breaks(breaks_b)

print(a, b, sep='\n')


IntervalIndex([(1.0, 2.333], (2.333, 3.666], (3.666, 5.0]],
              closed='right',
              dtype='interval[float64]')
IntervalIndex([(6.0, 7.333], (7.333, 8.67], (8.67, 10.0]],
              closed='right',
              dtype='interval[float64]')


Make a MultiIndex from the cartesian product of multiple iterables.



In [8]:
# Pair every interval in `a` with every interval in `b`; the two index levels are named 'from' and 'to'.
mult_idx = pd.MultiIndex.from_product(
    iterables=[a, b],
    names=['from', 'to'],
)
mult_idx

MultiIndex([(  (1.0, 2.333],  (6.0, 7.333]),
            (  (1.0, 2.333], (7.333, 8.67]),
            (  (1.0, 2.333],  (8.67, 10.0]),
            ((2.333, 3.666],  (6.0, 7.333]),
            ((2.333, 3.666], (7.333, 8.67]),
            ((2.333, 3.666],  (8.67, 10.0]),
            (  (3.666, 5.0],  (6.0, 7.333]),
            (  (3.666, 5.0], (7.333, 8.67]),
            (  (3.666, 5.0],  (8.67, 10.0])],
           names=['from', 'to'])

In [9]:
# Flatten the matrix into a 1-D copy so each entry lines up with one (from, to) pair of the index.
flat_values = ar.flatten()
pd.DataFrame(flat_values, index=mult_idx)

Unnamed: 0_level_0,Unnamed: 1_level_0,0
from,to,Unnamed: 2_level_1
"(1.0, 2.333]","(6.0, 7.333]",2
"(1.0, 2.333]","(7.333, 8.67]",0
"(1.0, 2.333]","(8.67, 10.0]",0
"(2.333, 3.666]","(6.0, 7.333]",0
"(2.333, 3.666]","(7.333, 8.67]",1
"(2.333, 3.666]","(8.67, 10.0]",0
"(3.666, 5.0]","(6.0, 7.333]",0
"(3.666, 5.0]","(7.333, 8.67]",0
"(3.666, 5.0]","(8.67, 10.0]",2
