# DataFrames
- 2 dimensional data structure
- table with multiple rows and columns

In [142]:
import numpy as np
import pandas as pd

# constructing dataframe from dictionary

In [4]:
# Each keyword becomes a column; the list behind it supplies that column's values.
# All lists must have the same length (one entry per row).
data1 = dict(
    name=['Ram', 'Shyam', 'Hari', 'Sita'],
    age=[22, 23, 24, 21],
    marks=[79, 91, 81, 90],
)
df = pd.DataFrame(data=data1)
df

Unnamed: 0,name,age,marks
0,Ram,22,79
1,Shyam,23,91
2,Hari,24,81
3,Sita,21,90


In [5]:
# dtypes lists the data type pandas inferred for every column of the frame
df.dtypes

name     object
age       int64
marks     int64
dtype: object

In [144]:
# Note: for columns built from plain Python integers pandas infers int64.
# The dtype= argument of the constructor overrides that inference for every
# column; int8 is enough here because all values fit in the range -128..127.
data1 = {
    'age': [22, 23, 24, 21],
    'marks': [79, 91, 81, 90],
}
df = pd.DataFrame(data1, dtype=np.int8)
df

Unnamed: 0,age,marks
0,22,79
1,23,91
2,24,81
3,21,90


In [26]:
# confirm that the dtype requested in the constructor was applied to each column
df.dtypes

age      int8
marks    int8
dtype: object

# constructing dataframe from ndarray

In [145]:
# A 3x3 integer matrix: every inner list becomes one row of the DataFrame.
# No labels are supplied, so pandas numbers both rows and columns from 0.
mydata = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df = pd.DataFrame(data=mydata)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


- provide names for the columns
- the default labels 0, 1, 2 in the output above can be confusing, so it is
  helpful to give the columns meaningful names
- this is done with the parameter `columns`; the parameter `index` labels the rows in the same way

In [146]:
# Same 3x3 matrix as before, now with explicit labels:
#   columns -> detail1, detail2, detail3
#   index   -> a, b, c (one label per row)
mydata = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df = pd.DataFrame(
    data=mydata,
    index=list('abc'),
    columns=[f'detail{n}' for n in (1, 2, 3)],
)
df

Unnamed: 0,detail1,detail2,detail3
a,1,2,3
b,4,5,6
c,7,8,9


- Let's give the rows (the index) meaningful names as well
- this is done using the parameter `index`

In [147]:
# Rows are labelled with day names and columns with city codes,
# so every cell can be addressed as (day, city).
mydata = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df = pd.DataFrame(
    data=mydata,
    index=['Sunday', 'Monday', 'Tuesday'],
    columns=['ktm', 'brt', 'drn'],
)
df

Unnamed: 0,ktm,brt,drn
Sunday,1,2,3
Monday,4,5,6
Tuesday,7,8,9


# access the value of ktm only

- method 1
- to access the values we use the name of the dataframe (`df` in our case)
  followed by the name of the column, joined with the dot operator (`.`)


In [149]:
# Attribute-style access returns the 'ktm' column as a Series.
# It only works when the column name is a valid Python identifier and does not
# clash with an existing DataFrame attribute or method.
df.ktm

Sunday     1
Monday     4
Tuesday    7
Name: ktm, dtype: int32

- method 2 
- we can access the data of particular column this way as well:


In [150]:
# Bracket access returns the same Series and works for any column name.
df['ktm']

Sunday     1
Monday     4
Tuesday    7
Name: ktm, dtype: int32

# loc and iloc

In [151]:
# Display the values of the row labelled 'Sunday'.
# loc selects rows (and columns) by label, or with a boolean array.
# A single label returns that row as a Series whose index is the column names.
df.loc['Sunday']

ktm    1
brt    2
drn    3
Name: Sunday, dtype: int32

In [152]:
# Access the rows labelled 'Sunday' and 'Monday'.
df.loc[['Sunday','Monday']]
# Note:
#     passing a list of labels ([[...]]) returns a DataFrame,
#     whereas a single label returns a Series

Unnamed: 0,ktm,brt,drn
Sunday,1,2,3
Monday,4,5,6


In [153]:
# Access the value of 'brt' for the row 'Sunday'.
# Row and column labels are passed to loc together, so the value is found in
# one lookup instead of first building the whole 'Sunday' row and then
# indexing into it (chained indexing).
df.loc['Sunday', 'brt']

2

In [87]:
# Access the values of 'brt' and 'drn' for the row 'Sunday'.
# A single row label with a list of column labels returns a Series;
# one loc call replaces the chained df.loc[row][cols] form.
df.loc['Sunday', ['brt', 'drn']]

brt    2
drn    3
Name: Sunday, dtype: int32

In [154]:
# Access the values of 'brt' and 'drn' for the rows 'Sunday' and 'Monday'.
# Lists for both rows and columns return a DataFrame;
# one loc call replaces the chained df.loc[rows][cols] form.
df.loc[['Sunday', 'Monday'], ['brt', 'drn']]

Unnamed: 0,brt,drn
Sunday,2,3
Monday,5,6


# Constructing DataFrame from Series

In [155]:
# A labelled Series becomes a one-column DataFrame.
# The Series index is reused as the row index; the Series has no name,
# so the single column gets the default label 0.
my_series = pd.Series(
    data=[1, 2, 3, 4],
    index=['Sun', 'Mon', 'Tue', 'Wed'],
)
df = pd.DataFrame(data=my_series)
df

Unnamed: 0,0
Sun,1
Mon,2
Tue,3
Wed,4


In [156]:
# Same Series as before; this time the column gets a readable name
# through the columns= parameter instead of the default label 0.
my_series = pd.Series(
    data=[1, 2, 3, 4],
    index=['Sun', 'Mon', 'Tue', 'Wed'],
)
df = pd.DataFrame(data=my_series, columns=['kathmandu'])
df

Unnamed: 0,kathmandu
Sun,1
Mon,2
Tue,3
Wed,4


- let's check loc and iloc

In [157]:
# Sample frame for comparing loc and iloc:
# the numbers 1..9 laid out as 3 rows x 3 columns, with string row labels.
mydata = np.arange(1, 10).reshape(3, 3)
df = pd.DataFrame(
    data=mydata,
    index=['Sunday', 'Monday', 'Tuesday'],
    columns=['ktm', 'brt', 'drn'],
)
df

Unnamed: 0,ktm,brt,drn
Sunday,1,2,3
Monday,4,5,6
Tuesday,7,8,9


In [158]:
# loc is label-based, so the row labelled 'Sunday' is returned without any issue
df.loc['Sunday']

ktm    1
brt    2
drn    3
Name: Sunday, dtype: int32

In [159]:
# iloc is position-based, so a label such as 'Sunday' is rejected with a TypeError.
# The error is the point of this cell: it is caught and printed so that the
# message is still shown but "Restart & Run All" does not stop here.
try:
    df.iloc['Sunday']
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Cannot index by location index with a non-integer key

- `TypeError: Cannot index by location index with a non-integer key` is raised
- this shows that `iloc` accepts only integer positions (0, 1, 2, ...); a row label such as 'Sunday' has to be looked up with `loc`

    

In [161]:
# Sample frame whose row labels are the integers 1, 2, 3.
# The labels deliberately differ from the positions 0, 1, 2.
mydata = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df = pd.DataFrame(
    data=mydata,
    index=[1, 2, 3],
    columns=['ktm', 'brt', 'drn'],
)
df

Unnamed: 0,ktm,brt,drn
1,1,2,3
2,4,5,6
3,7,8,9


In [162]:
# iloc[1] selects the row at position 1, i.e. the second row.
# The index labels here are 1, 2, 3, so the row returned carries the label 2,
# not the label 1: iloc counts positions from 0 and ignores the labels.
df.iloc[1]

ktm    4
brt    5
drn    6
Name: 2, dtype: int32

# Purely integer-location based indexing for selection by position.

In [129]:
# Larger frame for the slicing examples: the numbers 1..100 arranged as
# 20 rows x 5 columns, with the integers 0..19 as row labels.
col_name = ['ktm', 'brt', 'drn', 'pkr', 'jkp']
mydata = np.arange(1, 101).reshape(20, 5)
df = pd.DataFrame(data=mydata, columns=col_name, index=np.arange(20))
df

Unnamed: 0,ktm,brt,drn,pkr,jkp
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15
3,16,17,18,19,20
4,21,22,23,24,25
5,26,27,28,29,30
6,31,32,33,34,35
7,36,37,38,39,40
8,41,42,43,44,45
9,46,47,48,49,50


In [130]:
# position 19 is the last of the 20 rows (positions start at 0)
df.iloc[19]

ktm     96
brt     97
drn     98
pkr     99
jkp    100
Name: 19, dtype: int32

In [132]:
# slice of positions 0 up to, but not including, 7 -> rows 0..6
df.iloc[0:7]

Unnamed: 0,ktm,brt,drn,pkr,jkp
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15
3,16,17,18,19,20
4,21,22,23,24,25
5,26,27,28,29,30
6,31,32,33,34,35


In [133]:
df.iloc[0:7:2]
# slicing with a step: start:stop:step
# start at position 0, stop before position 7, and move 2 positions each time
# -> rows 0, 2, 4, 6

Unnamed: 0,ktm,brt,drn,pkr,jkp
0,1,2,3,4,5
2,11,12,13,14,15
4,21,22,23,24,25
6,31,32,33,34,35


In [134]:
df.iloc[1:7:2]
# slicing with a step: start:stop:step
# start at position 1, stop before position 7, and move 2 positions each time
# -> rows 1, 3, 5

Unnamed: 0,ktm,brt,drn,pkr,jkp
1,6,7,8,9,10
3,16,17,18,19,20
5,26,27,28,29,30


In [141]:
# Value of column 'brt' in the row at position 5.
# iloc takes (row position, column position); get_loc translates the column
# name into its position, so the value is fetched in one lookup instead of
# building the whole row first and then indexing into it (chained indexing).
df.iloc[5, df.columns.get_loc('brt')]

27

- Thank you all
- Good times!!