# Pandas DataFrame overview

- A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.).The DataFrame has both a row and column index

# DataFrame

In [2]:
import pandas as pd

In [3]:
umar = pd.Series([1,2,5,6,8,9])
zain = pd.Series([4,3,5,6,7,8])
print(umar,zain)

0    1
1    2
2    5
3    6
4    8
5    9
dtype: int64 0    4
1    3
2    5
3    6
4    7
5    8
dtype: int64


In [5]:
umar = pd.Series([1,2,5,6,8,9])
zain = pd.Series([4,3,5,6,7,8])
data = {'umars':umar,"zains":'zain'}
df = pd.DataFrame(data)
df

Unnamed: 0,umars,zains
0,1,zain
1,2,zain
2,5,zain
3,6,zain
4,8,zain
5,9,zain


In [13]:
umar = pd.Series([1,3,4,5], index=['a','e','i','o'])
zain = pd.Series([12,34,56,78],index=['jan','feb','mar','apr'])
data = {'umar':umar,'zain':zain}
df = pd.DataFrame(data)
df
# We are facing NaN values because indexes are not matches.

Unnamed: 0,umar,zain
a,1.0,
apr,,78.0
e,3.0,
feb,,34.0
i,4.0,
jan,,12.0
mar,,56.0
o,5.0,


In [14]:
umar = pd.Series([1,3,4,5], index=['a','e','i','o'])
zain = pd.Series([12,34,56,78],index=['a','e','i','o'])
data = {'umar':umar,'zain':zain}
df = pd.DataFrame(data)
df

Unnamed: 0,umar,zain
a,1,12
e,3,34
i,4,56
o,5,78


## with assigning indexing

In [20]:

students = ['Umar','Zain','Asfand','Faizan','Irfan']
data = {'Students':students,
        'Numbers':[934,820,681,988,846],
        'age_year':[2005,2004,2006,2007,2001]}
df = pd.DataFrame(data,
                index =['1st','2nd','3rd','4th','5th'])
df

Unnamed: 0,Students,Numbers,age_year
1st,Umar,934,2005
2nd,Zain,820,2004
3rd,Asfand,681,2006
4th,Faizan,988,2007
5th,Irfan,846,2001


## without assigning indexing

In [17]:
students = ['Umar','Zain','Asfand','Faizan','Irfan']
data = {'Students':students,
        'Numbers':[934,820,681,988,846],
        'age_year':[2005,2004,2006,2007,2001]}
df = pd.DataFrame(data)
df

Unnamed: 0,Students,Numbers,age_year
0,Umar,934,2005
1,Zain,820,2004
2,Asfand,681,2006
3,Faizan,988,2007
4,Irfan,846,2001


## By chnging the positions of columns

In [28]:
df_1 = pd.DataFrame(df,columns=['age_year','Numbers','Students'])
df_1

Unnamed: 0,age_year,Numbers,Students
1st,2005,934,Umar
2nd,2004,820,Zain
3rd,2006,681,Asfand
4th,2007,988,Faizan
5th,2001,846,Irfan


## By changing the names of columns

In [18]:
df_1 = pd.DataFrame(df,columns=['jibran','usman','imran'])
df_1

Unnamed: 0,jibran,usman,imran
0,,,
1,,,
2,,,
3,,,
4,,,


In [30]:
df_2 =pd.DataFrame(data,
                columns=['Students','Numbers','age_year','Grading','age'],
                index=['one','two','three','four','five'])
df_2

Unnamed: 0,Students,Numbers,age_year,Grading,age
one,Umar,934,2005,,
two,Zain,820,2004,,
three,Asfand,681,2006,,
four,Faizan,988,2007,,
five,Irfan,846,2001,,


In [31]:
df_2.head(3)

Unnamed: 0,Students,Numbers,age_year,Grading,age
one,Umar,934,2005,,
two,Zain,820,2004,,
three,Asfand,681,2006,,


In [32]:
df_2.tail(2)

Unnamed: 0,Students,Numbers,age_year,Grading,age
four,Faizan,988,2007,,
five,Irfan,846,2001,,


## Displaying the names of columns and indexes.

In [33]:
print(df_2.columns)
print()
print(df_2.index)

Index(['Students', 'Numbers', 'age_year', 'Grading', 'age'], dtype='object')

Index(['one', 'two', 'three', 'four', 'five'], dtype='object')


A column in a DataFrame can be retrieved as a Series either by dict-like notation or by attribute:
-

dictionary liken notation to access or extract
-

In [35]:
print(df_2['Students'])
print()
df_2['age_year']

one        Umar
two        Zain
three    Asfand
four     Faizan
five      Irfan
Name: Students, dtype: object



one      2005
two      2004
three    2006
four     2007
five     2001
Name: age_year, dtype: int64

## Ther is another method.

In [36]:
df_2.Numbers

one      934
two      820
three    681
four     988
five     846
Name: Numbers, dtype: int64

## Use of loc

In [38]:
df_2.loc['five'] # selected a row

Students    Irfan
Numbers       846
age_year     2001
Grading       NaN
age           NaN
Name: five, dtype: object

In [42]:
df_2.loc[:,'Students'] # Selected a column

one        Umar
two        Zain
three    Asfand
four     Faizan
five      Irfan
Name: Students, dtype: object

In [45]:
df_2.isnull().sum()

Students    0
Numbers     0
age_year    0
Grading     5
age         5
dtype: int64

In [48]:
# without indexing
umar = pd.Series([23,44,56,34,23])
df_2['age']=umar
df_2

Unnamed: 0,Students,Numbers,age_year,Grading,age
one,Umar,934,2005,,
two,Zain,820,2004,,
three,Asfand,681,2006,,
four,Faizan,988,2007,,
five,Irfan,846,2001,,


In [49]:
umar = pd.Series([23,44,56,34,23],index=['two','three','four','one','five'])
df_2['age']=umar
df_2

Unnamed: 0,Students,Numbers,age_year,Grading,age
one,Umar,934,2005,,34
two,Zain,820,2004,,23
three,Asfand,681,2006,,44
four,Faizan,988,2007,,56
five,Irfan,846,2001,,23


## formation of dataframe by using numpy and deleting a column or row from data frame.

In [50]:
import numpy as np

In [60]:
df_3 = pd.DataFrame(np.arange(20).reshape((5,4)),
                    index=['one','two','three','four','five'],
                    columns=['Pak','Ind','Eng','Bang'])
df_3

Unnamed: 0,Pak,Ind,Eng,Bang
one,0,1,2,3
two,4,5,6,7
three,8,9,10,11
four,12,13,14,15
five,16,17,18,19


## Drop a column from the dataframe.

In [61]:
df_3.drop('Pak',axis=1)


Unnamed: 0,Ind,Eng,Bang
one,1,2,3
two,5,6,7
three,9,10,11
four,13,14,15
five,17,18,19


In [64]:
df_3.drop('five',axis=0,inplace=True)
df_3

Unnamed: 0,Pak,Ind,Eng,Bang
one,0,1,2,3
two,4,5,6,7
three,8,9,10,11
four,12,13,14,15


## Selection with loc and iloc

In [3]:
df = pd.DataFrame(np.arange(25).reshape((5,5)),
                    index=['one','two','three','four','five'],
                    columns=['potato','egg','shake','weight','umar'])
df

Unnamed: 0,potato,egg,shake,weight,umar
one,0,1,2,3,4
two,5,6,7,8,9
three,10,11,12,13,14
four,15,16,17,18,19
five,20,21,22,23,24


In [5]:
df.loc[['one','two'],['egg','shake']]

Unnamed: 0,egg,shake
one,1,2
two,6,7


In [7]:
df.iloc[3,[3,4]]

weight    18
umar      19
Name: four, dtype: int32

In [6]:
df.iloc[2:,[3,0,1]]

Unnamed: 0,weight,potato,egg
three,13,10,11
four,18,15,16
five,23,20,21


In [10]:
df.iloc[:,:] # drag all the data

Unnamed: 0,potato,egg,shake,weight,umar
one,0,1,2,3,4
two,5,6,7,8,9
three,10,11,12,13,14
four,15,16,17,18,19
five,20,21,22,23,24


## Arithematic operations on the dataframe

In [15]:
df1 = pd.DataFrame(np.arange(15).reshape((3,5)),
                    columns = list('abcde'))
df2 = pd.DataFrame(np.arange(25).reshape((5,5)),
                    columns = list('abcde'))
df1
#df2

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14


In [16]:
df2

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14
3,15,16,17,18,19
4,20,21,22,23,24


In [18]:
df3 = df1+df2
df3

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,8.0
1,10.0,12.0,14.0,16.0,18.0
2,20.0,22.0,24.0,26.0,28.0
3,,,,,
4,,,,,


In [20]:
df3 = df1.add(df2,fill_value=0)
df3

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,8.0
1,10.0,12.0,14.0,16.0,18.0
2,20.0,22.0,24.0,26.0,28.0
3,15.0,16.0,17.0,18.0,19.0
4,20.0,21.0,22.0,23.0,24.0
