## A pandas DataFrame is a two-dimensional, size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). In other words, the data is aligned in a tabular fashion in rows and columns.

In [95]:
import pandas as pd
import numpy as np

In [96]:
# Column-oriented input: each key becomes a column label and each list holds
# that column's values, one entry per row.
# The mapping is deliberately NOT called `dict`: that name would shadow the
# built-in `dict` type for the rest of the kernel session.
marks_data = {
    'Name': ['Anjali', 'Ranjan', 'Mike', 'Abdul'],
    'Maths': [87, 79, 75, 79],
    'Science': [98, 89, 74, 81],
}
# No index is supplied, so the rows get the default integer labels 0..3.
df = pd.DataFrame(marks_data)

In [97]:
# A bare expression on the last line of a cell is rendered as the cell output (here: the table).
df

Unnamed: 0,Name,Maths,Science
0,Anjali,87,98
1,Ranjan,79,89
2,Mike,75,74
3,Abdul,79,81


# To get the Column

In [98]:
# Fetch one column: indexing with a single column label returns that column
# as a Series (note the Name/dtype footer in the output).

df['Name']

0    Anjali
1    Ranjan
2      Mike
3     Abdul
Name: Name, dtype: object

In [99]:
# Fetch multiple columns: pass a list of column labels (hence the double brackets).
# The result is a table containing only the requested columns.

df[['Name','Maths']]


Unnamed: 0,Name,Maths
0,Anjali,87
1,Ranjan,79
2,Mike,75
3,Abdul,79


In [100]:
# DataFrame columns are just Series: check the type of a single selected column.

type(df['Name'])

pandas.core.series.Series

## To Create a New Column in a DataFrame

In [101]:
# Assigning to a label that does not exist yet adds a new column.
# The arithmetic is element-wise: every Maths mark is increased by 10.
df['sql']=df['Maths'] + 10

In [102]:
# Display the frame to confirm that the 'sql' column is now present.
df

Unnamed: 0,Name,Maths,Science,sql
0,Anjali,87,98,97
1,Ranjan,79,89,89
2,Mike,75,74,85
3,Abdul,79,81,89


In [103]:
# Drop a column from a DataFrame.
# drop() hands back a new frame, so the name `df` is rebound to the result.

df = df.drop(columns='sql')

In [104]:
# Display the frame to confirm that the 'sql' column is gone.
df

Unnamed: 0,Name,Maths,Science
0,Anjali,87,98
1,Ranjan,79,89
2,Mike,75,74
3,Abdul,79,81


In [105]:
# Number of non-null entries in each column.
df.count()

Name       4
Maths      4
Science    4
dtype: int64

## Working on Rows

In [106]:
# Column-oriented input for the row-selection examples.
# The mapping is deliberately NOT called `dict`: that name would shadow the
# built-in `dict` type for the rest of the kernel session.
customer_data = {
    'Name': ['Dylan', 'Clapton', 'Johnson', 'Adams'],
    'Zip': [87, 79, 75, 79],
    'Phone': [980, 890, 740, 810],
}
# Explicit row labels, used in place of the default 0..n-1 integers.
index = ['A', 'B', 'C', 'D']
# Pass the labels by keyword so it is obvious which constructor argument they fill.
df1 = pd.DataFrame(customer_data, index=index)

In [107]:
# Display the frame; the rows are labelled A-D rather than 0-3.
df1

Unnamed: 0,Name,Zip,Phone
A,Dylan,87,980
B,Clapton,79,890
C,Johnson,75,740
D,Adams,79,810


## Selecting one row by name

In [38]:
# .loc selects by label: the row labelled 'A' comes back as a Series keyed by column name.
df1.loc['A']

Name     Dylan
Zip         87
Phone      980
Name: A, dtype: object

## Selecting multiple rows by name

In [41]:
# Passing a list of labels to .loc selects several rows at once; the result is a table.
df1.loc[['A','C']]

Unnamed: 0,Name,Zip,Phone
A,Dylan,87,980
C,Johnson,75,740


## Select single row by integer index location

In [110]:
# .iloc selects by integer position: position 0 is the first row (the one labelled 'A').
df1.iloc[0]

Name     Dylan
Zip         87
Phone      980
Name: A, dtype: object

## Select multiple rows by integer index location

In [111]:
# Positional slice: the end position is exclusive, so 0:2 returns the rows at positions 0 and 1.
df1.iloc[0:2]

Unnamed: 0,Name,Zip,Phone
A,Dylan,87,980
B,Clapton,79,890


## Removing Rows from a Dataframe

In [112]:
# Remove the row labelled 'D'. drop() hands back a new frame, so `df1` is rebound to the result.
df1 = df1.drop(index='D')

In [113]:
# Display the frame to confirm that row 'D' is no longer present.
df1

Unnamed: 0,Name,Zip,Phone
A,Dylan,87,980
B,Clapton,79,890
C,Johnson,75,740


## Selecting subset of rows and columns at same time

In [114]:
# .loc[rows, columns]: pick the rows labelled 'A' and 'C', keeping only the Name and Phone columns.
df1.loc[['A','C'],['Name','Phone']]

Unnamed: 0,Name,Phone
A,Dylan,980
C,Johnson,740


## Conditional Selection

In [115]:
# Numeric frame with a mix of positive and negative values, used to
# demonstrate boolean (conditional) selection.
# The mapping is deliberately NOT called `dict`: that name would shadow the
# built-in `dict` type for the rest of the kernel session.
quarterly_data = {
    'Q1': [100, 1000, 1001, 1002],
    'Q2': [-870, 790, -750, 790],
    'Q3': [980, -890, 740, -810],
}
# Explicit row labels, used in place of the default 0..n-1 integers.
index = ['A', 'B', 'C', 'D']
# Pass the labels by keyword so it is obvious which constructor argument they fill.
df2 = pd.DataFrame(quarterly_data, index=index)

In [116]:
# Display the frame; note the mix of positive and negative values.
df2

Unnamed: 0,Q1,Q2,Q3
A,100,-870,980
B,1000,790,-890
C,1001,-750,740
D,1002,790,-810


In [117]:
# Element-wise comparison: yields a frame of booleans with the same shape and labels as df2.
df2 > 0

Unnamed: 0,Q1,Q2,Q3
A,True,False,True
B,True,True,False
C,True,False,True
D,True,True,False


In [118]:
# Indexing with a boolean frame keeps the original shape: cells where the condition
# is False become NaN, and the columns that received NaN are displayed as floats.
df2[df2 > 0]

Unnamed: 0,Q1,Q2,Q3
A,100,,980.0
B,1000,790.0,
C,1001,,740.0
D,1002,790.0,


## For two conditions simultaneously you can use | and & with parentheses

In [119]:
# Show the unfiltered frame again for reference before filtering rows.
df2

Unnamed: 0,Q1,Q2,Q3
A,100,-870,980
B,1000,790,-890
C,1001,-750,740
D,1002,790,-810


In [120]:
# Row filter with two conditions: keep the rows where Q1 is positive AND Q2 is negative.
# Each comparison needs its own parentheses because & (and |) bind more tightly than < and >.
df2[(df2['Q1'] > 0) & (df2['Q2'] < 0)]

Unnamed: 0,Q1,Q2,Q3
A,100,-870,980
C,1001,-750,740


## To Get the info

In [121]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes, memory usage.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, A to D
Data columns (total 3 columns):
Q1    4 non-null int64
Q2    4 non-null int64
Q3    4 non-null int64
dtypes: int64(3)
memory usage: 128.0+ bytes


## To Get the Columns

In [122]:
# The column labels, returned as an Index object.
df2.columns

Index(['Q1', 'Q2', 'Q3'], dtype='object')

## To Reset the Index

In [123]:
# head() shows the first rows (five by default); this frame has only four, so all of them appear.
df2.head()

Unnamed: 0,Q1,Q2,Q3
A,100,-870,980
B,1000,790,-890
C,1001,-750,740
D,1002,790,-810


In [124]:
# reset_index() turns the current index (A-D) into a regular column named 'index'
# and installs a fresh default integer index (0, 1, 2, ...).
# The result is not assigned, so df2 itself is left unchanged.
df2.reset_index()

Unnamed: 0,index,Q1,Q2,Q3
0,A,100,-870,980
1,B,1000,790,-890
2,C,1001,-750,740
3,D,1002,790,-810


In [127]:
# set_index('Name') makes the 'Name' column the row index instead of a regular column.
# The result is not assigned, so df itself is left unchanged.
df.set_index('Name')

Unnamed: 0_level_0,Maths,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Anjali,87,98
Ranjan,79,89
Mike,75,74
Abdul,79,81
