# Data Selection

In [2]:
import pandas as pd
import numpy as np

In [4]:
# Build one Series per future column; each gets the default integer index 0..3.
s1 = pd.Series(["Abhijith", "Bhargav", "Phani", "Manav"])  # student names
s2 = pd.Series([97, 98, 99, 100])  # Maths marks
s3 = pd.Series([79, 89, 69, 59])  # Science marks

In [9]:
# Map each column label to the Series that supplies its values.
dct = {
    "Name": s1,
    "Maths": s2,
    "Science": s3,
}

In [10]:
# Display the dict to inspect the three Series it holds
dct

{'Name': 0    Abhijith
 1     Bhargav
 2       Phani
 3       Manav
 dtype: object,
 'Maths': 0     97
 1     98
 2     99
 3    100
 dtype: int64,
 'Science': 0    79
 1    89
 2    69
 3    59
 dtype: int64}

In [13]:
# Build the DataFrame from the dict of Series (one column per key), then display it.
df_sub = pd.DataFrame(data=dct)
df_sub

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69
3,Manav,100,59


In [14]:
# Pass a list of column labels to [] to display only those columns
df_sub[["Name", "Science"]]

Unnamed: 0,Name,Science
0,Abhijith,79
1,Bhargav,89
2,Phani,69
3,Manav,59


In [15]:
# A slice inside [] selects rows by position: the first three rows (positions 0, 1, 2)
df_sub[0:3]

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69


### Selection by label - .loc 

In [16]:
# All rows (:) and only the Name column; passing the label in a list returns a DataFrame
df_sub.loc[:, ["Name"]]

Unnamed: 0,Name
0,Abhijith
1,Bhargav
2,Phani
3,Manav


In [19]:
# Rows labelled 1 and 3 (the 2nd and 4th rows), restricted to the Name and Science columns
df_sub.loc[[1, 3], ["Name", "Science"]]

Unnamed: 0,Name,Science
1,Bhargav,89
3,Manav,59


In [22]:
# Assigning through .loc to a column label that does not exist yet creates that column.
df_sub.loc[:, "ID"] = [1, 2, 3, 4]  # different value per row: pass a list, one entry per row
df_sub.loc[:, "Age"] = 23  # same value for every row: pass a single scalar
df_sub

Unnamed: 0,Name,Maths,Science,ID,Age
0,Abhijith,97,79,1,23
1,Bhargav,98,89,2,23
2,Phani,99,69,3,23
3,Manav,100,59,4,23


In [26]:
# Drop a column by label using the columns= keyword; the result is assigned back to df_sub
df_sub = df_sub.drop(columns=["Age"])
df_sub

Unnamed: 0,Name,Maths,Science,ID
0,Abhijith,97,79,1
1,Bhargav,98,89,2
2,Phani,99,69,3
3,Manav,100,59,4


In [28]:
# Second way to drop a column: pass the labels positionally and set axis=1 (columns)
df_sub = df_sub.drop(["ID"], axis=1)
df_sub

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69
3,Manav,100,59


In [29]:
# Display the frame again: only Name, Maths and Science remain
df_sub

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69
3,Manav,100,59


---

### Selection by position - .iloc

In [30]:
# .iloc selects by integer position, which is convenient when there are too many columns to type out by name.
# All rows and the first two columns (positions 0 and 1)
df_sub.iloc[:, :2]

Unnamed: 0,Name,Maths
0,Abhijith,97
1,Bhargav,98
2,Phani,99
3,Manav,100


In [31]:
# 2nd and 4th rows (positions 1 and 3) with the Maths and Science columns
# (the slice 1:3 covers positions 1 and 2; the stop position is excluded)
df_sub.iloc[[1, 3], 1:3]

Unnamed: 0,Maths,Science
1,98,89
3,100,59


In [32]:
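# We cannot create a new column using iloc: it only addresses positions that already exist,
# so assigning to a position past the last column raises an IndexError. Use df[col] = ... or .loc instead.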

---

### Modifying and filtering values using loc 

In [33]:
# Display the current frame before modifying any values
df_sub

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69
3,Manav,100,59


In [34]:
# Change Manav's name to Adharv using .loc:
# the boolean mask picks the matching rows, and 'Name' picks the column to overwrite.
is_manav = df_sub["Name"] == "Manav"
df_sub.loc[is_manav, "Name"] = "Adharv"
df_sub

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89
2,Phani,99,69
3,Adharv,100,59


In [35]:
# .iloc does not accept a boolean Series mask like the one below directly, so use .loc for conditional filtering
# Rows whose Science score is greater than or equal to 70
df_sub.loc[df_sub['Science'] >= 70]

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
1,Bhargav,98,89


In [36]:
# Rows with a Science score of at least 70, showing only the Name and Science columns
df_sub.loc[df_sub["Science"] >= 70, ["Name", "Science"]]

Unnamed: 0,Name,Science
0,Abhijith,79
1,Bhargav,89


In [37]:
# .isin gives a mask that is True where Name matches any listed value:
# all the scores for Abhijith and Phani
df_sub.loc[df_sub["Name"].isin(["Abhijith", "Phani"])]

Unnamed: 0,Name,Maths,Science
0,Abhijith,97,79
2,Phani,99,69


---