# Data Indexing and Selection

## Author: Sheikh Irfan Ullah Khan

### Contact Me: shirfan.math@gmail.com

### 1. Data Selection in DataFrame

#### 1.1. DataFrame as a Dictionary

In [1]:
# Import Library
import pandas as pd # Data Manipulation

In [2]:
# Map each state name to its capital city, listed as (state, capital) pairs.
states_capitals = dict([
    ("Fedral", "Islamabad"),
    ("KPK", "Peshawar"),
    ("Balochistan", "Quetta"),
    ("Punjab", "Lahore"),
    ("Sindh", "Karachi"),
])

In [3]:
# Map each state name to its language by pairing two parallel tuples.
states_languages = dict(zip(
    ("Fedral", "KPK", "Balochistan", "Punjab", "Sindh"),
    ("Urdu", "Pashto", "Balochi", "Punjabi", "Sindhi"),
))

In [4]:
# Combine the two mappings into one table: the outer keys name the columns,
# and the state names used as keys in both inner dicts become the row index.
df = pd.DataFrame(
    data={
        "Capitals": states_capitals,
        "Languages": states_languages,
    }
)
df

Unnamed: 0,Capitals,Languages
Fedral,Islamabad,Urdu
KPK,Peshawar,Pashto
Balochistan,Quetta,Balochi
Punjab,Lahore,Punjabi
Sindh,Karachi,Sindhi


In [5]:
# Build a marks table for five students: one Series per column, all sharing
# the same student-name index.
student_names = ["Eshaal", "Ahmad", "Faryal", "Hadi", "Hafsa"]

std_sub = pd.Series(["Mathematics", "English", "Science", "Urdu", "Social"], index=student_names)
tot_mrks = pd.Series([100] * len(student_names), index=student_names)
min_mrks = pd.Series([50] * len(student_names), index=student_names)
obt_mrks = pd.Series([98, 95, 99, 97, 90], index=student_names)

# The dict keys become the column labels, in the order given here.
df1 = pd.DataFrame(
    {
        "Subjects": std_sub,
        "Total Marks": tot_mrks,
        "Minimum Marks": min_mrks,
        "Obtained Marks": obt_mrks,
    }
)
df1

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks
Eshaal,Mathematics,100,50,98
Ahmad,English,100,50,95
Faryal,Science,100,50,99
Hadi,Urdu,100,50,97
Hafsa,Social,100,50,90


#### 1.2. How to Add New Columns to a DataFrame

In [6]:
# Add a "Score" column to df1: obtained marks as a percentage of total marks.
df1["Score"] = df1["Obtained Marks"].div(df1["Total Marks"]).mul(100)
df1

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score
Eshaal,Mathematics,100,50,98,98.0
Ahmad,English,100,50,95,95.0
Faryal,Science,100,50,99,99.0
Hadi,Urdu,100,50,97,97.0
Hafsa,Social,100,50,90,90.0


In [7]:
# Add a "Minimum Score" column to df1: minimum marks as a percentage of total marks.
df1["Minimum Score"] = df1["Minimum Marks"].div(df1["Total Marks"]).mul(100)
df1

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score,Minimum Score
Eshaal,Mathematics,100,50,98,98.0,50.0
Ahmad,English,100,50,95,95.0,50.0
Faryal,Science,100,50,99,99.0,50.0
Hadi,Urdu,100,50,97,97.0,50.0
Hafsa,Social,100,50,90,90.0,50.0


#### 1.3. DataFrame as a Two-Dimensional Array

In [8]:
# Print the frame as plain text, then show its underlying two-dimensional
# NumPy array. `to_numpy()` is the accessor the pandas documentation recommends
# over the older `.values` attribute; the result is the same array.
# The columns mix text and numbers, so the array comes back with dtype=object.
print(df1)
df1.to_numpy()

           Subjects  Total Marks  Minimum Marks  Obtained Marks  Score  \
Eshaal  Mathematics          100             50              98   98.0   
Ahmad       English          100             50              95   95.0   
Faryal      Science          100             50              99   99.0   
Hadi           Urdu          100             50              97   97.0   
Hafsa        Social          100             50              90   90.0   

        Minimum Score  
Eshaal           50.0  
Ahmad            50.0  
Faryal           50.0  
Hadi             50.0  
Hafsa            50.0  


array([['Mathematics', 100, 50, 98, 98.0, 50.0],
       ['English', 100, 50, 95, 95.0, 50.0],
       ['Science', 100, 50, 99, 99.0, 50.0],
       ['Urdu', 100, 50, 97, 97.0, 50.0],
       ['Social', 100, 50, 90, 90.0, 50.0]], dtype=object)

In [9]:
# Transpose the DataFrame so the student names become the columns and the
# former columns become the row index (`df1.T` is the shorthand for this call).
df1.transpose()

Unnamed: 0,Eshaal,Ahmad,Faryal,Hadi,Hafsa
Subjects,Mathematics,English,Science,Urdu,Social
Total Marks,100,100,100,100,100
Minimum Marks,50,50,50,50,50
Obtained Marks,98,95,99,97,90
Score,98.0,95.0,99.0,97.0,90.0
Minimum Score,50.0,50.0,50.0,50.0,50.0


In [10]:
# Access the first row of the DataFrame as a NumPy array.
# `to_numpy()` is the pandas-recommended replacement for `.values`;
# indexing the 2-D array with a single integer returns one row.
df1.to_numpy()[0]

array(['Mathematics', 100, 50, 98, 98.0, 50.0], dtype=object)

In [11]:
# Access the second row of the DataFrame as a NumPy array.
# `to_numpy()` is the pandas-recommended replacement for `.values`;
# array positions are zero-based, so index 1 is the second row.
df1.to_numpy()[1]

array(['English', 100, 50, 95, 95.0, 50.0], dtype=object)

In [12]:
# Access the 'Score' column with dictionary-style indexing.
# Passing a single column label to [] returns that column as a Series
# indexed by student name.
df1['Score']

Eshaal    98.0
Ahmad     95.0
Faryal    99.0
Hadi      97.0
Hafsa     90.0
Name: Score, dtype: float64

In [13]:
# Access the 'Total Marks' column with dictionary-style indexing.
# Column labels that contain a space work here because the label is passed as a string.
df1['Total Marks']

Eshaal    100
Ahmad     100
Faryal    100
Hadi      100
Hafsa     100
Name: Total Marks, dtype: int64

In [14]:
# Display the full DataFrame again as a reference for the slicing examples below.
df1

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score,Minimum Score
Eshaal,Mathematics,100,50,98,98.0,50.0
Ahmad,English,100,50,95,95.0,50.0
Faryal,Science,100,50,99,99.0,50.0
Hadi,Urdu,100,50,97,97.0,50.0
Hafsa,Social,100,50,90,90.0,50.0


In [15]:
# Implicit slicing: an integer slice inside [] selects ROWS by position.
# The stop position is excluded, so 1:3 returns the rows at positions 1 and 2.
df1[1:3] 

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score,Minimum Score
Ahmad,English,100,50,95,95.0,50.0
Faryal,Science,100,50,99,99.0,50.0


In [16]:
# Explicit slicing: a label slice inside [] selects ROWS by index label.
# Unlike positional slicing, both end labels are included ("Ahmad" through "Hadi").
df1["Ahmad":"Hadi"]

Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score,Minimum Score
Ahmad,English,100,50,95,95.0,50.0
Faryal,Science,100,50,99,99.0,50.0
Hadi,Urdu,100,50,97,97.0,50.0


#### 1.4. Access DataFrame Elements with the `loc` and `iloc` Indexers

In [17]:
# Print the DataFrame as plain text for reference before the loc/iloc examples.
print(df1)

           Subjects  Total Marks  Minimum Marks  Obtained Marks  Score  \
Eshaal  Mathematics          100             50              98   98.0   
Ahmad       English          100             50              95   95.0   
Faryal      Science          100             50              99   99.0   
Hadi           Urdu          100             50              97   97.0   
Hafsa        Social          100             50              90   90.0   

        Minimum Score  
Eshaal           50.0  
Ahmad            50.0  
Faryal           50.0  
Hadi             50.0  
Hafsa            50.0  


In [18]:
# Access the first three rows and the first two columns by integer position.
# iloc takes [row_slice, column_slice]; the stop position of each slice is excluded.
df1.iloc[0:3,0:2]

Unnamed: 0,Subjects,Total Marks
Eshaal,Mathematics,100
Ahmad,English,100
Faryal,Science,100


In [19]:
# Access the last three rows (positions 2 to 4) and every column from the
# third column (position 2) onward; an open-ended slice runs to the end.
df1.iloc[2:5, 2:]

Unnamed: 0,Minimum Marks,Obtained Marks,Score,Minimum Score
Faryal,50,99,99.0,50.0
Hadi,50,97,97.0,50.0
Hafsa,50,90,90.0,50.0


In [20]:
# Print the frame as text, then select everything with loc:
# a bare ':' for rows and for columns keeps all rows and all columns.
print(df1)
df1.loc[: , :]

           Subjects  Total Marks  Minimum Marks  Obtained Marks  Score  \
Eshaal  Mathematics          100             50              98   98.0   
Ahmad       English          100             50              95   95.0   
Faryal      Science          100             50              99   99.0   
Hadi           Urdu          100             50              97   97.0   
Hafsa        Social          100             50              90   90.0   

        Minimum Score  
Eshaal           50.0  
Ahmad            50.0  
Faryal           50.0  
Hadi             50.0  
Hafsa            50.0  


Unnamed: 0,Subjects,Total Marks,Minimum Marks,Obtained Marks,Score,Minimum Score
Eshaal,Mathematics,100,50,98,98.0,50.0
Ahmad,English,100,50,95,95.0,50.0
Faryal,Science,100,50,99,99.0,50.0
Hadi,Urdu,100,50,97,97.0,50.0
Hafsa,Social,100,50,90,90.0,50.0


In [21]:
# Access the rows from the start up to and including "Hadi" (the 4th row),
# and the columns from "Obtained Marks" through the last column.
# loc slices by label and includes the end label.
print(df1)
df1.loc[:"Hadi", "Obtained Marks":]

           Subjects  Total Marks  Minimum Marks  Obtained Marks  Score  \
Eshaal  Mathematics          100             50              98   98.0   
Ahmad       English          100             50              95   95.0   
Faryal      Science          100             50              99   99.0   
Hadi           Urdu          100             50              97   97.0   
Hafsa        Social          100             50              90   90.0   

        Minimum Score  
Eshaal           50.0  
Ahmad            50.0  
Faryal           50.0  
Hadi             50.0  
Hafsa            50.0  


Unnamed: 0,Obtained Marks,Score,Minimum Score
Eshaal,98,98.0,50.0
Ahmad,95,95.0,50.0
Faryal,99,99.0,50.0
Hadi,97,97.0,50.0


In [22]:
# Print the frame as text, then select a labelled block with loc:
# rows "Ahmad" through "Hadi" and columns "Total Marks" through "Obtained Marks".
# Both end labels are included on each axis.
print(df1)
df1.loc["Ahmad":"Hadi", "Total Marks": "Obtained Marks"]

           Subjects  Total Marks  Minimum Marks  Obtained Marks  Score  \
Eshaal  Mathematics          100             50              98   98.0   
Ahmad       English          100             50              95   95.0   
Faryal      Science          100             50              99   99.0   
Hadi           Urdu          100             50              97   97.0   
Hafsa        Social          100             50              90   90.0   

        Minimum Score  
Eshaal           50.0  
Ahmad            50.0  
Faryal           50.0  
Hadi             50.0  
Hafsa            50.0  


Unnamed: 0,Total Marks,Minimum Marks,Obtained Marks
Ahmad,100,50,95
Faryal,100,50,99
Hadi,100,50,97
