In [1]:
import pandas as pd
import numpy as np

In [2]:
"""
Series indexing (obj[...]) works analogously to Numpy array indexing, except you can use the series's index values instead of 
only integers.
"""

# Five car names labelled with an explicit integer index 1-5 (instead of the default 0-4).
car_names = pd.Series(
    data=['Bugatti', 'Lamborghini', 'Rolls Royce', 'Mustang', 'Skyline'],
    index=[1, 2, 3, 4, 5],
)

In [3]:
# Display the Series: index labels 1-5 on the left, car names on the right.
car_names

1        Bugatti
2    Lamborghini
3    Rolls Royce
4        Mustang
5        Skyline
dtype: object

In [4]:
# A scalar integer key is matched against the index labels here, so 2 returns the
# entry labelled 2 ('Lamborghini'), not the third element.
car_names[2]

'Lamborghini'

In [5]:
# Label 1 is the first entry ('Bugatti').
car_names[1]

'Bugatti'

In [6]:
# Unlike a scalar key, an integer slice inside [] is positional and end-exclusive:
# it returns positions 2 and 3, i.e. the rows labelled 3 and 4.
car_names[2:4]

3    Rolls Royce
4        Mustang
dtype: object

In [7]:
# A list of integers is looked up by label; rows come back in the order requested.
car_names[[2, 1, 4]]

2    Lamborghini
1        Bugatti
4        Mustang
dtype: object

In [8]:
# Another label list: the entries labelled 1 and 3.
car_names[[1, 3]]

1        Bugatti
3    Rolls Royce
dtype: object

In [9]:
# Slicing with labels (e.g. through .loc) behaves differently than normal Python slicing in that the end point is inclusive.
# NOTE: car_names has an integer index, so the plain [] slice below is positional and end-exclusive instead:
# it returns only position 2 (the row labelled 3), as the output shows.

car_names[2:3]

3    Rolls Royce
dtype: object

In [10]:
# Setting using these methods modifies the corresponding section of the Series in place.
# The positional slice 2:3 covers a single row (label 3), so only that entry is overwritten.

car_names[2:3] = 'Koenigsegg jesko'

In [11]:
# Display the Series again to confirm the assignment: only label 3 changed.
car_names

1             Bugatti
2         Lamborghini
3    Koenigsegg jesko
4             Mustang
5             Skyline
dtype: object

In [12]:
# Indexing into a DataFrame retrieves one or more columns, either with a single value or a sequence.

# To illustrate this, we first create an example DataFrame: the integers 0-15 laid out
# as a 4x4 grid, with car names as row labels and 'One'..'Four' as column labels.

car_data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Bugatti', 'Lamborghini', 'Rolls Royce', 'Koenigsegg jesko'],
    columns=['One', 'Two', 'Three', 'Four'],
)

In [13]:
# Display the example DataFrame.
car_data

Unnamed: 0,One,Two,Three,Four
Bugatti,0,1,2,3
Lamborghini,4,5,6,7
Rolls Royce,8,9,10,11
Koenigsegg jesko,12,13,14,15


In [14]:
# A single column label inside [] returns that column as a Series named after the column.
car_data['Two']

Bugatti              1
Lamborghini          5
Rolls Royce          9
Koenigsegg jesko    13
Name: Two, dtype: int32

In [15]:
# A list of column labels returns a DataFrame with the columns in the requested order.
car_data[['Three', 'One']]

Unnamed: 0,Three,One
Bugatti,2,0
Lamborghini,6,4
Rolls Royce,10,8
Koenigsegg jesko,14,12


In [16]:
# Indexing like this has a few special cases. First, slicing or selecting data with a boolean array:
# a slice inside [] selects rows (here the first two) rather than columns.

car_data[:2]

Unnamed: 0,One,Two,Three,Four
Bugatti,0,1,2,3
Lamborghini,4,5,6,7


In [17]:
# A boolean Series inside [] filters rows: keep the rows whose 'Three' value is greater than 5.
car_data[car_data['Three'] > 5]

Unnamed: 0,One,Two,Three,Four
Lamborghini,4,5,6,7
Rolls Royce,8,9,10,11
Koenigsegg jesko,12,13,14,15


In [18]:
"""
The row selection syntax car_data[:2] is provided as a convenience. Passing a single element or a list to the [] operator selects
columns.
Another use case is indexing with a boolean DataFrame, such as one produced by a scalar comparison:
"""

# Element-wise comparison: yields a boolean DataFrame with the same row and column labels as car_data.
car_data < 5

Unnamed: 0,One,Two,Three,Four
Bugatti,True,True,True,True
Lamborghini,True,False,False,False
Rolls Royce,False,False,False,False
Koenigsegg jesko,False,False,False,False


In [19]:
# Assigning through a boolean DataFrame mask overwrites every cell where the mask is True
# (all values below 5 become 0). This mutates car_data, so the cells that follow see the
# modified values.
car_data[car_data < 5] = 0

In [20]:
# Display the DataFrame again to confirm the masked assignment.
car_data

Unnamed: 0,One,Two,Three,Four
Bugatti,0,0,0,0
Lamborghini,0,5,6,7
Rolls Royce,8,9,10,11
Koenigsegg jesko,12,13,14,15


In [21]:
# This makes DataFrame syntactically more like a two-dimensional NumPy array in this particular case.

# Selection with loc and iloc

In [22]:
"""
For DataFrame label-indexing on the rows, I introduce the special indexing operators loc and iloc. They enable you to select a 
subset of the rows and columns from a DataFrame with Numpy-like notation using either axis labels (loc) or integers (iloc)
"""

# As a preliminary example, let's select a single row and multiple columns by label.
# The result is a Series named after the selected row.

car_data.loc['Lamborghini', ['Two', 'Three']]

Two      5
Three    6
Name: Lamborghini, dtype: int32

In [23]:
# We'll then perform some similar selections with integers using iloc:
# row position 2 ('Rolls Royce') and column positions 3, 0 and 1, in that order.

car_data.iloc[2, [3, 0, 1]]

Four    11
One      8
Two      9
Name: Rolls Royce, dtype: int32

In [24]:
# A single integer position returns the whole row at that position as a Series.
car_data.iloc[2]

One       8
Two       9
Three    10
Four     11
Name: Rolls Royce, dtype: int32

In [25]:
# Lists of positions on both axes return a DataFrame: rows 1 and 2, columns 3, 0 and 1.
car_data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,Four,One,Two
Lamborghini,7,0,5
Rolls Royce,11,8,9


In [26]:
# Both indexing operators (loc and iloc) work with slices in addition to single labels or lists of labels.
# The label slice :'Rolls Royce' includes the 'Rolls Royce' row itself.

car_data.loc[:'Rolls Royce', 'Two']

Bugatti        0
Lamborghini    5
Rolls Royce    9
Name: Two, dtype: int32

In [27]:
# Take the first three columns by position, then keep only the rows where 'Three' is greater than 5.
car_data.iloc[:,:3][car_data.Three > 5]

Unnamed: 0,One,Two,Three
Lamborghini,0,5,6
Rolls Royce,8,9,10
Koenigsegg jesko,12,13,14


In [28]:
# Type: df[val]
# Notes: Selects a single column or a sequence of columns from the DataFrame.
#        Special-case conveniences: a boolean array filters rows, a slice slices rows,
#        and a boolean DataFrame selects or sets values based on some criterion.

# 1. Single column selection

# Build a small example DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha'],
    Age=[20, 19, 18],
    City=['Chandigarh', 'Gurgaon', 'Delhi'],
)
df = pd.DataFrame(data=data)

# A single column label inside [] returns that column as a Series.
name_column = df['Name']
print(name_column)


0       Anshu
1       Sneha
2    Samiksha
Name: Name, dtype: object


In [29]:
# 2. Sequence of columns selection:

# Select multiple columns: a list of labels inside [] returns a DataFrame with just those columns.
name_age_columns = df[['Name', 'Age']]
print(name_age_columns)

       Name  Age
0     Anshu   20
1     Sneha   19
2  Samiksha   18


In [30]:
# 3. Boolean array (filter rows):

# Filter rows based on a condition: age_filter is a boolean Series (True where Age > 18),
# and passing it to [] keeps only the rows marked True.
age_filter = df['Age'] > 18
filtered_rows = df[age_filter]
print(filtered_rows)

    Name  Age        City
0  Anshu   20  Chandigarh
1  Sneha   19     Gurgaon


In [31]:
# 4. Slice (slice rows):

# Slice rows: a slice inside [] selects rows; 1:3 returns rows 1 and 2 (end-exclusive).
sliced_rows = df[1:3]
print(sliced_rows)

       Name  Age     City
1     Sneha   19  Gurgaon
2  Samiksha   18    Delhi


In [32]:
# 5. Boolean DataFrame (select values based on some criterion):

# Create a boolean DataFrame for selection; it has the same columns and default
# integer index as df, so it lines up with df cell by cell.
boolean_df = pd.DataFrame({'Name': [True, False, True],
                           'Age': [False, True, False],
                           'City': [True, True, False]})

# Select values based on the boolean DataFrame: cells marked True keep their value,
# cells marked False become NaN (which is why 'Age' is printed as a float column).
selected_values = df[boolean_df]
print(selected_values)

       Name   Age        City
0     Anshu   NaN  Chandigarh
1       NaN  19.0     Gurgaon
2  Samiksha   NaN         NaN


In [33]:
# Type: df.loc[val]
# Notes: Selects a single row or a subset of rows from the DataFrame by label.

# 1. Single row selection

# Build the example DataFrame with string row labels 'A', 'B', 'C'.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha'],
    Age=[20, 19, 18],
    City=['Chandigarh', 'Gurgaon', 'Delhi'],
)
df = pd.DataFrame(data=data, index=['A', 'B', 'C'])

# A single row label returns that row as a Series whose name is the label.
row_A = df.loc['A']
print(row_A)


Name         Anshu
Age             20
City    Chandigarh
Name: A, dtype: object


In [34]:
# 2. List of labels selection:

# Select multiple rows by labels: a list of row labels returns those rows as a DataFrame.
rows_A_C = df.loc[['A', 'C']]
print(rows_A_C)

       Name  Age        City
A     Anshu   20  Chandigarh
C  Samiksha   18       Delhi


In [35]:
# 3. Slice of labels selection:

# Slice rows by labels: with loc the end label is included, so 'B':'C' returns both B and C.
sliced_rows = df.loc['B':'C']
print(sliced_rows)

       Name  Age     City
B     Sneha   19  Gurgaon
C  Samiksha   18    Delhi


In [36]:
# 4. Boolean array selection:

# Create a boolean array for row selection: one flag per row of df, in row order.
boolean_array = [False, True, False]

# Select rows based on the boolean array: only the rows flagged True are kept (here row 'B').
selected_rows = df.loc[boolean_array]
print(selected_rows)

    Name  Age     City
B  Sneha   19  Gurgaon


In [37]:
# Type: df.loc[:, val]
# Notes: Selects a single column or a subset of columns by label.

# Build the example DataFrame (default integer row index).
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha'],
    Age=[20, 19, 18],
    City=['Chandigarh', 'Gurgaon', 'Delhi'],
)
df = pd.DataFrame(data=data)

# ':' keeps every row; the column label picks one column, returned as a Series.
name_column = df.loc[:, 'Name']
print(name_column)

0       Anshu
1       Sneha
2    Samiksha
Name: Name, dtype: object


In [38]:
# You can also select multiple columns by providing a list of labels:

# Select multiple columns by labels: all rows (':'), columns 'Name' and 'Age'; the result is a DataFrame.
name_age_columns = df.loc[:, ['Name', 'Age']]
print(name_age_columns)

       Name  Age
0     Anshu   20
1     Sneha   19
2  Samiksha   18


In [39]:
# Type: df.loc[val1, val2]
# Notes: Select both rows and columns by label

# Create a sample DataFrame.
# The third column holds city names, so it is labelled 'City'
# (it was previously mislabelled 'Country').
data = {
    'Name': ['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    'Age': [20, 19, 18, 18],
    'City': ['Chandigarh', 'Gurgaon', 'Delhi', 'Dehradun']
}
df = pd.DataFrame(data)

# Select specific rows and columns by label.
# The row slice 1:2 is label-based and includes its end point, so rows 1 and 2 are returned.
selected_data = df.loc[1:2, ['Name', 'Age']]

print(selected_data)

       Name  Age
1     Sneha   19
2  Samiksha   18


In [40]:
# Type: df.iloc[where]
# Notes: Selects a single row or a subset of rows from the DataFrame by integer position.

# Build a four-row sample DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# Position 1 is the second row; it is returned as a Series.
row1 = df.iloc[1]

print(row1)

Name        Sneha
Age            19
Country    Canada
Name: 1, dtype: object


In [41]:
# You can also select multiple rows by providing a list of integer positions:

# Select a subset of rows by integer positions: positions 0, 1 and 2 (the first three rows).
subset_rows = df.iloc[[0, 1, 2]]

print(subset_rows)

       Name  Age Country
0     Anshu   20     USA
1     Sneha   19  Canada
2  Samiksha   18      UK


In [42]:
# Type: df.iloc[:, where]
# Notes: Selects a single column or a subset of columns by integer position.

# Build a four-row sample DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# ':' keeps every row; column position 1 is 'Age', returned as a Series.
column1 = df.iloc[:, 1]

print(column1)

0    20
1    19
2    18
3    18
Name: Age, dtype: int64


In [43]:
# You can also select multiple columns by providing a list of integer positions:

# Select a subset of columns by integer positions: all rows, columns 0 ('Name') and 2 ('Country').
subset_columns = df.iloc[:, [0, 2]]

print(subset_columns)

       Name    Country
0     Anshu        USA
1     Sneha     Canada
2  Samiksha         UK
3  Abhijeet  Australia


In [44]:
# Type: df.iloc[where_i, where_j]
# Notes: Select both rows and columns by integer position.

# Build a four-row sample DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# Positional slices are end-exclusive: rows 1-2 and columns 0-1 ('Name', 'Age').
selected_data = df.iloc[1:3, 0:2]

print(selected_data)

       Name  Age
1     Sneha   19
2  Samiksha   18


In [45]:
# You can also use specific integer positions or lists of integer positions to select rows and columns:

# Select specific rows and columns by integer positions: rows 0, 2 and 3; columns 1 ('Age') and 2 ('Country').
selected_data = df.iloc[[0, 2, 3], [1, 2]]

print(selected_data)

   Age    Country
0   20        USA
2   18         UK
3   18  Australia


In [46]:
# Type: df.at[label_i, label_j]
# Notes: Select a single scalar value by row and column label.

# Build a four-row sample DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# Row label 1, column label 'Name' -> one scalar value.
value = df.at[1, 'Name']

print(value)

Sneha


In [47]:
# Type: df.iat[i, j]
# Notes: Select a single scalar value by row and column position (integers).

# Build a four-row sample DataFrame, one keyword per column.
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# Row position 1, column position 0 ('Name') -> one scalar value.
value = df.iat[1, 0]

print(value)

Sneha


In [48]:
# Type: reindex method
# Notes: Select either rows or columns by labels.

# Build a four-row sample DataFrame (default integer row index 0-3).
data = dict(
    Name=['Anshu', 'Sneha', 'Samiksha', 'Abhijeet'],
    Age=[20, 19, 18, 18],
    Country=['USA', 'Canada', 'UK', 'Australia'],
)
df = pd.DataFrame(data=data)

# Reindex the rows to the labels 'A'-'D'. None of these labels exist in df's
# index, so every value in the result is NaN.
new_rows = df.reindex(['A', 'B', 'C', 'D'])

print(new_rows)

  Name  Age Country
A  NaN  NaN     NaN
B  NaN  NaN     NaN
C  NaN  NaN     NaN
D  NaN  NaN     NaN


In [49]:
# Similarly, you can reindex columns by specifying the columns parameter:

# Reindex columns using labels: all three labels already exist, so this only
# reorders the columns to Name, Country, Age.
new_columns = df.reindex(columns=['Name', 'Country', 'Age'])

print(new_columns)

       Name    Country  Age
0     Anshu        USA   20
1     Sneha     Canada   19
2  Samiksha         UK   18
3  Abhijeet  Australia   18
