[Reference](https://medium.com/@ghanshyamsavaliya/110-quick-smart-filter-ways-in-python-dataframe-b48c0833fc91)

In [1]:
import pandas as pd

# Create a sample DataFrame
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# Select column 'A'
column_A = df['A']
# Select column 'A' using dot notation
column_A_dot = df.A

In [2]:
# Select column 'A'
column_A = df['A']
print(column_A)

0    1
1    2
2    3
Name: A, dtype: int64


In [3]:
# Select column 'A' using dot notation
column_A_dot = df.A
print(column_A_dot)

0    1
1    2
2    3
Name: A, dtype: int64


In [4]:
# Select column 'A' using loc[]
column_A_loc = df.loc[:, 'A']
print(column_A_loc)

0    1
1    2
2    3
Name: A, dtype: int64


In [5]:
# Select column at index 0 using iloc[]
column_0_iloc = df.iloc[:, 0]
print(column_0_iloc)

0    1
1    2
2    3
Name: A, dtype: int64


In [7]:
# # Select column 'A' using ix[]
# column_A_ix = df.ix[:, 'A']
# print(column_A_ix)

In [8]:
# Select column 'A' using get()
column_A_get = df.get('A')
print(column_A_get)

0    1
1    2
2    3
Name: A, dtype: int64


In [9]:
# Select columns 'A' and 'B' using loc[]
columns_AB_loc = df.loc[:, ['A', 'B']]
print(columns_AB_loc)

   A  B
0  1  4
1  2  5
2  3  6


In [10]:
# Select columns at index 0 and 1 using iloc[]
columns_01_iloc = df.iloc[:, [0, 1]]
print(columns_01_iloc)

   A  B
0  1  4
1  2  5
2  3  6


In [11]:
# Select columns 'A' and 'B' using loc[] and slicing
columns_AB_slice_loc = df.loc[:, 'A':'B']
print(columns_AB_slice_loc)

   A  B
0  1  4
1  2  5
2  3  6


In [12]:
# Select columns at index 0 and 1 using iloc[] and slicing
columns_01_slice_iloc = df.iloc[:, 0:2]
print(columns_01_slice_iloc)

   A  B
0  1  4
1  2  5
2  3  6


In [13]:
# Select columns 'A' where values in column 'B' are greater than 4
columns_A_where_B_loc = df.loc[df['B'] > 4, 'A']
print(columns_A_where_B_loc)

1    2
2    3
Name: A, dtype: int64


In [15]:
# # Select columns at index 0 where values in column 'B' are greater than 4
# columns_0_where_B_iloc = df.iloc[df['B'] > 4, 0]
# print(columns_0_where_B_iloc)

In [16]:
# Select columns where the mean of each column is greater than 2
columns_mean_greater_than_2 = df.loc[:, lambda df: df.mean() > 2]
print(columns_mean_greater_than_2)

   B
0  4
1  5
2  6


In [17]:
# Select columns using callable function
columns_callable_iloc = df.iloc[:, lambda df: [0, 1]]
print(columns_callable_iloc)

   A  B
0  1  4
1  2  5
2  3  6


In [18]:
# Select columns where column names contain 'A'
columns_with_A = df.filter(like='A')
print(columns_with_A)

   A
0  1
1  2
2  3


In [19]:
# Select columns where values in column 'A' are greater than 2
columns_A_query = df.query('A > 2')
print(columns_A_query)

   A  B
2  3  6


In [20]:
# Select columns at indices 0 and 1 using iloc[]
columns_01_index_iloc = df.iloc[:, [0, 1]]
print(columns_01_index_iloc)

   A  B
0  1  4
1  2  5
2  3  6


In [21]:
# Select columns 'A' and 'B' using loc[]
columns_AB_index_loc = df.loc[:, ['A', 'B']]
print(columns_AB_index_loc)

   A  B
0  1  4
1  2  5
2  3  6


In [22]:
# Select column 'A' at row index 0 using loc[]
column_A_row_0_loc = df.loc[0, 'A']
print(column_A_row_0_loc)

1


In [23]:
# Select column at index 0 at row index 0 using iloc[]
column_0_row_0_iloc = df.iloc[0, 0]
print(column_0_row_0_iloc)

1


In [24]:
# Select column 'A' at row index 0 using at[]
column_A_row_0_at = df.at[0, 'A']
print(column_A_row_0_at)

1


In [25]:
# Select column at index 0 at row index 0 using iat[]
column_0_row_0_iat = df.iat[0, 0]
print(column_0_row_0_iat)

1


In [26]:
# Select columns with names containing 'A' or 'B' using filter and regex
columns_AB_filter = df.filter(regex='A|B')
print(columns_AB_filter)

   A  B
0  1  4
1  2  5
2  3  6


In [27]:
# Select column 'A' using style function
column_A_style = df.style.apply(lambda x: ['background: yellow' if val > 2 else '' for val in x], subset=['A'])
print(column_A_style)

<pandas.io.formats.style.Styler object at 0x7d4e58bfd150>


In [29]:
import numpy as np
# Select columns using numpy array slicing
columns_np_slicing = df.iloc[:, np.s_[:2]]  # Select columns 0 and 1
print(columns_np_slicing)

   A  B
0  1  4
1  2  5
2  3  6


In [30]:
# Select columns where column names contain 'A'
columns_with_A_lc = df[[col for col in df.columns if 'A' in col]]
print(columns_with_A_lc)

   A
0  1
1  2
2  3


In [31]:
# Define a function to select columns
def select_columns(data_frame, col_prefix):
    return data_frame.filter(like=col_prefix)

# Select columns with names containing 'A'
selected_columns = select_columns(df, 'A')
print(selected_columns)

   A
0  1
1  2
2  3


In [32]:
# Select column 'A' using the .xs() method
column_A_xs = df.xs('A', axis=1)
print(column_A_xs)

0    1
1    2
2    3
Name: A, dtype: int64


In [33]:
# Drop columns 'A' and 'B'
remaining_columns = df.drop(columns=['A', 'B'])
print(remaining_columns)

Empty DataFrame
Columns: []
Index: [0, 1, 2]


In [34]:
# Select all columns except 'A'
columns_except_A = df[df.columns.difference(['A'])]
print(columns_except_A)

   B
0  4
1  5
2  6


In [35]:
# Sort columns in alphabetical order
columns_alphabetical = df[sorted(df.columns)]
print(columns_alphabetical)

   A  B
0  1  4
1  2  5
2  3  6


In [36]:
# Select column 'A' using index operator
column_A_index = df['A']
print(column_A_index)

0    1
1    2
2    3
Name: A, dtype: int64


In [40]:
# Create a sample MultiIndex DataFrame
multi_index = pd.MultiIndex.from_tuples([('A', 'X'), ('A', 'Y'), ('B', 'X')], names=['First', 'Second'])
df_multi = pd.DataFrame({'Value': [1, 2, 3]}, index=multi_index)

# Select rows where 'First' level is 'A'
row_A_xs = df_multi.xs('A', level='First')
print(row_A_xs)

        Value
Second       
X           1
Y           2


In [41]:
# Create a sample DataFrame with MultiIndex columns
df_multi_columns = pd.DataFrame({('A', 'X'): [1, 2], ('B', 'Y'): [3, 4]})

# Select column ('A', 'X')
column_AX_multi = df_multi_columns[('A', 'X')]
print(column_AX_multi)

0    1
1    2
Name: (A, X), dtype: int64


In [42]:
# Get the integer index of column 'A'
column_A_index_loc = df.columns.get_loc('A')
print(column_A_index_loc)

0


In [43]:
# Get the integer indices of columns 'A' and 'B'
columns_AB_indices = df.columns.get_indexer(['A', 'B'])
print(columns_AB_indices)

[0 1]


In [44]:
# Select columns using regular expression
columns_regex_filter = df.filter(regex='[A-B]')
print(columns_regex_filter)

   A  B
0  1  4
1  2  5
2  3  6


In [45]:
# Create another DataFrame with different column order
data_other = {'B': [4, 5, 6], 'A': [1, 2, 3]}
df_other = pd.DataFrame(data_other)

# Select columns from df_other in the order of df
columns_order_other = df_other[df.columns]
print(columns_order_other)

   A  B
0  1  4
1  2  5
2  3  6


In [46]:
# Create another DataFrame with different column order
data_other = {'B': [4, 5, 6], 'A': [1, 2, 3]}
df_other = pd.DataFrame(data_other)

# Select columns from df_other based on column indices of df
columns_index_order_other = df_other.iloc[:, df.columns.get_indexer(df_other.columns)]
print(columns_index_order_other)

   A  B
0  1  4
1  2  5
2  3  6


In [47]:
# Reorder columns using .reindex()
columns_reorder = df.reindex(columns=['B', 'A'])
print(columns_reorder)

   B  A
0  4  1
1  5  2
2  6  3


In [49]:
# # Create a sample DataFrame with duplicate column names
# data_duplicates = {'A': [1, 2, 3], 'B': [4, 5, 6], 'A': [7, 8, 9]}
# df_duplicates = pd.DataFrame(data_duplicates)

# # Use pivot_table to aggregate duplicate columns and select 'A' column
# column_A_pivot = df_duplicates.pivot_table(index=df_duplicates.index, columns=df_duplicates.columns, aggfunc='sum')['A']
# print(column_A_pivot)

In [50]:
# Select columns with names containing 'A'
columns_with_A_str = df.loc[:, df.columns.str.contains('A')]
print(columns_with_A_str)

   A
0  1
1  2
2  3


In [51]:
# Select column elements where the element is greater than 2
column_elements_greater_than_2 = df.applymap(lambda x: x if x > 2 else None)['A']
print(column_elements_greater_than_2)

0    NaN
1    NaN
2    3.0
Name: A, dtype: float64


In [52]:
# Set 'A' column as index
df.set_index('A', inplace=True)

# Select rows where index values are greater than 1
rows_index_greater_than_1 = df.query('index > 1')
print(rows_index_greater_than_1)

   B
A   
2  5
3  6


In [53]:
# Iterate over columns and select specific columns
selected_columns = [col for col_name, col in df.items() if 'A' in col_name]
print(selected_columns)

[]


In [54]:
# Select columns where column names do not contain 'A'
columns_not_with_A = df.loc[:, ~df.columns.str.contains('A')]
print(columns_not_with_A)

   B
A   
1  4
2  5
3  6


In [55]:
# Select numeric columns
numeric_columns = df.select_dtypes(include=['number'])
print(numeric_columns)

   B
A   
1  4
2  5
3  6


In [57]:
# # Select columns 'A' and 'B' using a list of column names
# columns_list = df[['A', 'B']]
# print(columns_list)

In [58]:
# Select column 'C' with default value 0
column_C_with_default = df.get('C', default=0)
print(column_C_with_default)

0


In [60]:
# # Select column 'A' as a Series
# column_A_series = df['A']
# print(type(column_A_series))