In [3]:
import pandas as pd

# Selecting columns

In [5]:
# Build a small example DataFrame: one row per person, with each keyword
# argument below becoming a column.
df = pd.DataFrame(
    data=dict(
        Name=["Onkar", "Amit", "Sara", "Rohit"],
        Age=[21, 25, 23, 29],
        City=["Pune", "Mumbai", "Nashik", "Pune"],
        Salary=[50000, 65000, 55000, 70000],
    )
)

# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,Name,Age,City,Salary
0,Onkar,21,Pune,50000
1,Amit,25,Mumbai,65000
2,Sara,23,Nashik,55000
3,Rohit,29,Pune,70000


## 1. Select ONE column

This returns a Series  
Points:
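1. `df["col_name"]`
   The most common form and the recommended practice; it works for any column name.
2. `df.col_name`
   Attribute access works only when the column name is a valid Python identifier (for example, it contains no spaces) and does not clash with an existing DataFrame attribute or method.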

In [6]:
# Bracket access with a single column label returns that column as a Series.
df["Age"]

0    21
1    25
2    23
3    29
Name: Age, dtype: int64

In [7]:
# Attribute-style access: returns the City column as a Series.
df.City

0      Pune
1    Mumbai
2    Nashik
3      Pune
Name: City, dtype: object

## 2. Select MULTIPLE columns

This returns a DataFrame containing only the listed columns.  
Point:   
`df[["col_name1", "col_name2", ...]]`

In [8]:
# A list of labels inside the brackets returns a DataFrame with just those columns.
df[["Name", "Age"]]

Unnamed: 0,Name,Age
0,Onkar,21
1,Amit,25
2,Sara,23
3,Rohit,29


## 3. Select columns using Python List

This also returns a DataFrame, containing the columns whose names are stored in a Python list.  
Point:  
`List = ["Age", "City"]`  
`df[List]`

In [9]:
# Keep the wanted column labels in a list, then index the DataFrame with that list.
# NOTE(review): the name `List` is capitalised like a class and matches
# `typing.List`; a lowercase descriptive name would be more conventional —
# confirm no other cell depends on it before renaming.
List = ["Name", "City"]
df[List]

Unnamed: 0,Name,City
0,Onkar,Pune
1,Amit,Mumbai
2,Sara,Nashik
3,Rohit,Pune


## 4. Select columns by Data Type

Get only the columns that have the specified data types.  
Point:  

1. Only numeric columns - `df.select_dtypes(include="number")`
2. Only object/string columns - `df.select_dtypes(include="object")` (in this notebook, string columns have the `object` dtype)

In [13]:
# Keep only the numeric columns (Age and Salary in this frame).
df.select_dtypes(include="number")

Unnamed: 0,Age,Salary
0,21,50000
1,25,65000
2,23,55000
3,29,70000


In [14]:
# Keep only the object-dtype columns (the string columns Name and City in this frame).
df.select_dtypes(include="object")

Unnamed: 0,Name,City
0,Onkar,Pune
1,Amit,Mumbai
2,Sara,Nashik
3,Rohit,Pune


## 5. Select rows and columns by label with `.loc`

In [28]:
# Replace the default integer index with letter labels so that rows can be
# selected by label with `.loc`.
df.index = pd.Index(["a", "b", "c", "d"])

# Show the re-labelled frame.
df

Unnamed: 0,Name,Age,City,Salary
a,Onkar,21,Pune,50000
b,Amit,25,Mumbai,65000
c,Sara,23,Nashik,55000
d,Rohit,29,Pune,70000


In [33]:
# `:` selects every row; the list selects just the Name and Age columns.
df.loc[:, ["Name", "Age"]]

Unnamed: 0,Name,Age
a,Onkar,21
b,Amit,25
c,Sara,23
d,Rohit,29


In [34]:
# Label-based slices with `.loc` include both endpoints: rows 'a' through 'c'
# and columns "Name" through "City" (Name, Age, City).
df.loc['a':'c', "Name":"City"]

Unnamed: 0,Name,Age,City
a,Onkar,21,Pune
b,Amit,25,Mumbai
c,Sara,23,Nashik
