[Reference](https://towardsdatascience.com/10-most-frequently-asked-pandas-questions-on-stack-overflow-b9d7d94cd83e)

In [11]:
import numpy as np
import pandas as pd

df = pd.DataFrame({
    
    "first_name": ["John", "Jane", "Emily", "Matt", "Alex", "George", "Max"],
    "last_name": ["Doe", "Doe", "Uth", "Dan", "Mir", "Jen", "Potter"],
    "start_date": ["2022-10-04", np.nan, "2022-08-04", np.nan, np.nan, "2021-12-10", "2022-02-05"],
    "group": ["A", "B", "A", "A", "C", "D", "D"],
    "salary": [75000, 72000, 45000, 77000, np.nan, np.nan, 65000]

})

df

Unnamed: 0,first_name,last_name,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
1,Jane,Doe,,B,72000.0
2,Emily,Uth,2022-08-04,A,45000.0
3,Matt,Dan,,A,77000.0
4,Alex,Mir,,C,
5,George,Jen,2021-12-10,D,
6,Max,Potter,2022-02-05,D,65000.0


# 1. How to iterate over rows in a DataFrame in Pandas

In [2]:
for index, row in df.iterrows():
    print(row[0], row[1])

John Doe
Jane Doe
Emily Uth
Matt Dan
Alex Mir
George Jen
Max Potter


# 2. How do I select rows from a DataFrame based on column values?

In [3]:
df[df["salary"] > 70000] 

Unnamed: 0,first_name,last_name,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
1,Jane,Doe,,B,72000.0
3,Matt,Dan,,A,77000.0


In [4]:
df[(df["salary"] > 70000) & (df["group"] == "A")] 

Unnamed: 0,first_name,last_name,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
3,Matt,Dan,,A,77000.0


In [5]:
df[df["group"].isin(["A", "B", "C"])] 

Unnamed: 0,first_name,last_name,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
1,Jane,Doe,,B,72000.0
2,Emily,Uth,2022-08-04,A,45000.0
3,Matt,Dan,,A,77000.0
4,Alex,Mir,,C,


# 3. Renaming column names in Pandas

In [6]:
df = df.rename(columns={"first_name": "fname", "last_name": "lname"})

df

Unnamed: 0,fname,lname,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
1,Jane,Doe,,B,72000.0
2,Emily,Uth,2022-08-04,A,45000.0
3,Matt,Dan,,A,77000.0
4,Alex,Mir,,C,
5,George,Jen,2021-12-10,D,
6,Max,Potter,2022-02-05,D,65000.0


# 4. Delete a column from a Pandas DataFrame

In [7]:
# dropping the start_date and group columns
df_new = df.drop(["start_date", "group"], axis=1)

df_new.columns

Index(['fname', 'lname', 'salary'], dtype='object')

In [8]:
# dropping the start_date and group columns
df.drop(["start_date", "group"], axis=1, inplace=True)

# 5. How do I get the row count of a Pandas DataFrame?

In [9]:
df.shape
len(df)

7

# 6. Selecting multiple columns in a Pandas DataFrame

In [12]:
cols = ["first_name", "last_name", "salary"]

df[cols]

Unnamed: 0,first_name,last_name,salary
0,John,Doe,75000.0
1,Jane,Doe,72000.0
2,Emily,Uth,45000.0
3,Matt,Dan,77000.0
4,Alex,Mir,
5,George,Jen,
6,Max,Potter,65000.0


# 7. How to change the order of DataFrame columns

In [13]:
new_cols = ["salary", "group", "first_name", "last_name", "start_date"]

df.loc[:, new_cols]

Unnamed: 0,salary,group,first_name,last_name,start_date
0,75000.0,A,John,Doe,2022-10-04
1,72000.0,B,Jane,Doe,
2,45000.0,A,Emily,Uth,2022-08-04
3,77000.0,A,Matt,Dan,
4,,C,Alex,Mir,
5,,D,George,Jen,2021-12-10
6,65000.0,D,Max,Potter,2022-02-05


# 8. Change column type in Pandas

In [14]:
df.dtypes

first_name     object
last_name      object
start_date     object
group          object
salary        float64
dtype: object

In [15]:
df["start_date"] = df["start_date"].astype("datetime64[ns]")

df.dtypes

first_name            object
last_name             object
start_date    datetime64[ns]
group                 object
salary               float64
dtype: object

In [16]:
df = df.astype({
    "start_date": "datetime64[ns]",
    "first_name": "string",
    "last_name": "string"
})

df.dtypes

first_name            string
last_name             string
start_date    datetime64[ns]
group                 object
salary               float64
dtype: object

# 9. How to drop rows of Pandas DataFrame whose value in a certain column is NaN

In [17]:
df.dropna()

Unnamed: 0,first_name,last_name,start_date,group,salary
0,John,Doe,2022-10-04,A,75000.0
2,Emily,Uth,2022-08-04,A,45000.0
6,Max,Potter,2022-02-05,D,65000.0


# 10. Get a list from Pandas DataFrame column headers

In [18]:
col_headers = list(df.columns)

col_headers

['first_name', 'last_name', 'start_date', 'group', 'salary']