[Reference](https://ai.plainenglish.io/5-pandas-tricks-every-data-scientist-must-know-abf1578b603f)

# Sort a DataFrame Based on Another List

In [1]:
import pandas as pd

# List of categories and their priority rankings
categories = ['low', 'medium', 'high', 'critical']
priority = [1, 2, 3, 4]

# Create a Series to map category names to priority
priority_map = pd.Series(priority, index=categories)

# Sample DataFrame
df = pd.DataFrame({
    'category': ['high', 'low', 'medium', 'critical'],
    'value': [100, 200, 300, 400]
})

# Sort the DataFrame based on the category priority
df_sorted = df.sort_values("category", key=lambda x: x.map(priority_map))

print(df_sorted)

   category  value
1       low    200
2    medium    300
0      high    100
3  critical    400


In [2]:
import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Score': [85, 90, 95, 80]
})

# Custom sorting list
sort_order = ['Charlie', 'Alice', 'Diana', 'Bob']

# Sort DataFrame by 'Name' column using the order in sort_order
df['SortKey'] = pd.Categorical(df['Name'], categories=sort_order, ordered=True)
sorted_df = df.sort_values('SortKey').drop(columns='SortKey')

print(sorted_df)

      Name  Score
2  Charlie     95
0    Alice     85
3    Diana     80
1      Bob     90


# Insert a Column at a Specific Location in a DataFrame

In [3]:
# Add a 'Grade' column between 'Name' and 'Score'
df.insert(1, 'Grade', ['B', 'A', 'A+', 'C'])

print(df)

      Name Grade  Score  SortKey
0    Alice     B     85    Alice
1      Bob     A     90      Bob
2  Charlie    A+     95  Charlie
3    Diana     C     80    Diana


# Select Columns Based on the Column’s Data Type

In [4]:
# Mixed DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Score': [85, 90, 95, 80],
    'Passed': [True, True, False, True]
})

# Select only numeric columns
numeric_cols = df.select_dtypes(include='number')

print(numeric_cols)

   Score
0     85
1     90
2     95
3     80


# Count the Number of Non-NaN Cells for Each Column

In [5]:
# Sample DataFrame with NaN values
import numpy as np

df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Score': [85, 90, np.nan, 80],
    'Grade': ['B', np.nan, 'A+', 'C']
})

# Count non-NaN cells for each column
non_nan_counts = df.count()

print(non_nan_counts)

Name     4
Score    3
Grade    3
dtype: int64


# Split a DataFrame into Equal Parts


In [6]:
# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank'],
    'Score': [85, 90, 95, 80, 88, 92]
})

# Split DataFrame into 3 equal parts
splits = np.array_split(df, 3)

# Print each split
for i, part in enumerate(splits):
    print(f"Part {i + 1}:\n{part}\n")

Part 1:
    Name  Score
0  Alice     85
1    Bob     90

Part 2:
      Name  Score
2  Charlie     95
3    Diana     80

Part 3:
    Name  Score
4    Eve     88
5  Frank     92



  return bound(*args, **kwds)
