In [1]:
# Import libraries
import numpy as np
import pandas as pd

In [2]:
# Build a small demo DataFrame: each dict key becomes a column, each list holds that column's values
data = dict(
    A=[1, 2, 3],
    B=[4, 5, 6],
    C=[7, 8, 9],
    D=[10, 11, 12],
)
df = pd.DataFrame(data=data)
df  # Last expression in the cell, so the notebook renders the frame

Unnamed: 0,A,B,C,D
0,1,4,7,10
1,2,5,8,11
2,3,6,9,12


In [3]:
# Find the Column Names (returned as a pandas Index of column labels)
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [4]:
# Count the columns: second entry of the (rows, columns) shape tuple
df.shape[1]

4

In [5]:
# Count the rows: first entry of the (rows, columns) shape tuple
df.shape[0]

3

In [6]:
# Select Specific Column by Column Name
df["A"]  # Returned as a pandas Series

0    1
1    2
2    3
Name: A, dtype: int64

In [7]:
# Select Specific Column by Position (0 is the first column)
df.iloc[:, 0]  # ":" selects all rows; returned as a pandas Series

0    1
1    2
2    3
Name: A, dtype: int64

In [8]:
# Select Specific Row by Index Label
df.loc[1]  # .loc looks rows up by label; returned as a pandas Series

A     2
B     5
C     8
D    11
Name: 1, dtype: int64

In [9]:
# Select Specific Row by Position (0-based)
df.iloc[1]  # Same row as df.loc[1] here because labels and positions coincide; returned as a pandas Series

A     2
B     5
C     8
D    11
Name: 1, dtype: int64

In [10]:
# Select Specific Cell (Row Label & Column Name)
df.loc[1, "A"]  # Row label first, then column name

2

In [11]:
# Keep only the rows where column A equals 2 (boolean-mask filtering)
a_equals_two = df["A"].eq(2)  # Boolean Series: one True/False per row
df_filtered = df.loc[a_equals_two]
df_filtered

Unnamed: 0,A,B,C,D
1,2,5,8,11


In [12]:
# Keep only the rows whose column-B value is even, using a list comprehension as the mask
b_is_even = [b_value % 2 == 0 for b_value in df["B"]]  # One True/False per row
df_filtered = df[b_is_even]
df_filtered

Unnamed: 0,A,B,C,D
0,1,4,7,10
2,3,6,9,12


In [13]:
# Filter rows on a condition that spans several columns, via a list comprehension
# A row is kept when its A value is 1 or its B value is 5
row_matches = [a_val == 1 or b_val == 5 for a_val, b_val in zip(df["A"], df["B"])]
df_filtered = df[row_matches]
df_filtered

Unnamed: 0,A,B,C,D
0,1,4,7,10
1,2,5,8,11


In [14]:
# Reorder columns by selecting them in the desired order
column_order = ["A", "D", "C", "B"]
df_reordered = df[column_order]
df_reordered

Unnamed: 0,A,D,C,B
0,1,10,7,4
1,2,11,8,5
2,3,12,9,6


In [15]:
# Remove columns by selecting only the ones to keep
columns_to_keep = ["A", "B"]  # Every column not listed here is left out
df_removed = df[columns_to_keep]
df_removed

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [16]:
# Add a new column whose values are generated by a list comprehension
e_labels = [f"E.{num}.{num + 1}" for num in range(13, 16)]  # Three labels, one per existing row
df["E"] = e_labels
df

Unnamed: 0,A,B,C,D,E
0,1,4,7,10,E.13.14
1,2,5,8,11,E.14.15
2,3,6,9,12,E.15.16


In [17]:
# Replace every cell equal to 1 with 100 (the result is only displayed; df itself is not reassigned)
df.replace(to_replace=1, value=100)

Unnamed: 0,A,B,C,D,E
0,100,4,7,10,E.13.14
1,2,5,8,11,E.14.15
2,3,6,9,12,E.15.16


In [18]:
# Rename a column via an {old name: new name} mapping
column_renames = {"A": "F"}
df_renamed = df.rename(columns=column_renames)
df_renamed

Unnamed: 0,F,B,C,D,E
0,1,4,7,10,E.13.14
1,2,5,8,11,E.14.15
2,3,6,9,12,E.15.16


In [19]:
# Remove Blank Rows (rows in which every value is missing)

# A blank row is needed to demonstrate this, so create one first...
df.iloc[1] = np.nan  # Sets every value in the row at position 1 to NaN (Not a Number)

# how="all" drops only the rows where every value is NaN; inplace=True modifies df itself
df.dropna(how="all", inplace=True)
df  # Note the index jumps from 0 -> 2 (the remaining rows keep their original labels)

Unnamed: 0,A,B,C,D,E
0,1.0,4.0,7.0,10.0,E.13.14
2,3.0,6.0,9.0,12.0,E.15.16


In [20]:
# Reminder about .iloc vs .loc
# Position 1 is now the row labelled 2, because the row labelled 1 was dropped in the previous cell
df.iloc[1] = 555
df  # Note that .iloc references relative positions, whereas .loc references exact index labels and names

Unnamed: 0,A,B,C,D,E
0,1.0,4.0,7.0,10.0,E.13.14
2,555.0,555.0,555.0,555.0,555


In [21]:
# Remove Blank Columns (columns in which every value is missing)

# A blank column is needed to demonstrate this, so create one first...
# (np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0)
df["C"] = np.nan  # Sets every value in column C to NaN (Not a Number)

# Find columns that are not blank: isna().all() yields one True/False per column,
# where True means every value in that column is NaN
columns_that_are_not_blank = [
    name for name, is_blank in df.isna().all().items() if not is_blank
]

# Keep only the non-blank columns, using the same .loc column selection shown earlier
df = df.loc[:, columns_that_are_not_blank]  # Give only the column(s) you want to keep
df

Unnamed: 0,A,B,D,E
0,1.0,4.0,10.0,E.13.14
2,555.0,555.0,555.0,555


In [22]:
# Remove Rows with Blanks in a Given Column

# A blank value in a column is needed to demonstrate this, so create one first...
# (np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0)
df.loc[2, "D"] = np.nan

# subset=["D"] limits the NaN check to column D; inplace=True modifies df itself
df.dropna(subset=["D"], inplace=True)
df

Unnamed: 0,A,B,D,E
0,1.0,4.0,10.0,E.13.14


In [23]:
# Promote a data row to be the header, discarding it and every row above it

# Position of the row that holds the real column names
header_row = 0

# Capture that row's values to use as the new column names
cols = list(df.iloc[header_row])

# Drop every row from the top down to and including the header row
rows_to_discard = df.index[: header_row + 1]
df = df.drop(rows_to_discard)

# Install the captured values as the column labels
df.columns = cols
df

Unnamed: 0,1.0,4.0,10.0,E.13.14
