# Pandas DataFrames — Basics, Indexing & MultiIndex (with solutions)

Run top-to-bottom. Uses only numpy/pandas. Exercises are solved inline.


In [None]:

import numpy as np, pandas as pd
# Seed NumPy's global RNG so every run draws the same "random" numbers.
np.random.seed(101)

# Reproducible demo frame: 5 labelled rows (A-E) by 4 labelled columns (W-Z),
# filled with standard-normal draws.
row_labels = list("ABCDE")
col_labels = list("WXYZ")
df = pd.DataFrame(
    data=np.random.randn(len(row_labels), len(col_labels)),
    index=row_labels,
    columns=col_labels,
)
df


## Selecting columns (Series vs DataFrame)

In [None]:

# Indexing with one column label hands back that column as a Series.
w = df["W"]
# Show the object's type next to its first five values.
(type(w), w.head(5))


In [None]:

# Indexing with a *list* of labels keeps the result two-dimensional:
# the selection is a DataFrame, even if the list holds a single label.
cols = ["W", "Z"]
df[cols].head()


## Create / drop columns (axis=1)

In [None]:

# Create: assigning to a label that does not exist yet appends a new column.
# The values are the element-wise sum of W and Y, aligned on the row index.
df["NEW"] = df["W"].add(df["Y"])
df.head()


In [None]:

# Drop (not in-place): drop() returns a new frame and leaves df untouched.
# axis=1 tells pandas the label names a column rather than a row.
tmp = df.drop(labels=["NEW"], axis=1)
# Show the remaining columns and confirm NEW is no longer among them.
(tmp.columns, "NEW" in tmp.columns)


In [None]:

# Drop in-place: inplace=True mutates df itself and returns None,
# so there is nothing to assign.
df.drop(labels=["NEW"], axis=1, inplace=True)
df.columns


## Shape & axis semantics (rows=axis 0, cols=axis 1)

In [None]:
# shape is a (rows, columns) tuple: position 0 counts rows, position 1 columns.
n_rows, n_cols = df.shape
(df.shape, {'rows': n_rows, 'cols': n_cols})

## Row selection with .loc (labels) and .iloc (integer positions)

In [None]:

# .loc looks rows up by index *label*; .iloc looks them up by integer position.
row_A_loc = df.loc["A", :]
row_2_iloc = df.iloc[2, :]  # position 2 is the third row, labelled C
(row_A_loc, row_2_iloc)


## Subsets with .loc[[rows], [cols]]

In [None]:

# Passing a list of row labels and a list of column labels to .loc
# returns the rectangular sub-frame where they intersect.
rows = ["A", "B"]
cols = ["W", "Y"]
df.loc[rows, cols]


## Conditional selection & multiple conditions

In [None]:

# Boolean filter on a column: the comparison yields one True/False per row,
# and indexing with that boolean Series keeps only the True rows.
mask = df["W"].gt(0)
df.loc[mask]


In [None]:

# Combine: (W>0) & (Y>0.5)
# Use the element-wise `&` operator; Python's `and` cannot combine
# boolean Series.
w_positive = df["W"] > 0
y_above_half = df["Y"] > 0.5
df[w_positive & y_above_half]


## Reset / set index

In [None]:

# reset_index() returns a new frame with a default 0..n-1 index.
# drop=False (the default) keeps the old labels as a regular column.
df_reset = df.reset_index(drop=False)
df_reset.head()


In [None]:

# Set custom index: attach one state code per row, then promote that column
# to be the row index. assign() works on a copy, so df itself is unchanged.
states = "CA NY WY OR CO".split()
df_idx = df.assign(STATE=states).set_index("STATE")
df_idx.head()


## MultiIndex (index hierarchy) & cross-section

In [None]:

# Two-level row index: the outer level names the group, the inner level
# numbers the rows within each group.
outside = ["G1"] * 3 + ["G2"] * 3
inside = [1, 2, 3] * 2
hier_index = pd.MultiIndex.from_arrays([outside, inside], names=["GROUP", "NUM"])

# 6x2 frame of standard-normal draws, rounded to 3 decimals for readability.
values = np.random.randn(6, 2).round(3)
dfm = pd.DataFrame(values, index=hier_index, columns=list("AB"))
dfm


In [None]:

# Index from outside in: the tuple key lists the labels outermost level first
# (GROUP "G2", then NUM 2), and the second argument picks column "B".
# A single .loc call replaces the chained lookup dfm.loc["G2"].loc[2]["B"],
# which builds two intermediate objects to reach the same scalar.
val = dfm.loc[("G2", 2), "B"]
val


In [None]:

# Cross-section: select NUM==1 across all groups.
# xs() can match on an inner index level directly; the matched NUM level is
# dropped from the result, leaving GROUP as the index.
num_one = dfm.xs(1, level="NUM")
num_one


## Exercises (solved)

In [None]:

# E1: Select columns W and Z for rows A..C
# Label slices in .loc include BOTH endpoints, so "A":"C" covers A, B and C.
sol_E1 = df.loc["A":"C", ["W", "Z"]]
sol_E1


In [None]:

# E2: Add column SUM = X+Y, then drop it in place
# assign() returns a copy carrying the extra column, so df is never touched.
df2 = df.assign(SUM=df["X"] + df["Y"])
df2.drop(columns="SUM", inplace=True)
# True means df2 is back to the original columns and values.
df2.equals(df)


In [None]:

# E3: Filter rows where (Z<0) OR (W>0.5)
# `|` is the element-wise OR for boolean Series; a row is kept when either
# condition holds.
z_negative = df["Z"] < 0
w_above_half = df["W"] > 0.5
sol_E3 = df.loc[z_negative | w_above_half]
sol_E3
