# Pandas DataFrame: Quick Reference & Examples
This notebook demonstrates common Pandas DataFrame operations on a small sample dataset.

## 1) Setup & Data

In [None]:
import pandas as pd

# Toy dataset: one entry per column, one list element per row (four people).
data = {
    "Name": ["Alice", "Bob", "Cathy", "Dan"],
    "Age": [25, 30, 22, 35],
    "Purchase": [100, 200, 150, 300],
}

# Each dict key becomes a column; rows get the default 0..3 integer index.
df = pd.DataFrame.from_dict(data, orient="columns")

# Bare last expression so the notebook renders the frame.
df

## 2) Basics: version, shape, preview, columns, dtypes, and info

In [None]:
# Library version and overall size first; shape is (rows, columns).
print("pandas version:", pd.__version__)
print("df.shape:", df.shape)

# Each heading is printed on its own line, followed by the matching view of df.
labelled_views = (
    ("\n.head():", df.head()),
    ("\n.tail():", df.tail()),
    ("\n.columns:", df.columns),
    ("\n.keys():", df.keys()),
    ("\n.dtypes:", df.dtypes),
)
for heading, view in labelled_views:
    print(heading, view, sep="\n")

# info() prints its own report, so it is called without a surrounding print().
print("\n.info():")
df.info()

## 3) Describe numeric columns

In [None]:
# Note: `.DESCR` is not a pandas attribute; it belongs to scikit-learn datasets.
# The pandas way to get summary statistics is `.describe()`. It is left as the
# cell's last expression so the notebook displays the resulting table.
df.describe()

## 4) Selection with .loc (label-based) and .iloc (position-based)

In [None]:
# Label-based access with .loc: one cell (row label 0, column "Age"),
# then every row of the "Purchase" column.
age_at_label_0 = df.loc[0, "Age"]
purchase_column = df.loc[:, "Purchase"]
print("df.loc[0, 'Age'] ->", age_at_label_0)
print("\nAll rows in 'Purchase' with loc:", purchase_column, sep="\n")

# Position-based access with .iloc: row 0 / column 0,
# then every row of the column at position 1.
top_left_value = df.iloc[0, 0]
second_column = df.iloc[:, 1]
print("\nValue at first row, first column (Name) with iloc:", top_left_value)
print("\nAll rows, second column (Age) with iloc:", second_column, sep="\n")

## 5) Sorting

In [None]:
# sort_values needs an existing column name; it returns a new frame and
# leaves df itself in its original order.

# Largest purchase first.
by_purchase_desc = df.sort_values("Purchase", ascending=False)
print("Descending by Purchase", by_purchase_desc, sep="\n")

# Youngest first.
by_age_asc = df.sort_values("Age", ascending=True)
print("\nAscending by Age", by_age_asc, sep="\n")

## 6) Unique values & aggregates

In [None]:
# Distinct values of one column, and the total of another.
print("Unique Names:", df["Name"].unique())
print("Sum of Purchase:", df["Purchase"].sum())

# Whole-frame reductions; numeric_only=True limits each one to numeric columns.
column_stats = (
    ("\nColumn-wise min (numeric only):", df.min(numeric_only=True)),
    ("\nColumn-wise max (numeric only):", df.max(numeric_only=True)),
    ("\nColumn-wise mean (numeric only):", df.mean(numeric_only=True)),
)
for heading, stats in column_stats:
    print(heading, stats, sep="\n")

## 7) Common mistakes (demonstrated safely)

In [None]:
def try_print(label, func):
    """Run ``func()`` and print its result, or a one-line summary of the error.

    Args:
        label: Text printed in front of the result or the error summary.
        func: Zero-argument callable to invoke.

    Returns:
        None. Everything is reported through ``print``.

    Any ``Exception`` raised is caught and printed as
    ``<label> -> Error: <ExceptionType>: <message>`` rather than propagated,
    so a failing example does not stop the rest of the cell.
    """
    try:
        # The print stays inside the try so that a failure while formatting
        # the result is reported the same way as a failure inside func.
        result = func()
        print(f"{label}:\n{result}\n")
    except Exception as err:
        error_kind = type(err).__name__
        print(f"{label} -> Error: {error_kind}: {err}\n")

# Common KeyErrors caused by wrong column names: df has no column "A" or "B".
# Each case pairs the label to print with a lambda that defers the lookup,
# so the failure happens inside try_print and is reported there.
bad_column_cases = [
    ("df[['A']]", lambda: df[["A"]]),
    ("df.loc[0:1, ['A']]", lambda: df.loc[0:1, ["A"]]),
    ("df.sort_values(by='B')", lambda: df.sort_values(by="B")),
]
for case_label, lookup in bad_column_cases:
    try_print(case_label, lookup)