# USDA Soil Texture Profile by Depth (Matplotlib)

This notebook plots a **USDA soil texture profile by depth** from an Excel table like the examples you uploaded.

### Expected Excel columns (case-insensitive)
- `Sample` (layer ID or horizon name)
- `Texture` (e.g., *loam*, *sandy clay loam*, *sand*)
- Depth columns in **both units**:
  - `Top_ft`, `Bottom_ft`
  - `Top_cm`, `Bottom_cm`

You can choose **one file at a time** (manually) and choose whether to plot the profile in **feet** or **centimeters**.


In [10]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Patch

# ---- Folder where your uploaded example files live (adjust if needed) ----
DATA_DIR = "/workspaces/hwrs564b_course_materials_JessicaCarvalho007/Homework/4_Measurements/USDA Soil Texture"

# Show candidate Excel files so you can pick one
sorted([f for f in os.listdir(DATA_DIR) if f.lower().endswith((".xlsx", ".xls"))])[:50]


['Sample 1.1 textures.xlsx',
 'Sample 1.1.xlsx',
 'Sample 2.1 textures.xlsx',
 'Sample 2.1.xlsx',
 'Sample 2.2.xlsx',
 'my_soils.xlsx']

## 1) Choose the file and the plotting unit

Set:
- `EXCEL_PATH` to the file you want to plot **this run**
- `DEPTH_UNITS` to `"cm"` or `"ft"`


In [11]:
# ---- Choose ONE file at a time ----
EXCEL_PATH = os.path.join(DATA_DIR, "Sample 1.1 textures.xlsx")  # <-- change this each run

# ---- Choose units for plotting: "cm" or "ft" ----
DEPTH_UNITS = "ft"  # "cm" or "ft"

# Optional: customize the figure
FIGSIZE = (3.2, 7.5)
DPI = 200
TITLE = None  # e.g., "Pit A - Texture by Depth"
SAVE_FIG = False
OUT_PNG = os.path.join(DATA_DIR, "texture_profile.png")


## 2) Functions: load, normalize labels, and plot

These functions:
- read the Excel file
- normalize texture labels (e.g., `"sandy clay loam"` â†’ `"Sandy Clay Loam"`)
- validate columns
- plot the profile using consistent colors + hatches


In [12]:
# --- Style map (edit these if you want different colors/hatches) ---
TEXTURE_STYLE = {
    "Loam":            dict(facecolor="tan",        hatch=".."),
    "Sandy Clay Loam": dict(facecolor="peru",       hatch="xx"),
    "Sandy Loam":      dict(facecolor="wheat",      hatch="//"),
    "Sand":            dict(facecolor="khaki",      hatch="oo"),
    "Clay":            dict(facecolor="sienna",     hatch="\\"),
    "Sandy Clay":      dict(facecolor="chocolate",  hatch="++"),
}

# Aliases -> canonical texture labels (extend this as you encounter new spellings)
_TEXTURE_ALIASES = {
    "loam": "Loam",
    "sandy clay loam": "Sandy Clay Loam",
    "sandy loam": "Sandy Loam",
    "sand": "Sand",
    "clay": "Clay",
    "sandy clay": "Sandy Clay",
}

def normalize_texture_label(x: str) -> str:
    """
    Normalize texture strings to a canonical label used by TEXTURE_STYLE.
    Example: ' sandy  clay loam ' -> 'Sandy Clay Loam'
    """
    if pd.isna(x):
        return ""
    s = str(x).strip().lower()
    s = re.sub(r"[_\-]+", " ", s)         # underscores/dashes -> spaces
    s = re.sub(r"\s+", " ", s).strip()    # collapse whitespace
    return _TEXTURE_ALIASES.get(s, s.title())  # fallback: Title Case

def _find_col(df, candidates):
    """
    Return the first matching column name from candidates (case-insensitive).
    """
    cols = {c.lower(): c for c in df.columns}
    for cand in candidates:
        if cand.lower() in cols:
            return cols[cand.lower()]
    return None

def load_texture_excel(excel_path: str, sheet_name=None) -> pd.DataFrame:
    """
    Load an Excel file containing texture layers. Returns a cleaned DataFrame
    with standardized columns:
      Sample, Texture, Top_ft, Bottom_ft, Top_cm, Bottom_cm
    """
    if not os.path.exists(excel_path):
        raise FileNotFoundError(f"Excel file not found: {excel_path}")

    df = pd.read_excel(excel_path, sheet_name=sheet_name)

    # Column discovery (case-insensitive)
    col_sample  = _find_col(df, ["Sample", "Layer", "Horizon", "ID"])
    col_texture = _find_col(df, ["Texture", "USDA_Texture", "Class"])

    col_top_ft    = _find_col(df, ["Top_ft", "Top (ft)", "Top_ft."])
    col_bottom_ft = _find_col(df, ["Bottom_ft", "Bottom (ft)", "Bottom_ft."])
    col_top_cm    = _find_col(df, ["Top_cm", "Top (cm)", "Top_cm."])
    col_bottom_cm = _find_col(df, ["Bottom_cm", "Bottom (cm)", "Bottom_cm."])

    required = {
        "Sample": col_sample,
        "Texture": col_texture,
        "Top_ft": col_top_ft,
        "Bottom_ft": col_bottom_ft,
        "Top_cm": col_top_cm,
        "Bottom_cm": col_bottom_cm,
    }
    missing = [k for k, v in required.items() if v is None]
    if missing:
        raise ValueError(
            "Missing required columns in Excel.\n"
            f"Missing: {missing}\n"
            f"Found columns: {list(df.columns)}"
        )

    out = df[[col_sample, col_texture, col_top_ft, col_bottom_ft, col_top_cm, col_bottom_cm]].copy()
    out.columns = ["Sample", "Texture", "Top_ft", "Bottom_ft", "Top_cm", "Bottom_cm"]

    # Clean/convert
    out["Sample"] = out["Sample"].astype(str).str.strip()
    out["Texture_raw"] = out["Texture"]
    out["Texture"] = out["Texture"].apply(normalize_texture_label)

    for c in ["Top_ft", "Bottom_ft", "Top_cm", "Bottom_cm"]:
        out[c] = pd.to_numeric(out[c], errors="coerce")

    # Drop rows missing required numeric depths or texture
    out = out.dropna(subset=["Top_ft", "Bottom_ft", "Top_cm", "Bottom_cm"])
    out = out[out["Texture"].astype(str).str.len() > 0].copy()

    # Sort by depth (cm by default)
    out = out.sort_values(["Top_cm", "Bottom_cm"]).reset_index(drop=True)

    # Basic validation
    bad = out[(out["Bottom_cm"] <= out["Top_cm"]) | (out["Bottom_ft"] <= out["Top_ft"])]
    if len(bad) > 0:
        raise ValueError(
            "Found layers where Bottom <= Top (check your depths). "
            f"Problem rows:\n{bad[['Sample','Top_ft','Bottom_ft','Top_cm','Bottom_cm','Texture_raw']].to_string(index=False)}"
        )

    return out

def plot_texture_profile_df(
    df: pd.DataFrame,
    depth_units: str = "cm",
    ax=None,
    figsize=(3.2, 7.5),
    dpi=200,
    title=None,
    label_layers=True,
    legend=True,
):
    """
    Plot a single texture profile from df (one file / one profile).
    depth_units: "cm" or "ft"
    """
    depth_units = depth_units.lower().strip()
    if depth_units not in {"cm", "ft"}:
        raise ValueError("depth_units must be 'cm' or 'ft'")

    top_col = "Top_cm" if depth_units == "cm" else "Top_ft"
    bot_col = "Bottom_cm" if depth_units == "cm" else "Bottom_ft"

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    else:
        fig = ax.figure

    # single column
    x0, w = 0.0, 1.0

    # keep track of textures present (for legend)
    present = []

    for _, row in df.iterrows():
        top = float(row[top_col])
        bot = float(row[bot_col])
        tex = str(row["Texture"]).strip()

        if tex not in TEXTURE_STYLE:
            known = ", ".join(TEXTURE_STYLE.keys())
            raise KeyError(
                f"Texture '{tex}' not in TEXTURE_STYLE.\n"
                f"Known textures: {known}\n"
                "Fix spelling/case or add it to TEXTURE_STYLE."
            )

        style = TEXTURE_STYLE[tex]
        height = bot - top

        ax.add_patch(Rectangle(
            (x0, top), w, height,
            facecolor=style["facecolor"],
            edgecolor="black",
            linewidth=1.2,
            hatch=style.get("hatch", None)
        ))

        if label_layers:
            ax.text(
                x0 + w/2,
                top + height/2,
                f"{row['Sample']}\n{tex}",
                ha="center",
                va="center",
                fontsize=8
            )

        if tex not in present:
            present.append(tex)

    ax.set_xlim(0, 1)
    ax.set_xticks([])
    ax.set_ylabel(f"Depth ({depth_units})")
    ax.set_title(title or "USDA Texture by Depth")

    # Depth increases downward (common soil-profile convention)
    ax.invert_yaxis()

    if legend:
        handles = [
            Patch(facecolor=TEXTURE_STYLE[t]["facecolor"],
                  edgecolor="black",
                  hatch=TEXTURE_STYLE[t].get("hatch", None),
                  label=t)
            for t in present
        ]
        ax.legend(handles=handles, loc="upper right", bbox_to_anchor=(1.65, 1.0))

    return fig, ax


## 3) Load your selected file and inspect

This prints:
- the cleaned table
- the unique (normalized) texture classes found


In [13]:
df = load_texture_excel(EXCEL_PATH)
df, sorted(df["Texture"].unique())


AttributeError: 'dict' object has no attribute 'columns'

## 4) Plot the profile (one at a time)

Run this cell after you pick `EXCEL_PATH` and `DEPTH_UNITS`.


In [None]:
fig, ax = plot_texture_profile_df(
    df,
    depth_units=DEPTH_UNITS,
    figsize=FIGSIZE,
    dpi=DPI,
    title=TITLE,
    label_layers=True,
    legend=True,
)

plt.show()

if SAVE_FIG:
    fig.savefig(OUT_PNG, bbox_inches="tight", dpi=DPI)
    print(f"Saved figure to: {OUT_PNG}")


## Notes / common tweaks

- If you see a `KeyError: Texture '...' not in TEXTURE_STYLE`, you likely have a new texture class in your data.
  - Add a style to `TEXTURE_STYLE`, **or**
  - Add an alias in `_TEXTURE_ALIASES` mapping it to an existing style key.
- If you want no labels inside layers, set `label_layers=False` in the plotting call.
- If you want the y-axis to start at the deepest point (opposite convention), remove `ax.invert_yaxis()`.
