# Statistics Tutorial: Single File

This notebook demonstrates how to load and preprocess nanoindentation data, locate pop-ins, and extract pop-in statistics using the merrypopins library,
for a single data file.

### Required Imports 

In [None]:
from merrypopins.load_datasets import load_txt
from merrypopins.preprocess import default_preprocess
from merrypopins.locate import default_locate
from merrypopins.statistics import (
    postprocess_popins_local_max,
    extract_popin_intervals,
    calculate_popin_statistics,
    calculate_curve_summary,
    calculate_stress_strain,
    calculate_stress_strain_statistics,
    default_statistics,
    default_statistics_stress_strain
)

import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path

ModuleNotFoundError: No module named 'merrypopins'

### Load and preprocess one indentation file

In [None]:
# Path to the single indentation file analysed in this notebook. It is a relative
# path, so it is resolved against the current working directory.
file_path = Path("datasets/6microntip_slowloading/grain5_6um_indent02_HL_QS_LC.txt") # replace with the path to your own file


# Load the raw data with merrypopins' text loader and preview its first rows
df_raw = load_txt(file_path)
df_raw.head()

In [None]:
# Run merrypopins' default preprocessing on the raw data. The result is bound to
# a new name (df_clean) rather than overwriting df_raw.
df_clean = default_preprocess(df_raw)
df_clean.head()

### Locate pop-ins

In [None]:
# Run the default locator on the cleaned data with both optional detectors
# (`use_cnn`, `use_iforest`) disabled.
df_located = default_locate(df_clean, use_iforest=False, use_cnn=False)

# Preview the rows flagged as pop-ins, restricted to the core measurement columns.
df_located.loc[
    df_located["popin"],
    ["Time (s)", "Load (µN)", "Depth (nm)", "popin"],
].head()


### Postprocess and extract pop-in intervals

In [None]:
# Filter the located pop-ins with the local-load-maximum postprocessing step,
# reading the detection flags from the "popin" column.
df_post = postprocess_popins_local_max(df_located, popin_flag_column="popin")

# Attach the interval (start and end) of each retained pop-in event.
df_intervals = extract_popin_intervals(df_post)

# Preview the selected pop-ins together with their start/end indices.
df_intervals.loc[
    df_intervals["popin_selected"],
    ["Time (s)", "Load (µN)", "Depth (nm)", "popin_selected", "start_idx", "end_idx"],
].head()


### Plot load-depth curves with pop-in intervals

In [None]:
# Draw the full load–depth curve and overlay the start and end point of every
# pop-in interval. The explicit Axes interface is used so that all drawing
# calls target this figure rather than pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_intervals["Depth (nm)"], df_intervals["Load (µN)"], label="Indentation curve")

# Rows that carry a complete interval. Both indices must be present: selecting
# the end indices with a mask built only from "start_idx" would make the integer
# cast below fail on any row that has a start but no end.
start_mask = df_intervals["start_idx"].notna() & df_intervals["end_idx"].notna()

# Mark pop-in starts
start_indices = df_intervals.loc[start_mask, "start_idx"].astype(int)
ax.scatter(
    df_intervals.loc[start_indices, "Depth (nm)"],
    df_intervals.loc[start_indices, "Load (µN)"],
    color="red", label="Pop-in start", zorder=10,
)

# Mark pop-in ends (semi-transparent so an overlapping start marker stays visible)
end_indices = df_intervals.loc[start_mask, "end_idx"].astype(int)
ax.scatter(
    df_intervals.loc[end_indices, "Depth (nm)"],
    df_intervals.loc[end_indices, "Load (µN)"],
    color="blue", label="Pop-in end", zorder=10, alpha=0.5,
)

ax.set_xlabel("Depth (nm)")
ax.set_ylabel("Load (µN)")
ax.set_title("Load–Depth Curve with Pop-In Start/End Points")
ax.legend()
plt.show()

### Compute load-depth pop-in statistics

In [None]:
# One-call load–depth pipeline (postprocessing, interval extraction, statistics),
# started from the located pop-ins.
df_stats_ld = default_statistics(df_located)

# Preview the per-pop-in load–depth statistics for the selected pop-ins.
df_stats_ld.loc[
    df_stats_ld["popin_selected"],
    [
        "popin_selected",
        "dLoad",
        "popin_length",
        "time_until_next",
        "avg_time_during",
        "avg_dload_before",
        "slope_before",
        "depth_jump",
        "avg_depth_during",
        "avg_depth_velocity",
        "avg_curvature_depth",
    ],
].head()


### Summary statistics of curve

In [None]:
# Summarise the whole curve from the load–depth statistics table, then display
# the result as a one-column table named "value".
# NOTE(review): presumably `summary` is a pandas Series — confirm against
# calculate_curve_summary.
summary = calculate_curve_summary(df_stats_ld)
summary.to_frame("value")


### Convert to stress-strain

In [None]:
# Stress–strain conversion plus the associated pop-in statistics, computed in
# one call from the located pop-ins.
df_stats_ss = default_statistics_stress_strain(df_located)

# Preview the stress–strain values at the selected pop-ins.
df_stats_ss.loc[
    df_stats_ss["popin_selected"],
    ["stress", "strain", "popin_selected"],
].head()


### Plot stress-strain curve with pop-ins

In [None]:
# Draw the stress–strain curve and highlight the selected pop-ins. The explicit
# Axes interface is used so that all drawing calls target this figure rather
# than pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_stats_ss["strain"], df_stats_ss["stress"], label="Stress–strain")

# Evaluate the pop-in selection once and reuse it for both coordinates.
popin_points = df_stats_ss.loc[df_stats_ss["popin_selected"], ["strain", "stress"]]
ax.scatter(
    popin_points["strain"],
    popin_points["stress"],
    color="red", label="Pop-in", zorder=10,
)

ax.set_xlabel("Strain")
ax.set_ylabel("Stress (MPa)")
ax.set_title("Stress–Strain Curve with Pop-Ins")
ax.legend()
plt.show()

### Pop-in stress-strain statistics

In [None]:
# Show the per-pop-in stress–strain statistics for the selected pop-ins.
# Nothing is recomputed here: df_stats_ss is the table already produced by
# default_statistics_stress_strain in an earlier cell.
df_stats_ss.loc[
    df_stats_ss["popin_selected"],
    [
        "popin_selected",
        "stress_jump",
        "strain_jump",
        "avg_stress_during",
        "avg_strain_during",
        "stress_slope",
        "strain_slope",
        "avg_dstress_before",
        "avg_dstrain_before",
        "stress_slope_before",
        "strain_slope_before",
    ],
].head()


### Optional: export full datasets

In [None]:
# Switch for writing the complete statistics tables to disk:
# True writes the CSV files, False skips the export.
export_csv = False

if not export_csv:
    print("CSV export skipped.")
else:
    # Both tables are written in full, without the row index, to the current
    # working directory.
    df_stats_ld.to_csv("popin_statistics_load_depth.csv", index=False)
    df_stats_ss.to_csv("popin_statistics_stress_strain.csv", index=False)
    print("Exported full datasets to CSV.")

### Optional: export only statistics of the pop-ins

In [None]:
# Switch for exporting only the rows of the selected pop-ins:
# True writes the CSV files, False skips the export.
export_filtered = False

if not export_filtered:
    print("Filtered CSV export skipped.")
else:
    # Keep only the rows flagged in "popin_selected" and write them without the
    # row index.
    df_stats_ld.loc[df_stats_ld["popin_selected"]].to_csv(
        "popin_selected_load_depth.csv", index=False
    )
    df_stats_ss.loc[df_stats_ss["popin_selected"]].to_csv(
        "popin_selected_stress_strain.csv", index=False
    )
    print("Exported filtered pop-in rows to CSV.")