# Brownian Motion Analysis

This notebook analyzes microscopy particle-tracking data to estimate the diffusion coefficients of polystyrene beads, comparing three estimates:
1. Displacement histogram fitting
2. Mean Squared Displacement (MSD) analysis
3. Theoretical Stokes-Einstein prediction

## 1. Imports and Constants

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.optimize import curve_fit
import os

# --- Physical constants ---
# Boltzmann constant in J/K.
KB = 1.380649e-23

# --- Experimental conditions ---
# Sample temperature in degrees Celsius (taken from the data filenames).
TEMP_C = 21
# The same temperature expressed in kelvin.
TEMP_K = TEMP_C + 273.15

# --- Imaging calibration ---
# Camera frame rate in frames per second.
FRAME_RATE = 226.0
# Spatial calibration in micrometres per pixel.
PIXEL_SIZE_UM = 0.345

# Echo the configuration so the rendered notebook records what was used.
print(
    f"Temperature: {TEMP_K:.2f} K",
    f"Frame rate: {FRAME_RATE} fps",
    f"Pixel size: {PIXEL_SIZE_UM} um/pixel",
    sep="\n",
)

Temperature: 294.15 K
Frame rate: 226.0 fps
Pixel size: 0.345 um/pixel


## 2. Helper Functions

In [2]:
def get_viscosity(temp_c):
    """
    Returns the dynamic viscosity of liquid water at the given temperature in Pa.s.

    Uses the empirical correlation eta = A * 10**(B / (T - C)) with
    A = 2.414e-5 Pa.s, B = 247.8 K, C = 140 K and T in kelvin. At 21 C this
    gives about 9.78e-4 Pa.s, within roughly 0.2% of the previously
    hard-coded 9.764e-4 Pa.s.

    Parameters:
    - temp_c: Water temperature in degrees Celsius (scalar or NumPy array)

    Returns:
    - Dynamic viscosity in Pa.s
    """
    # NOTE(review): the correlation is meant for liquid water only; confirm
    # its accuracy is sufficient if used far from room temperature.
    temp_k = temp_c + 273.15
    return 2.414e-5 * 10.0 ** (247.8 / (temp_k - 140.0))

## 3. Data Loading

In [3]:
def load_track_data(filepath):
    """
    Loads track results from MTrack2 output format.
    File structure: Frame, X1, Y1, Flag1, X2, Y2, Flag2, ...

    The first two lines (header) and the last line (footer) of the file are
    skipped. The Frame column and every Flag column are dropped so that only
    the X/Y coordinate columns remain.

    Parameters:
    - filepath: Path to the tab-delimited track results file

    Returns:
    - 2D array (rows=frames, columns=X1,Y1,X2,Y2,...), or None if the file
      could not be loaded (the error is printed rather than raised)
    """
    print(f"Loading data from {filepath}...")
    try:
        raw_data = np.genfromtxt(filepath, delimiter='\t', skip_header=2, skip_footer=1, invalid_raise=False)

        # genfromtxt squeezes a file with a single data row down to 1D, which
        # would make the column indexing below fail; restore the row axis.
        if raw_data.ndim < 2:
            if raw_data.size == 0:
                raise ValueError("no data rows found")
            raw_data = raw_data.reshape(1, -1)

        # Remove Frame column (0) and Flag columns (3, 6, 9, ...)
        cols_to_delete = np.arange(0, raw_data.shape[1], 3)
        data = np.delete(raw_data, cols_to_delete, axis=1)

        print(f"  Loaded {data.shape[0]} frames, {data.shape[1] // 2} particles")
        return data

    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip this file.
        print(f"Error loading {filepath}: {e}")
        return None

## 4. Particle Processing with Track Splitting

In [4]:
def _split_at_missing_frames(track):
    """Returns the frame-contiguous pieces of `track` (shape (N, 2)) that contain no NaNs."""
    valid = ~np.isnan(track).any(axis=1)
    # Pad with False on both sides so a run touching either end of the track
    # still produces a rising (+1) and a falling (-1) edge.
    edges = np.diff(np.concatenate(([False], valid, [False])).astype(np.int8))
    starts = np.flatnonzero(edges == 1)
    stops = np.flatnonzero(edges == -1)
    return [track[start:stop] for start, stop in zip(starts, stops)]


def process_particles(data_matrix, min_length=10, max_jump_px=20):
    """
    Extracts valid trajectory segments from raw data.
    Splits tracks at missing frames (NaN) and at large jumps (tracking errors)
    instead of discarding entire tracks.

    Every returned segment is contiguous in frames, so consecutive rows are
    always exactly one frame apart. Simply dropping NaN rows would join
    positions from either side of a gap and count the multi-frame displacement
    across it as a single-frame step.

    Parameters:
    - data_matrix: Raw X,Y data (rows=frames, columns=X1,Y1,X2,Y2,...)
    - min_length: Minimum number of points for a valid segment
    - max_jump_px: Maximum allowed jump per frame (pixels)

    Returns:
    - List of trajectory arrays, each of shape (N, 2), sorted longest first
    """
    particles = []
    n_particles = data_matrix.shape[1] // 2

    for i in range(n_particles):
        # Columns 2*i and 2*i + 1 hold the X and Y positions of particle i.
        track = data_matrix[:, [2 * i, 2 * i + 1]]

        for run in _split_at_missing_frames(track):
            if len(run) < min_length:
                continue

            # Frame-to-frame step length in pixels.
            steps = np.sqrt(np.sum(np.diff(run, axis=0) ** 2, axis=1))

            # A bad step between rows k and k+1 starts a new segment at row k+1.
            # With no bad steps np.split returns the run unchanged.
            split_indices = np.flatnonzero(steps > max_jump_px) + 1

            for seg in np.split(run, split_indices):
                if len(seg) >= min_length:
                    particles.append(seg)

    # Sort by length (longest first)
    particles.sort(key=len, reverse=True)

    print(f"Extracted {len(particles)} valid segments from {n_particles} original tracks (splitting at jumps > {max_jump_px} px).")
    return particles

## 5. Diffusion Analysis

In [5]:
def analyze_diffusion(particles, frame_rate, pixel_size_um, bead_diameter_um, filename_tag,
                      n_top=10, max_lag=20, fit_limit=10):
    """
    Performs diffusion analysis using histogram and MSD methods.

    Three figures are shown and saved as PNG files in the working directory:
    histogram_<tag>.png, msd_<tag>.png and trajectories_<tag>.png. The
    theoretical value is computed from the notebook-level KB, TEMP_C, TEMP_K
    and get_viscosity().

    Parameters:
    - particles: List of trajectory arrays, each of shape (N, 2), in pixels
    - frame_rate: Camera frame rate (fps)
    - pixel_size_um: Calibration (um/pixel)
    - bead_diameter_um: Bead diameter in microns
    - filename_tag: Label for output files
    - n_top: Number of leading entries of `particles` used for the histogram
      and the trajectory plot
    - max_lag: Largest lag (in frames) for which the MSD is computed
    - fit_limit: Number of leading lags used for the linear MSD fit

    Returns:
    - D_hist, D_msd, D_theory (all in um^2/s)

    Raises:
    - ValueError: If there are no trajectories or displacements to analyze,
      or if none of the lags in the fit window has any samples
    """
    if len(particles) == 0:
        raise ValueError(f"[{filename_tag}] No trajectory segments to analyze.")

    dt = 1.0 / frame_rate

    # --- 1. Displacement Histogram Analysis ---
    # Only the first n_top trajectories contribute to the histogram.
    top_particles = particles[:n_top]
    steps_um = [np.diff(p * pixel_size_um, axis=0) for p in top_particles]
    all_dx = np.concatenate([s[:, 0] for s in steps_um])
    all_dy = np.concatenate([s[:, 1] for s in steps_um])
    # X and Y steps are pooled as independent 1D displacement samples.
    all_displacements = np.concatenate([all_dx, all_dy])
    if all_displacements.size == 0:
        raise ValueError(f"[{filename_tag}] Trajectories contain no displacements.")

    # Calculate D from variance: sigma^2 = 2 * D * dt
    mu = np.mean(all_displacements)
    sigma = np.std(all_displacements)
    D_hist = (sigma**2) / (2 * dt)

    print(f"[{filename_tag}] Histogram Analysis:")
    print(f"  Mean displacement: {mu:.4f} um (should be ~0)")
    print(f"  Std Dev: {sigma:.4f} um")
    print(f"  D (from variance): {D_hist:.4e} um^2/s")

    # Plot histogram
    fig, ax = plt.subplots(figsize=(10, 6))
    _, bins, _ = ax.hist(all_displacements, 50, density=True, alpha=0.6, color='g', label='Data')

    # Gaussian overlay built from the sample mean and standard deviation
    x_fit = np.linspace(bins[0], bins[-1], 100)
    pdf = 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x_fit - mu)**2 / (2 * sigma**2))
    ax.plot(x_fit, pdf, linewidth=2, color='r', label=f'Gaussian fit (D={D_hist:.2f})')

    ax.set_title(f"Displacement Histogram - {filename_tag}")
    ax.set_xlabel('Displacement (um)')
    ax.set_ylabel('Probability Density')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.savefig(f"histogram_{filename_tag}.png", dpi=150)
    plt.show()

    # --- 2. MSD Analysis ---
    # Unlike the histogram, the MSD pools every trajectory in `particles`.
    lags = np.arange(1, max_lag + 1)
    msd_values = np.zeros(max_lag)
    n_samples = np.zeros(max_lag)

    for p in particles:
        pos_um = p * pixel_size_um

        # A trajectory of N points only supports lags strictly below N.
        for lag in lags[lags < len(pos_um)]:
            displacements = pos_um[lag:] - pos_um[:-lag]
            sq_displacements = np.sum(displacements**2, axis=1)
            msd_values[lag-1] += np.sum(sq_displacements)
            n_samples[lag-1] += len(sq_displacements)

    # Lags that no trajectory is long enough to reach stay NaN instead of
    # triggering a 0/0 division.
    has_samples = n_samples > 0
    avg_msd = np.full(max_lag, np.nan)
    avg_msd[has_samples] = msd_values[has_samples] / n_samples[has_samples]
    time_lags = lags * dt

    # Linear fit: MSD = 4*D*t for 2D diffusion, restricted to the first
    # fit_limit lags that actually have data (curve_fit rejects NaNs).
    fit_mask = has_samples[:fit_limit]
    t_fit = time_lags[:fit_limit][fit_mask]
    m_fit = avg_msd[:fit_limit][fit_mask]
    if t_fit.size == 0:
        raise ValueError(f"[{filename_tag}] No MSD samples available for fitting.")

    def msd_func(t, D):
        return 4 * D * t

    popt, _ = curve_fit(msd_func, t_fit, m_fit)
    D_msd = popt[0]

    print(f"[{filename_tag}] MSD Analysis:")
    print(f"  D (from MSD slope): {D_msd:.4e} um^2/s")

    # Plot MSD
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(time_lags, avg_msd, 'o-', label='MSD Data')
    ax.plot(t_fit, msd_func(t_fit, D_msd), 'r--', linewidth=2, label=f'Linear fit (D={D_msd:.2f})')
    ax.set_title(f"Mean Squared Displacement - {filename_tag}")
    ax.set_xlabel('Time lag (s)')
    ax.set_ylabel('MSD (um^2)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.savefig(f"msd_{filename_tag}.png", dpi=150)
    plt.show()

    # --- 3. Theoretical D (Stokes-Einstein) ---
    # D = kB * T / (6 * pi * eta * r)
    eta = get_viscosity(TEMP_C)
    r_m = (bead_diameter_um / 2) * 1e-6  # radius in meters
    D_theory_m2s = (KB * TEMP_K) / (6 * np.pi * eta * r_m)
    D_theory_um2s = D_theory_m2s * 1e12  # convert to um^2/s

    print(f"[{filename_tag}] Theoretical (Stokes-Einstein):")
    print(f"  Temperature: {TEMP_K:.1f} K")
    print(f"  Viscosity: {eta:.5f} Pa.s")
    print(f"  Radius: {r_m:.2e} m")
    print(f"  D (theory): {D_theory_um2s:.4e} um^2/s")
    print("-" * 50)

    # --- 4. Trajectory Plot ---
    fig, ax = plt.subplots(figsize=(10, 10))
    colors = cm.rainbow(np.linspace(0, 1, len(top_particles)))

    for i, (p, c) in enumerate(zip(top_particles, colors)):
        pos_um = p * pixel_size_um
        # Plot relative to starting position
        ax.plot(pos_um[:, 0] - pos_um[0, 0], pos_um[:, 1] - pos_um[0, 1],
                '-o', markersize=2, linewidth=1, label=f'Particle {i+1}', color=c, alpha=0.7)

    ax.set_title(f"Particle Trajectories - {filename_tag}")
    ax.set_xlabel('X Displacement (um)')
    ax.set_ylabel('Y Displacement (um)')
    ax.axis('equal')
    ax.grid(True, alpha=0.3)
    # Skip the legend when there are too many entries to be readable.
    if len(top_particles) <= 15:
        ax.legend()
    fig.savefig(f"trajectories_{filename_tag}.png", dpi=150)
    plt.show()

    return D_hist, D_msd, D_theory_um2s

## 6. Run Analysis

In [6]:
# Input files - UPDATE THESE TO YOUR DATA LOCATIONS
file_1mu = "1mu-21c-1isto224w-0_5p-trackresults.txt"
file_5mu = "5mu-21c-1isto6_5w-0_5p-trackresults.txt"

# --- 1 micron beads ---
print("=" * 60, "ANALYZING 1 MICRON BEADS", "=" * 60, sep="\n")

data_1mu = load_track_data(file_1mu)
# load_track_data returns None when the file could not be read; in that case
# the analysis is skipped and the D_*_1 results are not (re)assigned.
if data_1mu is not None:
    parts_1mu = process_particles(data_1mu, max_jump_px=20)
    D_hist_1, D_msd_1, D_theory_1 = analyze_diffusion(
        parts_1mu,
        frame_rate=FRAME_RATE,
        pixel_size_um=PIXEL_SIZE_UM,
        bead_diameter_um=1.0,
        filename_tag="1mu_beads",
    )

ANALYZING 1 MICRON BEADS
Loading data from 1mu-21c-1isto224w-0_5p-trackresults.txt...
Error loading 1mu-21c-1isto224w-0_5p-trackresults.txt: 1mu-21c-1isto224w-0_5p-trackresults.txt not found.


In [7]:
# --- 5 micron beads ---
print("=" * 60, "ANALYZING 5 MICRON BEADS", "=" * 60, sep="\n")

data_5mu = load_track_data(file_5mu)
# load_track_data returns None when the file could not be read; in that case
# the analysis is skipped and the D_*_5 results are not (re)assigned.
if data_5mu is not None:
    parts_5mu = process_particles(data_5mu, max_jump_px=20)
    D_hist_5, D_msd_5, D_theory_5 = analyze_diffusion(
        parts_5mu,
        frame_rate=FRAME_RATE,
        pixel_size_um=PIXEL_SIZE_UM,
        bead_diameter_um=5.0,
        filename_tag="5mu_beads",
    )

ANALYZING 5 MICRON BEADS
Loading data from 5mu-21c-1isto6_5w-0_5p-trackresults.txt...
Error loading 5mu-21c-1isto6_5w-0_5p-trackresults.txt: 5mu-21c-1isto6_5w-0_5p-trackresults.txt not found.


## 7. Summary and Comparison

In [8]:
# Print summary table comparing the three estimates of D for both bead sizes
print("\n" + "=" * 60)
print("RESULTS SUMMARY")
print("=" * 60)
print(f"{'Method':<20} {'1um Beads (um^2/s)':<20} {'5um Beads (um^2/s)'}")
print("-" * 60)

try:
    print(f"{'Histogram':<20} {D_hist_1:<20.4f} {D_hist_5:.4f}")
    print(f"{'MSD':<20} {D_msd_1:<20.4f} {D_msd_5:.4f}")
    print(f"{'Theory (S-E)':<20} {D_theory_1:<20.4f} {D_theory_5:.4f}")
    print("-" * 60)
    # Stokes-Einstein gives D proportional to 1/radius, so the 1 um beads
    # should diffuse 5x faster than the 5 um beads.
    print("\nRatio D(1um)/D(5um):")
    print(f"  Histogram: {D_hist_1/D_hist_5:.2f} (expected: 5.0)")
    print(f"  MSD: {D_msd_1/D_msd_5:.2f} (expected: 5.0)")
except NameError as err:
    # The D_* variables only exist once an analysis cell has run AND its data
    # file loaded; a failed load leaves them undefined even though the cell
    # was executed, so report both possible causes.
    print(f"Results unavailable ({err}).")
    print("Run the analysis cells above and check that both data files loaded successfully.")


RESULTS SUMMARY
Method               1um Beads (um^2/s)   5um Beads (um^2/s)
------------------------------------------------------------
Run the analysis cells above first!
