In [1]:
WORKING_DIR = "/home/xavier/Documents/DAE_project"

# Image selection
This is the notebook to select and crop images for illustration.

In [2]:
import pandas as pd
import os
import shutil

# Get the annotation of movie folders

In [3]:
# Define the root directory containing the datasets
ROOT_DIR = f"{WORKING_DIR}/dataset/Roy_training/images"
# Define the path to the label file
LABEL_DIR = f"{WORKING_DIR}/dataset/Roy_training/Caro_3d_9.7.22_2.20_new.xlsx"
container = []

# Loop through each item in the root directory
for strain in os.listdir(ROOT_DIR):
    run_id = int(strain.split("Run")[1])  # Extract run ID from the folder name

    # Construct the path to the current strain folder
    strain_path = os.path.join(ROOT_DIR, strain)

    # Loop through each scope folder within the current strain folder
    for scope in os.listdir(strain_path):
        # Construct the full path to the current scope folder
        scope_path = os.path.join(strain_path, scope)

        # Append the run ID, scope ID, and the scope folder path to the container list
        container.append([run_id, int(scope[-2:]), scope_path])

# Convert the container list to a DataFrame
container = pd.DataFrame(container, columns=["Run", "Scope", "Directory"])

# Load the label file into a DataFrame
label_df = pd.read_excel(LABEL_DIR)

# Merge the container DataFrame with the label DataFrame based on the "Run" column
# This assumes 'Run' is a column in both DataFrames. If 'Run' is an index in label_df,
# you might first need to reset the index using label_df.reset_index().
annotated_container = pd.merge(container, label_df, on="Run")

# If the 'Run' column is not named the same in both DataFrames, you would specify
# left_on and right_on instead of just on, e.g.:
# annotated_container = pd.merge(container, label_df, left_on="Run", right_on="YourRunColumnNameInLabelDF")
annotated_container

Unnamed: 0,Run,Scope,Directory,Mutant #,Reference,QC,Movies,FinalFramePhenotype,Source,QC- Error Code:
0,636,38,/home/xavier/Documents/DAE_project/dataset/Roy...,DK425,https://journals.asm.org/doi/abs/10.1128/jb.17...,Pass,3.0,Weak,"LJ Shimkets, D Kaiser - Journal of Bacteriolog...",
1,636,37,/home/xavier/Documents/DAE_project/dataset/Roy...,DK425,https://journals.asm.org/doi/abs/10.1128/jb.17...,Pass,3.0,Weak,"LJ Shimkets, D Kaiser - Journal of Bacteriolog...",
2,636,39,/home/xavier/Documents/DAE_project/dataset/Roy...,DK425,https://journals.asm.org/doi/abs/10.1128/jb.17...,Pass,3.0,Weak,"LJ Shimkets, D Kaiser - Journal of Bacteriolog...",
3,672,3,/home/xavier/Documents/DAE_project/dataset/Roy...,DK1253,https://www.pnas.org/doi/epdf/10.1073/pnas.76....,Pass,3.0,Weak,"L Kroos, A Kuspa, D Kaiser - Journal of bacter...",
4,672,2,/home/xavier/Documents/DAE_project/dataset/Roy...,DK1253,https://www.pnas.org/doi/epdf/10.1073/pnas.76....,Pass,3.0,Weak,"L Kroos, A Kuspa, D Kaiser - Journal of bacter...",
...,...,...,...,...,...,...,...,...,...,...
932,287,31,/home/xavier/Documents/DAE_project/dataset/Roy...,10536,,Pass,3.0,Weak,,
933,287,32,/home/xavier/Documents/DAE_project/dataset/Roy...,10536,,Pass,3.0,Weak,,
934,526,30,/home/xavier/Documents/DAE_project/dataset/Roy...,DK5257,https://genesdev.cshlp.org/content/1/8/840.short,?,3.0,Spots,"L Kroos, D Kaiser - Genes & development, 1987 ...",
935,526,29,/home/xavier/Documents/DAE_project/dataset/Roy...,DK5257,https://genesdev.cshlp.org/content/1/8/840.short,?,3.0,Spots,"L Kroos, D Kaiser - Genes & development, 1987 ...",


# Get selected images

In [4]:
OUT_DIR = f"{WORKING_DIR}/images/figure4/original"
os.makedirs(OUT_DIR, exist_ok=True)
SELECTED_FRAMES = [1441]  # [1, 289, 577, 721, 865, 1153, 1441]
selected_strains = [1622, 2232, 3557, 5205, 8615, 1224]  #[8615, 5205, 2232, 3557, 4398, 3186, 1218, 1253, 1224, 5206]
selected_strains = selected_strains + [f"DK{str(tmp)}" for tmp in selected_strains]
# filtered_df = annotated_container[annotated_container["Mutant #"].isin(selected_strains)]
selected_runs = [432]  # [351, 684, 407, 254, 340, 282, 607, 379, 574, 316, 485, 432, 584, 589]
# filtered_df = annotated_container[annotated_container["Run"].isin(selected_runs)]
filtered_df = annotated_container[annotated_container["Mutant #"].isin(selected_strains)]
for _, row in filtered_df.iterrows():
    images = os.listdir(row["Directory"])
    name_format = images[0][:-8] + "%04d.jpg"
    for selected_frame in SELECTED_FRAMES:
        try:
            shutil.copy(os.path.join(row["Directory"], name_format % selected_frame),
                        os.path.join(OUT_DIR, f"{row['Mutant #']}_" + name_format % selected_frame))
        except FileNotFoundError:
            print(f"{selected_frame} not found in {row['Directory']}")


1441 not found in /home/xavier/Documents/DAE_project/dataset/Roy_training/images/CS2_44_1622_1%agar_Run0320/Scope23
1441 not found in /home/xavier/Documents/DAE_project/dataset/Roy_training/images/CS2_44_1622_1%agar_Run0320/Scope22
1441 not found in /home/xavier/Documents/DAE_project/dataset/Roy_training/images/CS2_44_1622_1%agar_Run0320/Scope24


# Helper functions for crop

In [5]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar


def resize_crop(img_name, resize_by=1., resolution=512, brightness_norm=True, brightness_mean=107):
    if isinstance(img_name, str):
        img = cv2.imread(img_name, cv2.IMREAD_UNCHANGED)
    else:
        img = img_name
    if img.dtype != np.uint8:
        img = np.uint8(img / 256)
    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_shape = img.shape
    resize_shape = np.array([img_shape[1] * resize_by, img_shape[0] * resize_by], dtype=int)
    if resize_by != 1:
        img = cv2.resize(img, resize_shape, cv2.INTER_LANCZOS4)
    img = img[(resize_shape[1] - resolution) // 2:(resize_shape[1] + resolution) // 2,
    (resize_shape[0] - resolution) // 2:(resize_shape[0] + resolution) // 2]
    if brightness_norm:
        obj_v = np.mean(img)
        value = brightness_mean - obj_v
        img = cv2.add(img, value)
    return img


def add_scale_bar(ax, length_px, length_unit, bar_height=5, bar_color='white', text_color='white', font_size=10,
                  location=(0.05, 0.05)):
    """
    Adds a scale bar to a Matplotlib axis.

    :param ax: Matplotlib axis
    :param length_px: Length of the scale bar in pixels
    :param length_unit: Unit of length (e.g., '1 mm')
    :param bar_height: Height of the scale bar in pixels
    :param bar_color: Color of the scale bar
    :param text_color: Color of the text
    :param font_size: Font size of the text
    :param location: Tuple indicating the relative position of the scale bar in the axes (from 0 to 1)
    """
    # Calculate scale bar position
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    x_length = xlim[1] - xlim[0]
    y_length = ylim[1] - ylim[0]

    # Position in axes coordinates
    x = xlim[0] + x_length * location[0]
    y = ylim[0] + y_length * location[1]

    # Add rectangle for scale bar
    rectangle = plt.Rectangle((x, y), length_px, bar_height, color=bar_color)
    ax.add_patch(rectangle)

    # Add text
    ax.text(x + length_px / 2, y + bar_height * 2, length_unit, color=text_color, ha='center', va='bottom',
            fontsize=font_size)

    # Plot image
    fig, ax = plt.subplots()
    ax.imshow(image, cmap='gray')
    add_scale_bar(ax, length_px=20, length_unit='1 mm')  # Adjust length_px and length_unit based on your needs

    # Hide axes
    ax.axis('off')

    # Show the figure
    plt.show()

# Crop images and reconstruct
We changed the noise_const to make the consistent aggregate positions for each generation with the same noise seed.

In [6]:
SOURCE = f"{WORKING_DIR}/images/figure4/original"
OUT_DIR = f"{WORKING_DIR}/images/figure4/cropped"
# MODEL_DIR = "/home/xavier/PycharmProjects/training-runs/e13/00001-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-000756.pkl"
MODEL_DIR = f"{WORKING_DIR}/models/network-snapshot-003024-patched.pkl"

import dnnlib
import legacy
import torch
from training.networks_stylegan2 import SynthesisLayer

os.environ['CC'] = "/usr/bin/gcc-9"
os.environ['CXX'] = "/usr/bin/g++-9"
device = torch.device('cuda')


def reset_noise_const(G, seed):
    torch.manual_seed(seed)

    for block in G.synthesis.children():
        for layer in block.children():
            if layer.__class__.__name__ == "SynthesisLayer":  #isinstance(layer, SynthesisLayer):
                resolution = layer.resolution

                with torch.no_grad():
                    layer.noise_const.copy_(torch.randn([resolution, resolution]))


with dnnlib.util.open_url(MODEL_DIR) as fp:
    models = legacy.load_network_pkl(fp)
    E = models['E_ema'].to(device)
    G = models['G_ema'].to(device)  # type: ignore
os.makedirs(OUT_DIR, exist_ok=True)
for image in os.listdir(SOURCE):
    processed = resize_crop(os.path.join(SOURCE, image))
    cv2.imwrite(os.path.join(OUT_DIR, image), processed)
    img = np.array(processed) / 127.5 - 1
    img = torch.Tensor(img).to(device)
    img = img[None, None, :, :]

    z, logvar = E.mu_var(img, None)
    reset_noise_const(G, 0)
    synth_image = G(z, None, noise_mode="const")
    synth_image = (synth_image + 1) * 127.5
    synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[0, :, :, 0]
    cv2.imwrite(os.path.join(OUT_DIR, "const1_" + image), synth_image)
    synth_image2 = G(z, None, noise_mode="random")
    synth_image2 = (synth_image2 + 1) * 127.5
    synth_image2 = synth_image2.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[0, :, :, 0]
    cv2.imwrite(os.path.join(OUT_DIR, "random1_" + image), synth_image2)
    reset_noise_const(G, 20)
    synth_image = G(z, None, noise_mode="const")
    synth_image = (synth_image + 1) * 127.5
    synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[0, :, :, 0]
    cv2.imwrite(os.path.join(OUT_DIR, "const2_" + image), synth_image)

Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.


# Multiple linear interpolations

In [7]:
# Linear interpolation with replicate generation using reset_noise_const
INTERP_OUT_DIR = f"{WORKING_DIR}/images/figure4/interpolation/2232-1622/linear_interpolations"
os.makedirs(INTERP_OUT_DIR, exist_ok=True)

# Check if output images already exist
inner_num = 6
n_replicas = 300
expected_count = inner_num * n_replicas + 2  # +2 for original images (image0.tif and image5.tif)
existing_files = len([f for f in os.listdir(INTERP_OUT_DIR) if f.endswith('.tif')])

if existing_files >= expected_count:
    print(f"Output images already constructed in {INTERP_OUT_DIR} ({existing_files} files). Skipping.")
else:
    # source images
    img1_dir = f"{WORKING_DIR}/images/figure4/original/DK2232_Run0351_scope40-00_1441.jpg"
    img2_dir = f"{WORKING_DIR}/images/figure4/original/1622_Run0432_scope8-00_1441.jpg"
    img1 = resize_crop(img1_dir)
    img2 = resize_crop(img2_dir)

    cv2.imwrite(os.path.join(INTERP_OUT_DIR, f"image0.tif"), img1)
    cv2.imwrite(os.path.join(INTERP_OUT_DIR, f"image{inner_num-1}.tif"), img2)
    # number of replicate noise constants (changeable)

    reset_noise_const(G, 4)
    imgs = torch.Tensor([img1, img2]).to(device).to(torch.float32) / 127.5 - 1
    imgs = imgs[:, None, :, :]
    z, _ = E.mu_var(imgs, None)
    z_orint = z[1, :] - z[0, :]

    step_size = 1 / (inner_num - 1)
    z_interpolate = torch.cat([(z[0, :] + z_orint * (i * step_size))[None, :] for i in range(inner_num)])
    new_zs = z_interpolate

    # Generate replicate images by resetting noise_const for each seed
    for r in range(n_replicas):
        reset_noise_const(G, r)
        synth_batch = G(new_zs, None, noise_mode='const')
        synth_batch = (synth_batch + 1) * 127.5
        synth_batch = synth_batch.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[:, :, :, 0]
        for i in range(inner_num):
            out_path = os.path.join(INTERP_OUT_DIR, f"image{i}_rep{r}.tif")
            cv2.imwrite(out_path, synth_batch[i])

    print(f"Saved {n_replicas} replicates for {inner_num} interpolated steps to: {INTERP_OUT_DIR}")

# Optionally also save a single-ws synthesis (one replicate) for reference
# reset_noise_const(G, 0)
# ws = G.mapping(z, None)
# ws_interpolate = torch.cat([(ws[0, :, :] + (ws[1, :, :] - ws[0, :, :]) * (i * step_size))[None, :] for i in range(inner_num)])
# synth_ws = G.synthesis(ws_interpolate, noise_mode='const')
# synth_ws = (synth_ws + 1) * 127.5
# synth_ws = synth_ws.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[:, :, :, 0]
# for i in range(inner_num):
#     cv2.imwrite(os.path.join(INTERP_OUT_DIR, f"ws_image{i}.tif"), synth_ws[i])

print("Done!")

Output images already constructed in /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/linear_interpolations (1802 files). Skipping.
Done!


# Get human defined features
* Run image segmentation in MATLAB
  20100604-STIC/Agg_feat_ext_loop.m

In [13]:
import os
import glob
import cv2
import numpy as np
import pandas as pd
from skimage import measure
from skimage.filters import threshold_otsu
import matplotlib.pyplot as plt

# ==========================================
# Configuration & Directories
# ==========================================
SEG_DIR = f"{WORKING_DIR}/images/figure4/interpolation/2232-1622/segmenteds"
OUT_DIR = f"{WORKING_DIR}/images/figure4/interpolation/2232-1622/analysises"
os.makedirs(OUT_DIR, exist_ok=True)

# Analysis Parameters
TOTAL_REPLICATES_TO_CHECK = 300  # Total pool of noise codes to scan
TARGET_VALID_REPLICATES = 100    # Stop after finding this many good replicates
IMAGES_PER_REPLICATE = 6         # Number of interpolation steps (0 to 5)

# Image scaling params
original_size = [512, 512]
final_size = [600, 600]
ratio = 600 / 512

# Quality thresholds (scaled for 600x600 resolution)
AREA_MIN_SCALED = 100 / (1600/1296)**2 * (600/512)**2  # ~39.5
AREA_MAX_SCALED = 100000 / (1600/1296)**2 * (600/512)**2  # ~39500
ECCENTRICITY_THRESHOLD = 0.8
AXIS_RATIO_THRESHOLD = 3.0

# ==========================================
# Helper Functions
# ==========================================

def compute_stats(region_props):
    """
    Computes statistical features for a list of region properties.
    Returns a dictionary of tuples (mean, min, max, median, sum/std, std).
    """
    if not region_props:
        # Return zeros if no regions found
        return {
            "area": (0, 0, 0, 0, 0, 0),
            "perimeter": (0, 0, 0, 0, 0),
            "eccentricity": (0, 0, 0, 0, 0),
            "axis_length": (0, 0, 0, 0, 0),
            "count": 0
        }

    # Extract raw lists
    areas = [p.area for p in region_props]
    perimeters = [p.perimeter for p in region_props]
    eccentricities = [p.eccentricity for p in region_props]
    axis_lengths = [p.major_axis_length for p in region_props]

    # Calculate stats
    # Note: Area is scaled back by ratio^2 and multiplied by 4 (for pixel_size=2um -> 4um^2)
    # Lengths are scaled back by ratio and multiplied by 2 (for pixel_size=2um)
    
    stats = {
        "area": (
            np.mean(areas) / (ratio ** 2) * 4, 
            np.min(areas) / (ratio ** 2) * 4, 
            np.max(areas) / (ratio ** 2) * 4,
            np.median(areas) / (ratio ** 2) * 4, 
            np.std(areas) / (ratio ** 2) * 4,
            np.sum(areas) / (ratio ** 2) * 4
        ),
        "perimeter": (
            np.mean(perimeters) / ratio * 2, 
            np.min(perimeters) / ratio * 2, 
            np.max(perimeters) / ratio * 2,
            np.median(perimeters) / ratio * 2, 
            np.std(perimeters) / ratio * 2
        ),
        "eccentricity": (
            np.mean(eccentricities), 
            np.min(eccentricities), 
            np.max(eccentricities), 
            np.median(eccentricities),
            np.std(eccentricities)
        ),
        "axis_length": (
            np.mean(axis_lengths) / ratio * 2, 
            np.min(axis_lengths) / ratio * 2,
            np.max(axis_lengths) / ratio * 2,
            np.median(axis_lengths) / ratio * 2,
            np.std(axis_lengths) / ratio * 2
        ),
        "count": len(region_props)
    }
    return stats

def check_quality(props):
    """
    Checks if the segmentation quality is acceptable for a single image.
    Returns True if passed, False otherwise.
    """
    if len(props) == 0:
        return False

    too_many_small = 0
    too_big = False
    high_ecc_count = 0
    bad_axis_ratio = 0
    
    for prop in props:
        if prop.area < AREA_MIN_SCALED:
            too_many_small += 1
        elif prop.area > AREA_MAX_SCALED:
            too_big = True
        
        if prop.eccentricity > ECCENTRICITY_THRESHOLD:
            high_ecc_count += 1
        
        # Avoid division by zero for minor axis
        if prop.minor_axis_length > 0:
            if prop.major_axis_length > AXIS_RATIO_THRESHOLD * prop.minor_axis_length:
                bad_axis_ratio += 1
        else:
             bad_axis_ratio += 1

    # Quality Criteria
    if too_many_small > len(props) // 2:
        return False
    if too_big:
        return False
    if high_ecc_count > len(props) // 2:
        return False
    if bad_axis_ratio > 0:
        return False
        
    return True

# ==========================================
# Main Processing Loop
# ==========================================

# Define column names for the output DataFrame
feature_names = [
    "area_mean_um2", "area_min_um2", "area_max_um2", "area_median_um2", "area_std_um2", "area_sum_um2",
    "perimeter_mean_um", "perimeter_min_um", "perimeter_max_um", "perimeter_median_um", "perimeter_std_um",
    "eccentricity_mean", "eccentricity_min", "eccentricity_max", "eccentricity_median", "eccentricity_std",
    "axis_length_mean_um", "axis_length_min_um", "axis_length_max_um", "axis_length_median_um", "axis_length_std_um",
    "count"
]
cols = ["Replicate_ID", "Image_Step"] + feature_names

valid_data = []
collected_count = 0

print(f"Starting scan of {TOTAL_REPLICATES_TO_CHECK} replicates. Target: {TARGET_VALID_REPLICATES} valid sets.")

# Iterate through replicates (noise codes)
for rep_idx in range(TOTAL_REPLICATES_TO_CHECK):
    if collected_count >= TARGET_VALID_REPLICATES:
        print(f"\nTarget reached! Collected {collected_count} valid replicates.")
        break

    temp_replicate_rows = []
    replicate_valid = True
    
    # Check all 6 interpolation steps for this replicate
    for img_idx in range(IMAGES_PER_REPLICATE):
        # Construct filepath: e.g., image0_rep0.tif, image1_rep0.tif, etc.
        filename = f"image{img_idx}_rep{rep_idx}.tif"
        fpath = os.path.join(SEG_DIR, filename)
        
        if not os.path.exists(fpath):
            # Try alternative naming if necessary, or just fail
            replicate_valid = False
            # print(f"  [Rep {rep_idx}] Missing file: {filename}") # Optional logging
            break
            
        # Load Image
        bw_img = cv2.imread(fpath, cv2.IMREAD_UNCHANGED)
        if bw_img is None:
            replicate_valid = False
            break

        # Binarize
        try:
            thresh = threshold_otsu(bw_img)
        except Exception:
            thresh = np.mean(bw_img)
            
        bw = bw_img > thresh
        labeled_bw = measure.label(bw)
        props = measure.regionprops(labeled_bw)

        # 1. Run Quality Check
        if not check_quality(props):
            replicate_valid = False
            # print(f"  [Rep {rep_idx}] Quality check failed on {filename}") # Optional logging
            break
            
        # 2. Compute Features (only if QC passed)
        stats = compute_stats(props)
        
        # Flatten features into a list
        row_features = []
        row_features.extend(stats["area"])
        row_features.extend(stats["perimeter"])
        row_features.extend(stats["eccentricity"])
        row_features.extend(stats["axis_length"])
        row_features.append(stats["count"])
        
        # Add metadata (Replicate ID and Image Step)
        full_row = [rep_idx, img_idx] + row_features
        temp_replicate_rows.append(full_row)

    # If all images for this replicate passed
    if replicate_valid and len(temp_replicate_rows) == IMAGES_PER_REPLICATE:
        valid_data.extend(temp_replicate_rows)
        collected_count += 1
        if collected_count % 10 == 0:
            print(f"  -> Collected {collected_count} valid replicates...")
    else:
        pass 
        # Optionally log why it failed (e.g., "Skipped Rep {rep_idx}")

# ==========================================
# Save Results
# ==========================================

if collected_count == 0:
    print("\nWarning: No valid replicates found matching criteria.")
else:
    df = pd.DataFrame(valid_data, columns=cols)
    
    # Save the detailed long-format data
    out_csv = os.path.join(OUT_DIR, "selected_replicates_features.csv")
    df.to_csv(out_csv, index=False)
    
    print(f"\nProcessing complete.")
    print(f"Total valid replicates: {collected_count}")
    print(f"Total rows generated: {len(df)} (Should be {collected_count * 6})")
    print(f"Data saved to: {out_csv}")
    print("\nFirst 5 rows of data:")
    print(df.head())

Starting scan of 300 replicates. Target: 100 valid sets.
  -> Collected 10 valid replicates...
  -> Collected 20 valid replicates...
  -> Collected 30 valid replicates...
  -> Collected 40 valid replicates...
  -> Collected 50 valid replicates...
  -> Collected 60 valid replicates...
  -> Collected 70 valid replicates...
  -> Collected 80 valid replicates...
  -> Collected 90 valid replicates...
  -> Collected 100 valid replicates...

Target reached! Collected 100 valid replicates.

Processing complete.
Total valid replicates: 100
Total rows generated: 600 (Should be 600)
Data saved to: /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/analysises/selected_replicates_features.csv

First 5 rows of data:
   Replicate_ID  Image_Step  area_mean_um2  area_min_um2  area_max_um2  \
0             1           0   10828.665535   1392.275911  27685.319111   
1             1           1    8375.164718   1153.433600  23619.174400   
2             1           2    6874.446332 

# Get feature graphs

In [15]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from skimage import measure
from skimage.filters import threshold_otsu
from scipy.stats import t

# ==========================================
# Configuration
# ==========================================

# Use the same directory settings as the extraction script
SEG_DIR = f"{WORKING_DIR}/images/figure4/interpolation/2232-1622/segmenteds"
OUT_DIR = f"{WORKING_DIR}/images/figure4/interpolation/2232-1622/analysises"
INPUT_CSV = os.path.join(OUT_DIR, "selected_replicates_features.csv")
os.makedirs(OUT_DIR, exist_ok=True)

# Parameters
IMAGES_PER_REPLICATE = 6  # Steps 0 to 5
REF_INDICES = [0, IMAGES_PER_REPLICATE - 1]  # [0, 5]

# Image scaling params (Must match extraction script)
ratio = 600 / 512

# Feature names (Must match extraction script)
feature_names = [
    "area_mean_um2", "area_min_um2", "area_max_um2", "area_median_um2", "area_std_um2", "area_sum_um2",
    "perimeter_mean_um", "perimeter_min_um", "perimeter_max_um", "perimeter_median_um", "perimeter_std_um",
    "eccentricity_mean", "eccentricity_min", "eccentricity_max", "eccentricity_median", "eccentricity_std",
    "axis_length_mean_um", "axis_length_min_um", "axis_length_max_um", "axis_length_median_um", "axis_length_std_um",
    "count"
]

# ==========================================
# Visualization Settings for Adobe Illustrator
# ==========================================
# Set font type to 42 (TrueType) to ensure text is editable in Illustrator/PDF
# instead of being converted to outlines (paths).
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
# Optional: Set a font family that is standard (e.g., Arial or Helvetica)
plt.rcParams['font.family'] = 'sans-serif' 

# ==========================================
# Helper Functions
# ==========================================

def compute_stats(region_props):
    """
    Computes statistical features for a list of region properties.
    """
    if not region_props:
        return {
            "area": (0, 0, 0, 0, 0, 0),
            "perimeter": (0, 0, 0, 0, 0),
            "eccentricity": (0, 0, 0, 0, 0),
            "axis_length": (0, 0, 0, 0, 0),
            "count": 0
        }

    areas = [p.area for p in region_props]
    perimeters = [p.perimeter for p in region_props]
    eccentricities = [p.eccentricity for p in region_props]
    axis_lengths = [p.major_axis_length for p in region_props]

    stats = {
        "area": (
            np.mean(areas) / (ratio ** 2) * 4, np.min(areas) / (ratio ** 2) * 4, np.max(areas) / (ratio ** 2) * 4,
            np.median(areas) / (ratio ** 2) * 4, np.std(areas) / (ratio ** 2) * 4, np.sum(areas) / (ratio ** 2) * 4
        ),
        "perimeter": (
            np.mean(perimeters) / ratio * 2, np.min(perimeters) / ratio * 2, np.max(perimeters) / ratio * 2,
            np.median(perimeters) / ratio * 2, np.std(perimeters) / ratio * 2
        ),
        "eccentricity": (
            np.mean(eccentricities), np.min(eccentricities), np.max(eccentricities), np.median(eccentricities),
            np.std(eccentricities)
        ),
        "axis_length": (
            np.mean(axis_lengths) / ratio * 2, np.min(axis_lengths) / ratio * 2, np.max(axis_lengths) / ratio * 2,
            np.median(axis_lengths) / ratio * 2, np.std(axis_lengths) / ratio * 2
        ),
        "count": len(region_props)
    }
    return stats

def get_single_image_features(filepath):
    """
    Loads an image, processes it, and returns a dictionary of features mapping to feature_names.
    """
    if not os.path.exists(filepath):
        print(f"Warning: Reference file not found: {filepath}")
        return None

    bw_img = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
    if bw_img is None:
        print(f"Warning: Could not read image: {filepath}")
        return None

    try:
        thresh = threshold_otsu(bw_img)
    except Exception:
        thresh = np.mean(bw_img)
    
    bw = bw_img > thresh
    labeled_bw = measure.label(bw)
    props = measure.regionprops(labeled_bw)
    
    # Compute raw stats
    stats = compute_stats(props)
    
    # Flatten to list consistent with feature_names
    flat_features = []
    flat_features.extend(stats["area"])
    flat_features.extend(stats["perimeter"])
    flat_features.extend(stats["eccentricity"])
    flat_features.extend(stats["axis_length"])
    flat_features.append(stats["count"])
    
    return dict(zip(feature_names, flat_features))

def ci95(std, n):
    """Calculates 95% Confidence Interval margin."""
    std = np.asarray(std, float)
    n = np.asarray(n, float)
    out = np.full_like(std, np.nan, dtype=float)
    # Avoid division by zero or invalid sqrt
    mask = np.isfinite(std) & (n > 1)
    # t.ppf(0.975, df) is the two-tailed critical value for 95% CI
    out[mask] = t.ppf(0.975, df=n[mask]-1) * (std[mask] / np.sqrt(n[mask]))
    return out

# ==========================================
# Main Analysis Logic
# ==========================================

print("--- Step 1: Loading Replicate Data ---")
if not os.path.exists(INPUT_CSV):
    raise FileNotFoundError(f"Input file {INPUT_CSV} not found. Please run feature extraction first.")

df = pd.read_csv(INPUT_CSV)
print(f"Loaded {len(df)} rows from {INPUT_CSV}")

# Group by 'Image_Step' to get stats across replicates
grouped = df.groupby("Image_Step")[feature_names]
df_mean = grouped.mean().reindex(range(IMAGES_PER_REPLICATE))
df_std = grouped.std().reindex(range(IMAGES_PER_REPLICATE))
df_count = grouped.count().iloc[:, 0].reindex(range(IMAGES_PER_REPLICATE)) # Count of valid replicates per step

# Calculate 95% CI margins
df_ci = pd.DataFrame(index=df_mean.index, columns=feature_names)
for col in feature_names:
    df_ci[col] = ci95(df_std[col].values, df_count.values)

print("--- Step 2: Processing Reference Images (Ground Truth) ---")
ref_data = {}
for idx in REF_INDICES:
    # Filenames for original images: image0.tif and image5.tif
    fname = f"image{idx}.tif"
    fpath = os.path.join(SEG_DIR, fname)
    print(f"Processing Reference Image: {fname}")
    
    feats = get_single_image_features(fpath)
    if feats:
        ref_data[idx] = feats
    else:
        print(f"  -> Failed to extract features for {fname}")

# ==========================================
# Visualization
# ==========================================
print("--- Step 3: Generating Plots ---")

alphas = np.arange(IMAGES_PER_REPLICATE)

# 计算每个特征的全局 y 轴范围，以确保所有图表对齐
print("Computing global y-axis limits for each feature...")
feature_ylims = {}
for feat in feature_names:
    all_vals = []
    
    # 收集重建数据的所有值（均值 + CI 界限）
    mean_vals = df_mean[feat].values
    ci_vals = df_ci[feat].values
    all_vals.extend(mean_vals - ci_vals)  # Lower CI bound
    all_vals.extend(mean_vals + ci_vals)  # Upper CI bound
    
    # 收集参考数据的所有值
    for ref_idx in REF_INDICES:
        if ref_idx in ref_data and feat in ref_data[ref_idx]:
            all_vals.append(ref_data[ref_idx][feat])
    
    # 计算最小值/最大值，加上 10% 的 padding
    all_vals = np.array([v for v in all_vals if np.isfinite(v)])
    if len(all_vals) > 0:
        v_min = np.min(all_vals)
        v_max = np.max(all_vals)
        v_range = v_max - v_min
        padding = v_range * 0.1 if v_range > 0 else 0.1
        feature_ylims[feat] = (v_min - padding, v_max + padding)
    else:
        feature_ylims[feat] = (0, 1)

# 3A. Line Plots for each feature
for feat in feature_names:
    mean_vals = df_mean[feat].values
    ci_vals = df_ci[feat].values
    
    plt.figure(figsize=(7, 5))
    
    # 1. Plot Replicate Trajectory (Mean + CI)
    plt.plot(alphas, mean_vals, marker="o", linewidth=2, label="Reconstructed (Mean)", color="#1f77b4")
    
    # Fill CI
    lower_bound = mean_vals - ci_vals
    upper_bound = mean_vals + ci_vals
    # Handle NaNs in CI
    valid_mask = np.isfinite(mean_vals) & np.isfinite(ci_vals)
    if np.any(valid_mask):
         plt.fill_between(alphas[valid_mask], lower_bound[valid_mask], upper_bound[valid_mask], 
                          color="#1f77b4", alpha=0.2, label="95% CI")

    # 2. Plot Reference Points (Stars)
    # Start (Image 0)
    if 0 in ref_data and feat in ref_data[0]:
        val0 = ref_data[0][feat]
        plt.scatter(0, val0, marker="*", s=200, color="red", zorder=10, 
                    edgecolor="black", label="Original Image 0")
        
    # End (Image 5)
    end_idx = IMAGES_PER_REPLICATE - 1
    if end_idx in ref_data and feat in ref_data[end_idx]:
        val_end = ref_data[end_idx][feat]
        plt.scatter(end_idx, val_end, marker="*", s=200, color="orange", zorder=10, 
                    edgecolor="black", label="Original Image 5")

    # 设置固定的 y 轴范围以确保所有图表的纵轴对齐
    plt.ylim(feature_ylims[feat])

    # plt.title(f"Feature: {feat}")
    plt.xlabel("Interpolation Step")
    plt.ylabel("Value")
    plt.xticks(alphas, [f"Step {i}" for i in alphas])
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.5)
    
    # 使用固定的左边界而不是 tight_layout，确保所有图表的左边界对齐
    plt.subplots_adjust(left=0.15, right=0.95, top=0.95, bottom=0.1)
    
    # Save as PNG (preview) and PDF (Illustrator editable)
    out_png = os.path.join(OUT_DIR, f"Plot_{feat}_with_Ref.png")
    out_pdf = os.path.join(OUT_DIR, f"Plot_{feat}_with_Ref.pdf")
    
    plt.savefig(out_png, dpi=300)
    plt.savefig(out_pdf, format='pdf', transparent=True)
    plt.close()

print("Saved line plots (PNG and PDF).")

# 3B. Heatmap (Z-scored)
# Structure: [Ref0, Step0, Step1 ... Step5, Ref5]
# Note: This allows comparison of how 'far' the start/end reconstructions are from ground truth
heatmap_cols = []
heatmap_labels = []

# Prepare Reference 0 Column
if 0 in ref_data:
    col_ref0 = pd.Series(ref_data[0])
    heatmap_cols.append(col_ref0)
    heatmap_labels.append("Ref_0")

# Prepare Replicate Columns (Means)
for i in range(IMAGES_PER_REPLICATE):
    heatmap_cols.append(df_mean.loc[i])
    heatmap_labels.append(f"Rec_{i}")

# Prepare Reference 5 Column
end_idx = IMAGES_PER_REPLICATE - 1
if end_idx in ref_data:
    col_refEnd = pd.Series(ref_data[end_idx])
    heatmap_cols.append(col_refEnd)
    heatmap_labels.append(f"Ref_{end_idx}")

# Construct DataFrame
if heatmap_cols:
    hm_df = pd.concat(heatmap_cols, axis=1)
    hm_df.columns = heatmap_labels
    
    # Calculate Z-score per feature (row-wise)
    # (x - mean) / std
    hm_mean = hm_df.mean(axis=1)
    hm_std = hm_df.std(axis=1)
    # Avoid div by zero
    hm_std[hm_std == 0] = 1.0
    
    hm_z = hm_df.sub(hm_mean, axis=0).div(hm_std, axis=0)
    
    # Plot
    plt.figure(figsize=(12, 8))
    sns.heatmap(hm_z, cmap="coolwarm", center=0, annot=False, linewidths=.5, square=False)
    # plt.title("Z-Score Heatmap: Originals vs Reconstructions")
    plt.tight_layout()
    
    # Save as PNG (preview) and PDF (Illustrator editable)
    out_hm_png = os.path.join(OUT_DIR, "Heatmap_Features_Ref_vs_Rec.png")
    out_hm_pdf = os.path.join(OUT_DIR, "Heatmap_Features_Ref_vs_Rec.pdf")
    
    plt.savefig(out_hm_png, dpi=300)
    plt.savefig(out_hm_pdf, format='pdf', transparent=True)
    plt.close()
    print(f"Saved heatmap to {out_hm_png} and {out_hm_pdf}")

# ==========================================
# 4. Save Summary Table
# ==========================================
summary_df = df_mean.copy()
summary_df.columns = [f"{c}_mean" for c in summary_df.columns]

# Add Std columns
for col in feature_names:
    summary_df[f"{col}_std"] = df_std[col]
    
# Add Ref columns (optional, strictly for the requested table)
# We will create a separate row or column for Reference values? 
# Usually, a table comparing values is best.
# Let's create a table that has rows: Ref0, Rec0...Rec5, Ref5

table_rows = []
# Row for Ref 0
if 0 in ref_data:
    row = {"Type": "Original", "Step": 0}
    row.update(ref_data[0])
    table_rows.append(row)

# Rows for Replicates
for i in range(IMAGES_PER_REPLICATE):
    row = {"Type": "Reconstructed_Mean", "Step": i}
    for feat in feature_names:
        row[feat] = df_mean.loc[i, feat]
        row[f"{feat}_std"] = df_std.loc[i, feat] # Add std as separate col
    table_rows.append(row)

# Row for Ref End
if end_idx in ref_data:
    row = {"Type": "Original", "Step": end_idx}
    row.update(ref_data[end_idx])
    table_rows.append(row)

final_table = pd.DataFrame(table_rows)
table_csv = os.path.join(OUT_DIR, "summary_comparison_table.csv")
final_table.to_csv(table_csv, index=False)
print(f"Saved summary table to {table_csv}")


--- Step 1: Loading Replicate Data ---
Loaded 600 rows from /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/analysises/selected_replicates_features.csv
--- Step 2: Processing Reference Images (Ground Truth) ---
Processing Reference Image: image0.tif
Processing Reference Image: image5.tif
--- Step 3: Generating Plots ---
Computing global y-axis limits for each feature...
Saved line plots (PNG and PDF).
Saved heatmap to /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/analysises/Heatmap_Features_Ref_vs_Rec.png and /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/analysises/Heatmap_Features_Ref_vs_Rec.pdf
Saved summary table to /home/xavier/Documents/DAE_project/images/figure4/interpolation/2232-1622/analysises/summary_comparison_table.csv


# Add scale bar

In [None]:
img_name = "/home/xavier/PycharmProjects/imgs_for_publication/WT_examples/1622_Run0432_scope7-00_1441.jpg"
image = resize_crop(img_name)

# Create subplot
fig, ax = plt.subplots()
ax.axis("off")

# Plot image
ax.imshow(image, cmap="gray", vmin=0, vmax=255)

# Create scale bar
scalebar = ScaleBar(2, "um", length_fraction=0.25)
ax.add_artist(scalebar)
# fig.tight_layout()
# # Show
# plt.savefig(os.path.join(OUT_DIR, "scalebar_" + os.path.basename(img_name)), bbox_inches='tight', pad_inches=0)
fig.canvas.draw()
image_with_scalebar = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
image_with_scalebar = image_with_scalebar.reshape(fig.canvas.get_width_height()[::-1] + (3,))

cv2.imwrite(os.path.join(OUT_DIR, "scalebar_" + os.path.basename(img_name)),
            cv2.cvtColor(image_with_scalebar, cv2.COLOR_RGB2BGR))


# Reconstruct Shimkets' images

In [17]:
import os
import cv2


def extract_frame(video_path, frame_number):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Unable to open video file {video_path}")
        return

    # Set the frame position
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

    # Read the frame
    ret, frame = cap.read()
    if ret:
        # Save the frame as an image
        ans = frame  # cv2.imwrite(output_path, frame)
    else:
        print(f"Error: Unable to read frame {frame_number} from {video_path}")

    # Release the video capture object
    cap.release()
    return ans


inner_num = 5
fp = "/media/xavier/Storage/feature_extraction/movie/movies"
# DK1622 with IPTG
# movie1 = "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_1/LS3934 010614_563.avi"
# movie1 = "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_0/LS3934 6N 021014_786.avi"
# movie2 = "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_6/LS3934 60N 030414_543.avi"
movies = {0: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_1/LS3934 010614_563.avi",
          2: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_5/LS3934 2N 022814_573.avi",
          20: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_4/LS3934 20N 031014_582.avi",
          200: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_3/LS3934 200N 022414_588.avi",
          2000: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_2/LS3934 2000N 022214_579.avi"
          }
movies = {0: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_1/LS3934 010614_563.avi",
          6: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_0/LS3934 6N 021014_786.avi",
          60: "/media/xavier/Storage/feature_extraction/movie/movies/LS3934_6/LS3934 60N 030414_543.avi"
          }
OUT_DIR = "/home/xavier/PycharmProjects/imgs_for_publication/figure3/interpolation/LS3934_6"
for m1 in movies:
    movie1 = movies[m1]
    for m2 in movies:
        if m1 == m2:
            continue
        movie2 = movies[m2]
        current_out = os.path.join(OUT_DIR, f"{m1}_{m2}.png")
        os.makedirs(current_out, exist_ok=True)
        img1 = extract_frame(movie1, 500)
        img2 = extract_frame(movie2, 500)
        img1 = resize_crop(img1, resize_by=23 / 20)
        img2 = resize_crop(img2, resize_by=23 / 20)
        name1 = movie1.split("/")[-1][:-4] + '.png'
        name2 = movie2.split("/")[-1][:-4] + '.png'
        cv2.imwrite(os.path.join(current_out, name1), img1)
        cv2.imwrite(os.path.join(current_out, name2), img2)

        reset_noise_const(G, 4)
        imgs = torch.Tensor([img1, img2]).to(device).to(torch.float32) / 127.5 - 1
        imgs = imgs[:, None, :, :]
        z, _ = E.mu_var(imgs, None)
        z_orint = z[1, :] - z[0, :]
        z_orient_np = z_orint.cpu().numpy()
        print("dist=%f" % np.mean(np.square(z_orient_np)))
        print(z_orient_np / np.sqrt(np.mean(np.square(z_orient_np))))

        step_size = 1 / (inner_num - 1)
        z_interpolate = torch.cat([(z[0, :] + z_orint * (i * step_size))[None, :] for i in range(inner_num)])
        new_zs = z_interpolate  # torch.cat([z, z_interpolate], dim=0)
        ws = G.mapping(z, None)
        w_orint = ws[1, :, :] - ws[0, :, :]
        w_orient_np = w_orint[0, :].cpu().numpy()
        # print(w_orient_np / np.sqrt(np.mean(np.square(w_orient_np))))

        ws_interpolate = torch.cat([(ws[0, :, :] + w_orint * (i * step_size))[None, :] for i in range(inner_num)])
        synth_image = G(new_zs, None, noise_mode='const')
        synth_image = (synth_image + 1) * 127.5
        synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[:, :, :, 0]
        for i in range(inner_num):
            cv2.imwrite(os.path.join(current_out, f"image{i}.png"), synth_image[i])
            # PIL.Image.fromarray(synth_image[i], 'L').save(f'{recon_dir}/image{i}.pdf')

        synth_image = G.synthesis(ws_interpolate, noise_mode='const')
        synth_image = (synth_image + 1) * 127.5
        synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()[:, :, :, 0]
        for i in range(inner_num):
            cv2.imwrite(os.path.join(current_out, f"ws_image{i}.png"), synth_image[i])
            # PIL.Image.fromarray(synth_image[i], 'L').save(f'{recon_dir}/ws_image{i}.pdf')

        print("Done!")

dist=8.108855
[-1.329601    0.39474863  0.73954904  1.9658827   0.12316137 -2.135191
 -0.17577764  0.6221569  -0.67273796 -0.45459506  0.49929696  0.7063217
 -0.51487744]
Done!
dist=7.981588
[-1.270959    0.20502125  1.0424296   1.8978251  -0.3397985  -1.8977989
  0.7800083   0.79577076 -0.07292919 -0.9170438   0.44937772  0.7122258
 -0.3741231 ]
Done!
dist=8.108855
[ 1.329601   -0.39474863 -0.73954904 -1.9658827  -0.12316137  2.135191
  0.17577764 -0.6221569   0.67273796  0.45459506 -0.49929696 -0.7063217
  0.51487744]
Done!
dist=1.207601
[ 0.17565313 -0.49039993  0.77105755 -0.2121663  -1.1860625   0.64896584
  2.4716263   0.43802774  1.5529954  -1.1809089  -0.1344085  -0.00310794
  0.36876637]
Done!
dist=7.981588
[ 1.270959   -0.20502125 -1.0424296  -1.8978251   0.3397985   1.8977989
 -0.7800083  -0.79577076  0.07292919  0.9170438  -0.44937772 -0.7122258
  0.3741231 ]
Done!
dist=1.207601
[-0.17565313  0.49039993 -0.77105755  0.2121663   1.1860625  -0.64896584
 -2.4716263  -0.4380277

In [6]:
import os
import pandas as pd

fp = "/media/xavier/Storage/feature_extraction/movie/movies"
ans = []
for item in os.listdir(fp):
    ans.extend(os.listdir(os.path.join(fp, item)))
df = pd.DataFrame(ans, columns=['movie'])
df['movie'] = df['movie'].sort_values().unique()
df.to_csv("/media/xavier/Storage/feature_extraction/movie/movies.csv", index=False)

In [None]:
import os
import cv2


def extract_frame(video_path, frame_number):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Unable to open video file {video_path}")
        return

    # Set the frame position
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

    # Read the frame
    ret, frame = cap.read()
    if ret:
        # Save the frame as an image
        ans = frame  # cv2.imwrite(output_path, frame)
    else:
        print(f"Error: Unable to read frame {frame_number} from {video_path}")

    # Release the video capture object
    cap.release()
    return ans

In [18]:
import cv2

fp = "/media/xavier/Storage/feature_extraction/movie/movies"
OUT_DIR = "/media/xavier/Storage/feature_extraction/movie/LS3934"
os.makedirs(OUT_DIR, exist_ok=True)
ans = []
for item in os.listdir(fp):
    if item.startswith('LS3934'):
        for movie_name in os.listdir(os.path.join(fp, item)):
            print(os.path.join(fp, item, movie_name))
            # # Open the video file
            # cap = cv2.VideoCapture(os.path.join(fp, item, movie_name))
            #
            # # Get the total number of frames
            # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            #
            # # Set the video position to the last frame
            # cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count - 1)
            #
            # # Read the last frame
            # ret, last_frame = cap.read()
            # print(item, movie_name)
            # # Check if the frame was successfully read
            # if ret:
            #     cv2.imwrite(os.path.join(OUT_DIR, item + '_' + movie_name.split(".")[0]) + '.jpg', last_frame)
            #     cv2.waitKey(0)  # Wait until a key is pressed
            #     cv2.destroyAllWindows()
            # else:
            #     print("Failed to read the last frame.")
            #
            # # Release the video capture object
            # cap.release()


/media/xavier/Storage/feature_extraction/movie/movies/LS3934_5/LS3934 2N 022014_565.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_5/LS3934 2N 022814_573.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_5/LS3934 2N 030614_615.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_6/LS3934 60N 030214_538.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_6/LS3934 60N 030414_543.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_6/LS3934 60N 032214_540.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_7/LS3934 .2N 013114_532.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_7/LS3934 0.2N 030814_509.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_8/LS3934 0.6N 031214_596.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_8/LS3934 0.6N 031414_567.avi
/media/xavier/Storage/feature_extraction/movie/movies/LS3934_8/LS3934 0.6N 031614_625.avi
/media/xavier/Storag