In [1]:
#all values in pixels
import os
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
from skimage.filters import threshold_li

# Specify the directory containing your TIFF files
directory = r'C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo'

# Width of every histogram bin, in pixels
bin_size = 2


def longest_run_lengths(im_array, threshold):
    """Return, per image row, the longest run of consecutive "white" pixels.

    Parameters
    ----------
    im_array : array-like
        2-D intensity image (rows x columns).
    threshold : scalar
        A pixel counts as "white" when its value is >= threshold.

    Returns
    -------
    list of int
        One entry per row; 0 for a row that has no white pixel.

    Raises
    ------
    ValueError
        If ``im_array`` is not 2-D (e.g. an RGB or multi-channel image).
    """
    pixels = np.asarray(im_array)
    if pixels.ndim != 2:
        raise ValueError(
            f'expected a 2-D single-channel image, got shape {pixels.shape}'
        )

    white = pixels >= threshold
    # Pad one black column on each side so every white run has both a rising
    # and a falling edge, including runs that touch the image border.
    padded = np.pad(white, ((0, 0), (1, 1)), constant_values=False)

    lengths = []
    for row in padded:
        # Positions where the row flips black<->white; they alternate
        # run start, run end, run start, run end, ...
        flips = np.flatnonzero(row[1:] != row[:-1])
        runs = flips[1::2] - flips[::2]
        lengths.append(int(runs.max()) if runs.size else 0)
    return lengths


def fixed_width_bin_edges(values, bin_width):
    """Return histogram bin edges of width ``bin_width`` covering ``values``.

    The first edge sits exactly on the minimum and the last edge is at or
    beyond the maximum, so no value falls outside the bins. At least one
    full bin is always returned, even when all values are identical.

    Raises
    ------
    ValueError
        If ``values`` is empty or ``bin_width`` is not positive.
    """
    data = np.asarray(values, dtype=float)
    if data.size == 0:
        raise ValueError('cannot build histogram bins from empty data')
    if bin_width <= 0:
        raise ValueError('bin_width must be positive')

    lowest = float(data.min())
    highest = float(data.max())
    # round() absorbs float noise (e.g. 2.0000000000000004) that would
    # otherwise add a spurious empty bin.
    n_bins = max(1, int(np.ceil(np.round((highest - lowest) / bin_width, 9))))
    edges = lowest + bin_width * np.arange(n_bins + 1)
    # Guard against the last edge landing a hair below the maximum.
    edges[-1] = max(edges[-1], highest)
    return edges


def save_histogram(values, bin_width, xlabel, title, out_path):
    """Draw a fixed-bin-width histogram of ``values`` and save it as JPEG."""
    hist_fig, hist_ax = plt.subplots()
    hist_ax.hist(values, bins=fixed_width_bin_edges(values, bin_width))
    hist_ax.set_xlabel(xlabel)
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title(title)
    hist_fig.savefig(out_path, format='jpeg')
    plt.close(hist_fig)


# The analysis itself is guarded so the helpers above can be imported on their
# own. In a notebook cell or a direct script run __name__ is "__main__", so
# everything below executes exactly as a plain top-level script would.
if __name__ == "__main__":
    # List all '.tif' files; sorted because os.listdir order is arbitrary and
    # it determines the column order and the legend order.
    tif_files = sorted(file for file in os.listdir(directory) if file.endswith('.tif'))
    if not tif_files:
        # Fail early with a clear message instead of crashing later on
        # int(NaN) after empty output files have already been written.
        raise FileNotFoundError(f'No .tif files found in {directory}')

    lengths_by_file = {}   # filename -> Series of per-row longest lengths
    difference_rows = []   # one record per file, turned into a DataFrame once

    line_fig, line_ax = plt.subplots()
    for filename in tif_files:
        full_path = os.path.join(directory, filename)

        # The context manager releases the file handle once pixels are copied
        with Image.open(full_path) as im:
            im_array = np.array(im)

        # Threshold value using threshold_li from skimage
        threshold = threshold_li(im_array)

        longest_lengths = longest_run_lengths(im_array, threshold)

        # Stored as a Series so images with different row counts can share
        # one DataFrame (shorter columns are padded with NaN).
        lengths_by_file[filename] = pd.Series(longest_lengths)

        first_row_length = longest_lengths[0]
        last_row_length = longest_lengths[-1]
        length_difference = last_row_length - first_row_length
        difference_rows.append({
            'Filename': filename,
            'First_Row_Length': first_row_length,
            'Last_Row_Length': last_row_length,
            'Difference': length_difference,
        })

        # One line per TIFF file
        line_ax.plot(longest_lengths, label=filename)

    # One column per TIFF file, one row per image row
    length_df = pd.DataFrame(lengths_by_file)
    difference_df = pd.DataFrame(
        difference_rows,
        columns=['Filename', 'First_Row_Length', 'Last_Row_Length', 'Difference'],
    )

    # Add plot labels and legend
    line_ax.set_xlabel('Row Number')
    line_ax.set_ylabel('Longest Continuous Length')
    line_ax.set_title('Longest Continuous Length vs. Row Number')
    line_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside the axes
    line_ax.grid(True)

    # Save the plot as a JPEG file
    plot_file_name = os.path.join(directory, 'longest_lengths_plot_LysLabel_FullKymo.jpeg')
    line_fig.savefig(plot_file_name, format='jpeg', bbox_inches='tight')
    plt.close(line_fig)

    # Save the length DataFrame to an Excel file
    output_filename = 'longest_length_data_LysLabel_FullKymo.xlsx'
    length_df.to_excel(os.path.join(directory, output_filename))

    # Save the difference DataFrame to an Excel file
    difference_output_filename = 'difference_data_LysLabel_FullKymo.xlsx'
    difference_df.to_excel(os.path.join(directory, difference_output_filename), index=False)

    # Difference histogram (own variable so its path is not overwritten)
    difference_hist_file = os.path.join(directory, 'Difference_histogram.jpeg')
    save_histogram(difference_df['Difference'], bin_size, 'Length (pixels)',
                   'Histogram of Length Differences', difference_hist_file)

    # First row length histogram
    first_row_hist_file = os.path.join(directory, 'first_row_length_histogram.jpeg')
    save_histogram(difference_df['First_Row_Length'], bin_size, 'Length (pixels)',
                   'Histogram of First Row Lengths', first_row_hist_file)

    # Last row length histogram
    last_row_hist_file = os.path.join(directory, 'last_row_length_histogram.jpeg')
    save_histogram(difference_df['Last_Row_Length'], bin_size, 'Length (pixels)',
                   'Histogram of Last Row Lengths', last_row_hist_file)

    # Combined histogram plot with separate Y axes
    fig, ax1 = plt.subplots()

    color = 'tab:blue'
    ax1.set_xlabel('Length (pixels)')
    ax1.set_ylabel('First Row Length Frequency', color=color)
    ax1.hist(difference_df['First_Row_Length'],
             bins=fixed_width_bin_edges(difference_df['First_Row_Length'], bin_size),
             alpha=0.5, color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    ax2 = ax1.twinx()  # second axes sharing the same x-axis
    color = 'tab:orange'
    ax2.set_ylabel('Last Row Length Frequency', color=color)  # x-label already set on ax1
    ax2.hist(difference_df['Last_Row_Length'],
             bins=fixed_width_bin_edges(difference_df['Last_Row_Length'], bin_size),
             alpha=0.5, color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    # Title is set before tight_layout so the layout reserves room for it
    ax1.set_title('Histogram of First and Last Row Lengths')
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    combined_hist_file = os.path.join(directory, 'combined_length_histogram.jpeg')
    fig.savefig(combined_hist_file, format='jpeg')
    plt.close(fig)

    print(f'Data saved to {os.path.join(directory, output_filename)}')
    print(f'Plot saved to {plot_file_name}')
    print(f'Difference data saved to {os.path.join(directory, difference_output_filename)}')
    print(f'Difference histogram saved to {difference_hist_file}')
    print(f'First row length histogram saved to {first_row_hist_file}')
    print(f'Last row length histogram saved to {last_row_hist_file}')
    print(f'Combined histogram saved to {combined_hist_file}')


Data saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\longest_length_data_LysLabel_FullKymo.xlsx
Plot saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\longest_lengths_plot_LysLabel_FullKymo.jpeg
Difference data saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\difference_data_LysLabel_FullKymo.xlsx
First row length histogram saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\first_row_length_histogram.jpeg
Last row length histogram saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\last_row_length_histogram.jpeg
Combined histogram saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\combined_length_histogram.jpeg


In [1]:
#Use this. pixel values converted into subunits. frequency of length differences plotted.


import os
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
from skimage.filters import threshold_li

# Conversion factor: pixel lengths are multiplied by this to express them in subunits
conversion_factor = 48.148

# Specify the directory containing your TIFF files
directory = r'C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo'

# Width of every histogram bin: two pixels, expressed in subunits
bin_size = 2 * conversion_factor

# Set to True to also save the stand-alone first-row and last-row histograms
save_row_histograms = False


def longest_run_lengths(im_array, threshold):
    """Return, per image row, the longest run of consecutive "white" pixels.

    Parameters
    ----------
    im_array : array-like
        2-D intensity image (rows x columns).
    threshold : scalar
        A pixel counts as "white" when its value is >= threshold.

    Returns
    -------
    list of int
        One entry per row, in pixels; 0 for a row that has no white pixel.

    Raises
    ------
    ValueError
        If ``im_array`` is not 2-D (e.g. an RGB or multi-channel image).
    """
    pixels = np.asarray(im_array)
    if pixels.ndim != 2:
        raise ValueError(
            f'expected a 2-D single-channel image, got shape {pixels.shape}'
        )

    white = pixels >= threshold
    # Pad one black column on each side so every white run has both a rising
    # and a falling edge, including runs that touch the image border.
    padded = np.pad(white, ((0, 0), (1, 1)), constant_values=False)

    lengths = []
    for row in padded:
        # Positions where the row flips black<->white; they alternate
        # run start, run end, run start, run end, ...
        flips = np.flatnonzero(row[1:] != row[:-1])
        runs = flips[1::2] - flips[::2]
        lengths.append(int(runs.max()) if runs.size else 0)
    return lengths


def fixed_width_bin_edges(values, bin_width):
    """Return histogram bin edges of width ``bin_width`` covering ``values``.

    The first edge sits exactly on the minimum and the last edge is at or
    beyond the maximum, so no value falls outside the bins. Edges are kept
    as floats: truncating the minimum or the width to int would drop
    negative non-integer minima and misalign bins with the data.

    Raises
    ------
    ValueError
        If ``values`` is empty or ``bin_width`` is not positive.
    """
    data = np.asarray(values, dtype=float)
    if data.size == 0:
        raise ValueError('cannot build histogram bins from empty data')
    if bin_width <= 0:
        raise ValueError('bin_width must be positive')

    lowest = float(data.min())
    highest = float(data.max())
    # round() absorbs float noise (e.g. 2.0000000000000004) that would
    # otherwise add a spurious empty bin.
    n_bins = max(1, int(np.ceil(np.round((highest - lowest) / bin_width, 9))))
    edges = lowest + bin_width * np.arange(n_bins + 1)
    # Guard against the last edge landing a hair below the maximum.
    edges[-1] = max(edges[-1], highest)
    return edges


def save_histogram(values, bin_width, xlabel, title, out_path):
    """Draw a fixed-bin-width histogram of ``values`` and save it as JPEG."""
    hist_fig, hist_ax = plt.subplots()
    hist_ax.hist(values, bins=fixed_width_bin_edges(values, bin_width))
    hist_ax.set_xlabel(xlabel)
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title(title)
    hist_fig.savefig(out_path, format='jpeg')
    plt.close(hist_fig)


# The analysis itself is guarded so the helpers above can be imported on their
# own. In a notebook cell or a direct script run __name__ is "__main__", so
# everything below executes exactly as a plain top-level script would.
if __name__ == "__main__":
    # List all '.tif' files; sorted because os.listdir order is arbitrary and
    # it determines the column order and the legend order.
    tif_files = sorted(file for file in os.listdir(directory) if file.endswith('.tif'))
    if not tif_files:
        # Fail early with a clear message instead of crashing later on
        # int(NaN) after empty output files have already been written.
        raise FileNotFoundError(f'No .tif files found in {directory}')

    lengths_by_file = {}   # filename -> Series of per-row lengths (subunits)
    difference_rows = []   # one record per file, turned into a DataFrame once

    line_fig, line_ax = plt.subplots()
    for filename in tif_files:
        full_path = os.path.join(directory, filename)

        # The context manager releases the file handle once pixels are copied
        with Image.open(full_path) as im:
            im_array = np.array(im)

        # Threshold value using threshold_li from skimage
        threshold = threshold_li(im_array)

        longest_lengths = longest_run_lengths(im_array, threshold)

        # Convert pixel lengths to subunits
        longest_lengths_subunits = [length * conversion_factor for length in longest_lengths]

        # Stored as a Series so images with different row counts can share
        # one DataFrame (shorter columns are padded with NaN).
        lengths_by_file[filename] = pd.Series(longest_lengths_subunits)

        first_row_length = longest_lengths_subunits[0]
        last_row_length = longest_lengths_subunits[-1]
        length_difference = last_row_length - first_row_length
        difference_rows.append({
            'Filename': filename,
            'First_Row_Length': first_row_length,
            'Last_Row_Length': last_row_length,
            'Difference': length_difference,
        })

        # One line per TIFF file
        line_ax.plot(longest_lengths_subunits, label=filename)

    # One column per TIFF file, one row per image row
    length_df = pd.DataFrame(lengths_by_file)
    difference_df = pd.DataFrame(
        difference_rows,
        columns=['Filename', 'First_Row_Length', 'Last_Row_Length', 'Difference'],
    )

    # Add plot labels and legend
    line_ax.set_xlabel('Row Number')
    line_ax.set_ylabel('Longest Continuous Length (Subunits)')
    line_ax.set_title('Longest Continuous Length vs. Row Number')
    line_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside the axes
    line_ax.grid(True)

    # Save the plot as a JPEG file
    plot_file_name = os.path.join(directory, 'longest_lengths_plot_LysLabel_FullLengthKymo.jpeg')
    line_fig.savefig(plot_file_name, format='jpeg', bbox_inches='tight')
    plt.close(line_fig)

    # Save the length DataFrame to an Excel file
    output_filename = 'longest_length_data_LysLabel_FullLengthKymo.xlsx'
    length_df.to_excel(os.path.join(directory, output_filename))

    # Save the difference DataFrame to an Excel file
    difference_output_filename = 'difference_data_LysLabel_FullLengthKymo.xlsx'
    difference_df.to_excel(os.path.join(directory, difference_output_filename), index=False)

    # Difference histogram
    difference_hist_file = os.path.join(directory, 'Difference_histogram_LysLabel_FullLengthKymo.jpeg')
    save_histogram(difference_df['Difference'], bin_size, 'Length (subunits)',
                   'Histogram of Length Differences', difference_hist_file)

    # Optional stand-alone first-row / last-row histograms
    if save_row_histograms:
        first_row_hist_file = os.path.join(directory, 'first_row_length_histogramConv.jpeg')
        save_histogram(difference_df['First_Row_Length'], bin_size, 'Length (subunits)',
                       'Histogram of First Row Lengths', first_row_hist_file)

        last_row_hist_file = os.path.join(directory, 'last_row_length_histogramConv.jpeg')
        save_histogram(difference_df['Last_Row_Length'], bin_size, 'Length (subunits)',
                       'Histogram of Last Row Lengths', last_row_hist_file)

    # Combined histogram plot with separate Y axes
    fig, ax1 = plt.subplots()

    color = 'tab:blue'
    ax1.set_xlabel('Length (subunits)')
    ax1.set_ylabel('First Row Length Frequency', color=color)
    ax1.hist(difference_df['First_Row_Length'],
             bins=fixed_width_bin_edges(difference_df['First_Row_Length'], bin_size),
             alpha=0.5, color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    ax2 = ax1.twinx()  # second axes sharing the same x-axis
    color = 'tab:orange'
    ax2.set_ylabel('Last Row Length Frequency', color=color)  # x-label already set on ax1
    ax2.hist(difference_df['Last_Row_Length'],
             bins=fixed_width_bin_edges(difference_df['Last_Row_Length'], bin_size),
             alpha=0.5, color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    # Title is set before tight_layout so the layout reserves room for it
    ax1.set_title('Histogram of First and Last Row Lengths')
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    combined_hist_file = os.path.join(directory, 'combined_length_histogram_LysLabel_FullLengthKymo.jpeg')
    fig.savefig(combined_hist_file, format='jpeg')
    plt.close(fig)

    print(f'Data saved to {os.path.join(directory, output_filename)}')
    print(f'Plot saved to {plot_file_name}')
    print(f'Difference data saved to {os.path.join(directory, difference_output_filename)}')
    print(f'Difference histogram saved to {difference_hist_file}')
    if save_row_histograms:
        print(f'First row length histogram saved to {first_row_hist_file}')
        print(f'Last row length histogram saved to {last_row_hist_file}')
    print(f'Combined histogram saved to {combined_hist_file}')


Data saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\longest_length_data_LysLabel_FullLengthKymo.xlsx
Plot saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\longest_lengths_plot_LysLabel_FullLengthKymo.jpeg
Difference data saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\difference_data_LysLabel_FullLengthKymo.xlsx
Combined histogram saved to C:\Users\madhu\Desktop\python\KymoAnalysis\For_FINALFIGS\FigSupp_LysineLabel\Lys Full Combo\combined_length_histogram_LysLabel_FullLengthKymo.jpeg


In [None]:
This code processes a set of TIFF images to analyze the longest continuous lines of "white" pixels in each row, converts these lengths to subunits, and generates various plots and reports. Here’s a breakdown of its main features and outputs:
Main Features:

    Image Processing:
        Converts TIFF images to numpy arrays.
        Applies Li's threshold to distinguish between "white" and "black" pixels.

    Length Calculation:
        For each row in the image, calculates the length of the longest continuous line of "white" pixels.
        Converts these pixel lengths into subunits using a conversion factor (48.148).

    Data Storage:
        Stores the longest lengths (in subunits) for each image in a DataFrame.
        Computes and stores the difference between the lengths of the longest lines in the first and last rows of each image.

    Histogram Plotting:
        Plots histograms showing the frequency distribution of:
            Length differences between the first and last rows.
            Lengths of the longest lines in the first row (stand-alone plot currently disabled in the subunit version of the code).
            Lengths of the longest lines in the last row (stand-alone plot currently disabled in the subunit version of the code).
        Generates a combined histogram showing the distributions of the first and last row lengths on separate y-axes.

    File Output:
        Saves the longest length data and difference data to Excel files.
        Saves various plots as JPEG files.

Outputs:

    Data Files:
        longest_length_data_LysLabel_FullLengthKymo.xlsx: Contains the longest continuous line lengths (in subunits) for each image, one column per TIFF file and one row per image row.
        difference_data_LysLabel_FullLengthKymo.xlsx: Contains the differences between the lengths of the longest lines in the first and last rows of each image (last row minus first row), along with the lengths themselves.

    Plots:
        longest_lengths_plot_LysLabel_FullLengthKymo.jpeg: Plot showing the longest continuous lengths (in subunits) for each image as a function of row number.
        Difference_histogram_LysLabel_FullLengthKymo.jpeg: Histogram of length differences between the first and last rows (in subunits).
        first_row_length_histogramConv.jpeg: Histogram of the longest lengths in the first row (in subunits). Currently disabled in the code, so this file is not written.
        last_row_length_histogramConv.jpeg: Histogram of the longest lengths in the last row (in subunits). Currently disabled in the code, so this file is not written.
        combined_length_histogram_LysLabel_FullLengthKymo.jpeg: Combined histogram showing the distributions of the first and last row lengths on separate y-axes (in subunits).

Print Statements:

    The code prints messages indicating where the data files and plots have been saved.

This script is useful for analyzing and visualizing the changes in filament lengths across rows in time series images, providing insights into the variability and distribution of these measurements.