In [1]:
# Import necessary libraries
# type: ignore
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Define the function that digitises a plot image into a DataFrame
def extract_data_from_plot(image_path_or_buffer, x_limits=(0, 1), y_limits=(0, 3.5),
                           dark_percentile=20):
    """
    Extract x and y data from the dark pixels of a plot image.

    The image is converted to grayscale, every pixel darker than the
    ``dark_percentile``-th percentile of the image is treated as part of the
    plotted line, and the pixel positions are mapped linearly onto the axis
    limits. The full image extent is assumed to coincide with the axis box
    (left edge = x_min, right edge = x_max, bottom edge = y_min, top edge =
    y_max), so crop the image to the axes beforehand if it has margins.

    Args:
        image_path_or_buffer: Path to the image file, a file-like object, or
            an already-loaded image as a numpy array (2-D grayscale or 3-D
            with colour channels last).
        x_limits: ``(x_min, x_max)`` data values at the left and right image
            edges. Defaults to ``(0, 1)``.
        y_limits: ``(y_min, y_max)`` data values at the bottom and top image
            edges. Defaults to ``(0, 3.5)``.
        dark_percentile: Percentile (0-100) of the grayscale intensities used
            as the darkness threshold; pixels strictly below it are kept.
            Defaults to 20.

    Returns:
        pandas.DataFrame: A DataFrame with 'frequency (Hz)' and 'm**2/Hz' columns,
            sorted by frequency with one row per pixel column, or None if
            extraction fails.
        str: Any warning or error message encountered during extraction
            (empty string when everything went fine).
    """
    try:
        # Accept a pre-loaded array as-is; otherwise let matplotlib read the file.
        if isinstance(image_path_or_buffer, np.ndarray):
            img = image_path_or_buffer
        else:
            img = plt.imread(image_path_or_buffer)

        # Convert the image to grayscale (if it's not already); any alpha
        # channel beyond the first three channels is ignored.
        if img.ndim == 3:
            img = np.mean(img[..., :3], axis=2)
        if img.ndim != 2 or img.size == 0:
            raise ValueError("image must be a non-empty 2-D or 3-D array.")

        height, width = img.shape

        # Find the coordinates of the plot line (assuming it's the darkest part).
        # np.where scans row by row, so hits come ordered from top to bottom.
        threshold = np.percentile(img, dark_percentile)
        y_indices, x_indices = np.where(img < threshold)

        x_min, x_max = x_limits
        y_min, y_max = y_limits

        # Map pixel centres onto data coordinates. Using the centre (+0.5) on
        # both axes keeps x and y consistent; the row index is flipped because
        # image rows grow downwards while the y axis grows upwards.
        x_values = x_min + (x_indices + 0.5) / width * (x_max - x_min)
        y_values = y_min + (height - y_indices - 0.5) / height * (y_max - y_min)

        df = pd.DataFrame({'frequency (Hz)': x_values, 'm**2/Hz': y_values})

        # Keep one point per pixel column: the first occurrence, which is the
        # topmost dark pixel because of the row-major scan order above.
        df = df.drop_duplicates(subset='frequency (Hz)', keep='first')

        # Sort by frequency and renumber the rows.
        df = df.sort_values(by='frequency (Hz)').reset_index(drop=True)

        message = ""
        if df.empty:
            message = "No pixels darker than the threshold were found; the result is empty."
        return df, message

    except FileNotFoundError:
        return None, "Image file not found."
    except Exception as e:
        # Best-effort contract: report any other failure as a message
        # instead of raising.
        return None, f"An error occurred: {e}"


In [3]:
# Example usage: digitise one plot image and store the points as a CSV file.
image_file = "calib_irr_011.png"  # Replace with the actual image file path
df, message = extract_data_from_plot(image_file)

if df is None:
    # Extraction failed: show the reported reason, or a generic fallback
    # when no message was supplied.
    print(f"Error: {message}" if message else "Unknown error occurred.")
else:
    # Extraction succeeded: write the points without the DataFrame index.
    csv_file = "calib_irr_011.csv"
    df.to_csv(csv_file, index=False)
    print(f"Data extracted and saved to {csv_file}")

Data extracted and saved to calib_irr_011.csv


In [4]:
# Note: When new files arrive, update the file names (image_file and csv_file) and the x-axis and y-axis settings directly in this notebook.