<a href="https://colab.research.google.com/github/Clare9766/Size-probability-conversion-method/blob/main/Size_Probability_Based_Method_English_Version2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/Data/Mass/

/content/drive/MyDrive/Data/Mass


In [None]:
## Description:
## The input workbook must provide the following columns:
## - 'ID' (unique; must not be repeated)
## - Total abundance count, given separately for fibers and fragments
##   ('Fiber Number Concentration' and 'Fragment Number Concentration')
## - 'Converted Size Mid-point' (list)
## - 'PDF' or 'CDF' of the size distribution (list)
##   - If provided as a PDF, it must first be converted to a CDF (see the optional step below)
## - Unit after conversion: grams (g)

# Convert PDF data to CDF (Optional)

In [None]:
import pandas as pd
import ast
import numpy as np

# Read Excel file
# The workbook must contain a 'PDF' column; it is parsed row by row further down.
df = pd.read_excel("DATA_WITH_PDF_LIST.xlsx")

# Define a function to parse PDF strings
def parse_pdf_string(pdf_str, row_index):
    """Turn one 'PDF' cell into a Python list.

    Parameters
    ----------
    pdf_str : str | list | Any
        Cell content. A string is parsed as a Python literal; square brackets
        are added when the text does not already start with "[" (so
        "0.1, 0.2" and "[0.1, 0.2]" are both accepted). A list is returned
        unchanged.
    row_index : int
        Zero-based position of the row; only used in the warning message,
        where it is reported as ``row_index + 2`` (the spreadsheet row number
        when row 1 is the header).

    Returns
    -------
    list | float
        The parsed list, or ``np.nan`` when the cell is missing, has an
        unsupported type, or cannot be parsed.
    """
    if isinstance(pdf_str, list):
        return pdf_str

    if not isinstance(pdf_str, str):
        # Empty cells arrive as NaN/None. Return the same sentinel as the
        # parse-failure path instead of silently falling through to None.
        return np.nan

    text = pdf_str.strip()
    if not text.startswith("["):
        text = "[" + text + "]"

    try:
        return ast.literal_eval(text)
    except Exception as e:
        print(f"⚠️ Failed to parse PDF in row {row_index+2}: {pdf_str}, Error: {e}")
        return np.nan

# Define a function to convert PDF to CDF
def convert_pdf_to_cdf(pdf_list):
    """Accumulate a list of per-bin probabilities into a CDF list.

    Returns the running sum of ``pdf_list`` as Python floats rounded to five
    decimals. Anything that is not a list made up solely of int/float values
    yields ``np.nan``.
    """
    if not isinstance(pdf_list, list):
        return np.nan

    for entry in pdf_list:
        if not isinstance(entry, (int, float)):
            return np.nan

    running_total = np.cumsum(pdf_list)
    rounded = []
    for value in running_total:
        rounded.append(round(float(value), 5))  # plain float, 5 decimals
    return rounded

# Step 1: parse every 'PDF' cell into a list (kept in a scratch column)
df['PDF_list'] = [
    parse_pdf_string(cell, position)
    for position, cell in enumerate(df['PDF'])
]

# Step 2: accumulate each parsed list into the 'CDF' column
df['CDF'] = df['PDF_list'].apply(convert_pdf_to_cdf)

# Step 3: the scratch column is not part of the exported table
del df['PDF_list']

# Step 4: write the result to a new workbook, without the index column
df.to_excel("DATA_WITH_CDF_LIST.xlsx", index=False)

print("✅ Processing completed. Results saved as 'DATA_WITH_CDF_LIST.xlsx'")

# Fitting the Distribution Parameters

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import linregress
import ast

# Workbook holding the 'CDF' and 'Converted Size Mid-point' columns.
# The same path is written back to after fitting, so the file is updated in place.
file_path = 'DATA_WITH_CDF_LIST.xlsx'
data_df = pd.read_excel(file_path, engine='openpyxl')


# Define CDF functions
# NOTE(review): xmin is not referenced by the fitting code in this cell.
xmin = 0.1


def cdf_functionCFD(x, alpha, lambd):
    """Model CDF fitted to the size data: ``1 - exp(-lambd * x**alpha)``."""
    return 1 - np.exp(-lambd * x**alpha)

# Matching density (derivative of the CDF above); kept for reference, unused.
# def pdf_functionCFD(x, alpha, lambd):
#     return lambd * alpha * x**(alpha - 1) * np.exp(-lambd * x**alpha)


# Define the main fitting function
def merge_and_fit_cdf(data_df):
    """Fit ``cdf_functionCFD`` to every row of ``data_df``.

    Each row must carry 'ID', 'Converted Size Mid-point' and 'CDF'; the last
    two are text representations of equally long numeric lists.

    Rows are skipped (with a printed note, except for missing cells) when
    - either list cell is missing,
    - a cell cannot be parsed,
    - the two lists differ in length or hold fewer than two points
      (two parameters cannot be fitted to fewer than two points), or
    - the optimiser or the regression fails.
    Skipped rows are simply absent from the result; a left merge on 'ID'
    leaves their fit columns as NaN.

    Returns
    -------
    pandas.DataFrame
        Columns 'ID', 'Alpha_CFD', 'Lambda_CFD', 'R2_CFD', 'p_value_CFD'.
        'R2_CFD' is the squared correlation coefficient from ``linregress``
        between the observed and fitted CDF values, and 'p_value_CFD' is the
        p-value of that regression. The columns are present even when no row
        could be fitted, so the result can always be merged on 'ID'.
    """
    result_columns = ['ID', 'Alpha_CFD', 'Lambda_CFD', 'R2_CFD', 'p_value_CFD']
    para_samples = []

    for idx, row in data_df.iterrows():
        if pd.isna(row['Converted Size Mid-point']) or pd.isna(row['CDF']):
            continue  # Skip this row

        try:
            x_intervals = np.array(
                ast.literal_eval(row['Converted Size Mid-point']), dtype=float
            ).flatten()
            y_probs = np.array(ast.literal_eval(row['CDF']), dtype=float).flatten()
        except Exception as e:
            print(f"⚠️ Row {idx+1} parsing failed: {e}")
            continue

        # curve_fit raises TypeError (not caught below in the original code)
        # when there are fewer data points than parameters, which used to
        # abort the whole loop. Check the shapes up front instead.
        if x_intervals.size != y_probs.size or x_intervals.size < 2:
            print(
                f"Row {idx+1} skipped: need at least 2 matching size/CDF points "
                f"(got {x_intervals.size} sizes and {y_probs.size} CDF values)"
            )
            continue

        try:
            # Fit using curve_fit (CFD model)
            params_CFD, _ = curve_fit(cdf_functionCFD, x_intervals, y_probs, p0=[1.0, 1.0])
            alpha_CFD, lambd_CFD = params_CFD

            # Goodness of fit: regress fitted values on observed values
            y_fit = cdf_functionCFD(x_intervals, *params_CFD)
            regression = linregress(y_probs, y_fit)
            r2_CFD = regression.rvalue**2
            p_CFD = regression.pvalue
        except (RuntimeError, ValueError, TypeError) as e:
            print(f"Error at row {idx+1}: {e}")
            continue

        para_samples.append({
            'ID': row['ID'],
            'Alpha_CFD': alpha_CFD,
            'Lambda_CFD': lambd_CFD,
            'R2_CFD': r2_CFD,
            'p_value_CFD': p_CFD,
        })

    # Passing the columns explicitly keeps 'ID' available for the merge even
    # when para_samples is empty.
    return pd.DataFrame(para_samples, columns=result_columns)


# Run the fitting function
para_samples = merge_and_fit_cdf(data_df)

if para_samples.empty:
    # Without any fitted row there is nothing to merge (and possibly no 'ID'
    # column to merge on), so leave the workbook untouched.
    print("⚠️ No rows could be fitted; the workbook was left unchanged.")
else:
    # The results are written back to the file that was just read. On a re-run
    # that file already holds the fit columns, and merging again would create
    # '<name>_x' / '<name>_y' duplicates. Drop the old ones first.
    stale_cols = [
        col for col in para_samples.columns
        if col != 'ID' and col in data_df.columns
    ]
    merged_df = pd.merge(data_df.drop(columns=stale_cols), para_samples, on='ID', how='left')
    merged_df.to_excel(file_path, index=False)
    print(f"✅ Results successfully saved to {file_path}")


# Mass Calculation: CFD-based Method

In [None]:
# Mass Calculation

import numpy as np
import pandas as pd
from scipy.integrate import quad

# File path
# The workbook is read here and overwritten by save_mass_data with the mass columns added.
file_path = 'DATA_WITH_CDF_LIST.xlsx'
data_df = pd.read_excel(file_path, engine='openpyxl')

# Fiber radius used in the cylinder mass formula (units not stated here — presumably mm; TODO confirm)
R_fiber = 0.01
# Densities used for fibers and fragments (presumably g/cm^3 — TODO confirm)
Rho_fiber = 1.35
Rho_fragment = 0.92
# Lower end of the size range; sizes are binned from xmin up to 5 + xmin
xmin = 0.1

# Bin widths to run; save_mass_data is called once for each entry
x_intervals = [0.1]


def calculate_cdfCFD(x, alpha, lambd):
    """Evaluate the fitted CDF ``1 - exp(-lambd * x**alpha)`` at size ``x``."""
    exponent = -lambd * x**alpha
    survival = np.exp(exponent)
    return 1 - survival

def calculate_interval_particles(cdf_func, x_min, x_max, total_data, **params):
    """Number of particles falling in the size bin [x_min, x_max].

    ``total_data`` is multiplied by the CDF increment over the bin and divided
    by the CDF value at ``5 + xmin`` (module-level ``xmin``). ``params`` are
    forwarded to ``cdf_func`` as keyword arguments.

    NOTE(review): the divisor is cdf(5 + xmin), not cdf(5 + xmin) - cdf(xmin),
    so bins covering xmin .. 5 + xmin add up to less than ``total_data``
    whenever cdf(xmin) > 0 — confirm this is the intended normalisation.
    """
    bin_probability = cdf_func(x_max, **params) - cdf_func(x_min, **params)
    normaliser = cdf_func(5 + xmin, **params)
    return total_data * bin_probability / normaliser

# Calculate fiber mass
def calculate_fiber_mass_cylinder_Simon6(L):
    """Mass of a single fiber of length ``L``, modelled as a cylinder.

    Cylinder volume (pi * R_fiber**2 * L) times Rho_fiber, scaled by 0.6 and
    divided by 1000. The 0.6 factor and the /1000 are taken as given —
    presumably a volume correction and a unit conversion; TODO confirm.
    """
    cylinder_volume = np.pi * R_fiber**2 * L
    return cylinder_volume * Rho_fiber * 0.6 / 1000

# Calculate fragment mass
def calculate_fragment_mass_elliosoid_Han(L):
    """Mass of a single fragment of size ``L``.

    Computes pi/6 * L**2 * 0.02 times Rho_fragment, divided by 1000. This
    looks like an ellipsoid volume with two axes equal to L and the third
    fixed at 0.02 — TODO confirm against the cited method.
    """
    fragment_volume = np.pi / 6 * L**2 * 0.02
    return fragment_volume * Rho_fragment / 1000

# Registry of conversion methods. Each entry names the CDF and the per-particle
# mass function to use for fibers and for fragments. save_mass_data loops over
# this dict and puts the key (here "CFD") into its output column names.
methods = {
    "CFD": {
        "fiber_cdf": calculate_cdfCFD,
        "fiber_mass": calculate_fiber_mass_cylinder_Simon6,
        "fragment_cdf": calculate_cdfCFD,
        "fragment_mass": calculate_fragment_mass_elliosoid_Han
    },
}

def save_mass_data(interval):
    """Convert particle counts to mass for every row and save the result.

    For each row of the module-level ``data_df`` the size range
    ``xmin .. 5 + xmin`` is cut into bins of width ``interval``. Per bin, the
    number of fibers and fragments is obtained from the fitted CDF
    ('Alpha_CFD', 'Lambda_CFD') via ``calculate_interval_particles`` and
    multiplied by the mass of one particle at the bin mid-point. The sums are
    stored as 'Fiber Mass (<method>)', 'Fragment Mass (<method>)' and
    'Total Mass (<method>)' for every entry of ``methods``.

    The result is merged into ``data_df`` on 'ID' (which must be unique) and
    written to ``file_path``. Rows whose fit parameters are NaN get NaN masses.

    Parameters
    ----------
    interval : float
        Bin width; must be positive.

    Raises
    ------
    ValueError
        If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError(f"interval must be positive, got {interval}")

    # Derive the bin count as an integer instead of relying on a float-step
    # np.arange, whose length can be off by one because of rounding.
    n_bins = int(np.ceil(5 / interval - 1e-9))
    lower_edges = xmin + interval * np.arange(n_bins)

    data = []

    for _, row in data_df.iterrows():
        results = {"ID": row['ID']}
        fiber_count = row['Fiber Number Concentration']
        fragment_count = row['Fragment Number Concentration']
        # Every registered CDF is called with these keyword arguments.
        cdf_params = {"alpha": row['Alpha_CFD'], "lambd": row['Lambda_CFD']}

        for method_name, method in methods.items():
            total_fiber_mass = 0
            total_fragment_mass = 0

            for lower in lower_edges:
                upper = lower + interval
                mid_point = lower + interval / 2

                # Call the registered CDF directly. The previous
                # `== calculate_cdfCFD` guard left the particle count unbound
                # (or stale from the previous bin) for any other CDF.
                fiber_particles = calculate_interval_particles(
                    method['fiber_cdf'], lower, upper, fiber_count, **cdf_params
                )
                total_fiber_mass += fiber_particles * method['fiber_mass'](mid_point)

                fragment_particles = calculate_interval_particles(
                    method['fragment_cdf'], lower, upper, fragment_count, **cdf_params
                )
                total_fragment_mass += fragment_particles * method['fragment_mass'](mid_point)

            results[f"Fiber Mass ({method_name})"] = total_fiber_mass
            results[f"Fragment Mass ({method_name})"] = total_fragment_mass
            results[f"Total Mass ({method_name})"] = total_fiber_mass + total_fragment_mass

        data.append(results)

    if not data:
        # No rows means no 'ID' column to merge on; nothing to write.
        print("⚠️ No rows to process; the workbook was left unchanged.")
        return

    mass_df = pd.DataFrame(data)

    # The output overwrites the input file, so a second run would find the
    # mass columns already present and the merge would emit '_x'/'_y' copies.
    stale_cols = [
        col for col in mass_df.columns
        if col != 'ID' and col in data_df.columns
    ]
    merged_df = pd.merge(data_df.drop(columns=stale_cols), mass_df, on='ID', how='left')
    merged_df.to_excel(file_path, index=False)
    print(f"✅ Results successfully saved to {file_path}")

# Run the mass conversion once per bin width. Every call merges into the data_df
# loaded at the top of this cell and rewrites the same file, so with several bin
# widths only the results of the last one remain in the workbook.
for interval in x_intervals:
    save_mass_data(interval)
