<a href="https://colab.research.google.com/github/AdvisoryXpert/RO_Chatbot/blob/main/Design_Based_RO_Projection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# %pip installs into the running kernel's environment; the pin matches the version this notebook was last run with.
%pip install -q pymupdf==1.26.0

Collecting pymupdf
  Downloading pymupdf-1.26.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.26.0


In [12]:
import math

In [8]:
import fitz  # PyMuPDF
import re
import pandas as pd
import os

def extract_membrane_specs(pdf_path):
    """Extract headline membrane specifications from a datasheet PDF.

    The text of every page is concatenated, whitespace is collapsed to single
    spaces, and a set of case-insensitive regular expressions pulls out the
    first match for each field.

    Args:
        pdf_path: Path to the datasheet PDF.

    Returns:
        dict with:
          - "membrane_name": the first ``CPA<digit>[-LD]`` token found
            (upper-cased), otherwise the file name without its ``.pdf``
            extension.
          - "membrane_area_m2": active area in m2 (only if matched).
          - "nominal_flow_lph": permeate flow converted to litres/hour
            (only if matched).
          - "rejection_percent": salt rejection in percent (only if matched).
          - "operating_pressure_bar": pressure converted to bar
            (only if matched).
          - "K_LMH_per_bar": (flow / area) / pressure rounded to 3 decimals,
            or None when any input is missing or a divisor is zero.
    """
    # The context manager closes the document even if text extraction raises.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)

    text = re.sub(r'\s+', ' ', text)
    specs = {}

    # Membrane name
    name_match = re.search(r"(CPA\d(-LD)?)", text, re.IGNORECASE)
    if name_match:
        specs["membrane_name"] = name_match.group(1).upper()
    else:
        # Strip only a trailing ".pdf" extension; a plain str.replace would
        # also remove ".pdf" occurring in the middle of the file name.
        base_name = os.path.basename(pdf_path)
        stem, ext = os.path.splitext(base_name)
        specs["membrane_name"] = stem if ext.lower() == ".pdf" else base_name

    # Area
    area_match = re.search(r"membrane active area.*?(\d+(\.\d+)?)\s*(m2|m²)", text, re.IGNORECASE)
    if area_match:
        specs["membrane_area_m2"] = float(area_match.group(1))

    # Flow
    flow_match = re.search(r"permeate flow.*?(\d+(\,\d+)?(\.\d+)?)(\s*gpd|\s*m3/d)", text, re.IGNORECASE)
    if flow_match:
        # Drop the thousands separator before parsing, e.g. "11,000" -> 11000.
        value = float(flow_match.group(1).replace(",", ""))
        unit = flow_match.group(4).lower()
        if "gpd" in unit:
            # gallons/day -> litres/hour (3.785 L per gallon, 24 h per day)
            value = round((value * 3.785) / 24, 2)
        elif "m3/d" in unit:
            # m3/day -> litres/hour
            value = round((value * 1000) / 24, 2)
        specs["nominal_flow_lph"] = value

    # Rejection
    rejection_match = re.search(r"salt rejection.*?(\d+(\.\d+)?)%", text, re.IGNORECASE)
    if rejection_match:
        specs["rejection_percent"] = float(rejection_match.group(1))

    # Pressure
    # NOTE(review): this takes the first "applied/operating pressure" figure in
    # the text; on some datasheets that may be a maximum rating rather than the
    # test condition - verify the extracted value against the PDF.
    pressure_match = re.search(r"(applied pressure|operating pressure).*?(\d+(\.\d+)?)\s*(psi|psig|bar)", text, re.IGNORECASE)
    if pressure_match:
        pressure_value = float(pressure_match.group(2))
        unit = pressure_match.group(4).lower()
        if "psi" in unit:
            # psi / psig -> bar
            pressure_value = round(pressure_value * 0.06895, 2)
        specs["operating_pressure_bar"] = pressure_value

    # Calculate K = LMH/bar
    try:
        flux = specs["nominal_flow_lph"] / specs["membrane_area_m2"]
        specs["K_LMH_per_bar"] = round(flux / specs["operating_pressure_bar"], 3)
    except (KeyError, ZeroDivisionError):
        # A field was not found in the PDF, or area/pressure parsed as zero.
        specs["K_LMH_per_bar"] = None

    return specs


In [9]:
from glob import glob

# Assumes PDFs are in /content/
# Sorted so the row order of the table is reproducible between runs.
pdf_files = sorted(glob("/content/*.pdf"))

# One spec dict per datasheet.
membrane_list = [extract_membrane_specs(pdf) for pdf in pdf_files]

# Create DataFrame
# reindex (rather than df[[...]]) keeps this cell from raising KeyError when a
# field was not found in any datasheet or when no PDFs were found at all; the
# missing column is simply filled with NaN / left empty.
df = pd.DataFrame(membrane_list).reindex(
    columns=[
        "membrane_name",
        "membrane_area_m2",
        "nominal_flow_lph",
        "rejection_percent",
        "operating_pressure_bar",
        "K_LMH_per_bar",
    ]
)
df

Unnamed: 0,membrane_name,membrane_area_m2,nominal_flow_lph,rejection_percent,operating_pressure_bar,K_LMH_per_bar
0,CPA7-LD,37.2,1813.65,99.8,41.37,1.178
1,CPA3,37.2,1734.79,99.7,41.37,1.127
2,CPA5-LD,37.2,1734.79,99.7,41.37,1.127


In [18]:
def compute_osmotic_pressure(tds_ppm, bar_per_1000_ppm=0.75):
    """Estimate osmotic pressure in bar from TDS with a linear rule of thumb.

    Args:
        tds_ppm: Total dissolved solids in ppm.
        bar_per_1000_ppm: Proportionality factor in bar per 1000 ppm. The
            default of 0.75 gives ~3.75 bar for 5000 ppm.

    Returns:
        ``bar_per_1000_ppm * tds_ppm / 1000``.
    """
    return bar_per_1000_ppm * (tds_ppm / 1000)

In [16]:
def ro_projection(feed_flow_lph, recovery_pct, feed_tds, membrane, elements_in_series=4,
                  pressure_drop_per_element_bar=0.6):
    """Project a simple RO design point for one membrane type.

    The model is deliberately simple: recovery only sets the permeate flow,
    the osmotic term uses the feed TDS as given (concentration along the
    vessel is not modelled), and hydraulic loss is a fixed amount per element.

    Args:
        feed_flow_lph: Feed flow in litres/hour.
        recovery_pct: Recovery in percent (permeate / feed * 100).
        feed_tds: Feed TDS in ppm.
        membrane: Mapping with "membrane_area_m2", "nominal_flow_lph",
            "rejection_percent" and "K_LMH_per_bar".
        elements_in_series: Number of elements used for the pressure-drop
            estimate.
        pressure_drop_per_element_bar: Pressure drop charged per element,
            in bar. Defaults to 0.6 (empirical, ~0.5-0.7 bar per element at
            high recovery).

    Returns:
        dict of the projected metrics: flows, recovery, membranes needed,
        flux, NDP, osmotic pressures, pressure drop, required feed pressure
        and permeate TDS.
    """
    A = membrane["membrane_area_m2"]
    Q_mem = membrane["nominal_flow_lph"]
    Rj = membrane["rejection_percent"]
    K = membrane["K_LMH_per_bar"]

    # Step 1: Permeate Flow
    Q_perm = feed_flow_lph * (recovery_pct / 100)

    # Step 2: Required Membranes
    membranes_needed = math.ceil(Q_perm / Q_mem)

    # Step 3: Flux (LMH)
    # Zero permeate needs zero membranes, so there is no area to divide by;
    # report zero flux instead of raising ZeroDivisionError.
    total_area = membranes_needed * A
    flux = round(Q_perm / total_area, 2) if total_area else 0.0

    # Step 4: Osmotic Pressures
    permeate_tds = feed_tds * (1 - Rj / 100)
    pi_f = compute_osmotic_pressure(feed_tds)
    pi_p = compute_osmotic_pressure(permeate_tds)
    delta_pi = pi_f - pi_p

    # Step 5: NDP (derived from the rounded flux that is reported below)
    NDP = flux / K

    # Step 6: Estimate Pressure Drop
    pressure_drop = round(elements_in_series * pressure_drop_per_element_bar, 2)

    # Step 7: Total Feed Pressure
    feed_pressure = round(NDP + delta_pi + pressure_drop, 2)

    # Step 8: Output
    return {
        "Q_feed_lph": feed_flow_lph,
        "Q_perm_lph": Q_perm,
        "recovery_%": recovery_pct,
        "membranes_needed": membranes_needed,
        "flux_LMH": flux,
        "NDP_bar": round(NDP, 2),
        "osmotic_pressure_feed_bar": round(pi_f, 2),
        "osmotic_pressure_perm_bar": round(pi_p, 2),
        "osmotic_delta_bar": round(delta_pi, 2),
        "pressure_drop_bar": pressure_drop,
        "required_feed_pressure_bar": feed_pressure,
        "permeate_TDS_ppm": round(permeate_tds, 2),
    }


In [20]:
# Design case: 5000 ppm feed, 50% recovery, 1000 LPH, 1 vessel × 4 membranes.
# Take the first CPA3 row of the spec table as a plain dict.
membrane = df.loc[df["membrane_name"] == "CPA3"].iloc[0].to_dict()

result = ro_projection(
    membrane=membrane,
    feed_flow_lph=1000,
    feed_tds=5000,
    recovery_pct=50,
    elements_in_series=4,
)

# Two-column (metric, value) view of the projection.
pd.DataFrame(list(result.items()), columns=["Metric", "Value"])

Unnamed: 0,Metric,Value
0,Q_feed_lph,1000.0
1,Q_perm_lph,500.0
2,recovery_%,50.0
3,membranes_needed,1.0
4,flux_LMH,13.44
5,NDP_bar,11.93
6,osmotic_pressure_feed_bar,3.75
7,osmotic_pressure_perm_bar,0.01
8,osmotic_delta_bar,3.74
9,pressure_drop_bar,2.4
