# OOP Project Solution
----------------------------------------------------------

In [None]:
import pandas as pd
from datetime import datetime

class LiverSample:
    """
    A single liver sample record built from one row of a tabular dataset.

    Every column of the row is copied onto an attribute, and the record is
    validated once at construction time; the outcome is stored in ``valid``.
    Also offers a blood-glucose unit conversion (mg/dL -> mmol/L).
    """

    def __init__(self, data_row) -> None:
        """
        Copy the fields of *data_row* onto the instance and validate them.

        Args:
            data_row: Any object supporting ``data_row["column name"]``
                lookup (for example a dict or a pandas Series) that holds
                the ten columns read below.

        Raises:
            KeyError: Typically raised by the lookup itself when one of the
                expected columns is absent (exact type depends on the
                container passed in).
        """
        self.sample_id = data_row["Sample ID"]
        self.mass = data_row["Mass (mg)"]
        self.freezer_location = data_row["Freezer Location"]
        self.ldh = data_row["LDH (U/L)"]
        self.blood_glucose_mgdl = data_row["Blood Glucose (mg/dL)"]
        self.glycogen = data_row["Glycogen (mg/g)"]
        self.alt = data_row["ALT (U/L)"]
        self.pH = data_row["pH"]
        self.time_since_collection = data_row["Time Since Collection (min)"]
        self.collection_date = data_row["Collection Date"]

        # Validation result is computed once and cached on the instance.
        self.valid = self._validate()

    def _validate(self) -> bool:
        """
        Check pH range, mass positivity, and collection date format.

        Returns:
            True only if pH lies in [5.5, 8.5], mass is strictly greater
            than zero, and the collection date parses with ``%Y-%m-%d``.
            Missing (NaN) or wrongly-typed values make the sample invalid
            instead of raising.
        """
        try:
            # NaN fails both comparisons, so a missing pH is rejected here.
            if not (5.5 <= self.pH <= 8.5):
                return False

            # Written as ``not (mass > 0)`` rather than ``mass <= 0``:
            # every comparison with NaN is False, so the latter form would
            # let a missing mass slip through as "valid".
            if not (self.mass > 0):
                return False

            # Parsed purely for its side effect of raising on a bad date.
            datetime.strptime(self.collection_date, "%Y-%m-%d")
        except (TypeError, ValueError):
            # TypeError: non-numeric pH/mass or a non-string date (e.g. NaN).
            # ValueError: a string that does not match the date format.
            return False

        return True

    def convert_glucose_to_mM(self) -> float:
        """
        Convert blood glucose from mg/dL to mmol/L (mM).

        Formula: mg/dL ÷ 18.0182

        Returns:
            The converted value rounded to two decimal places.
        """
        return round(self.blood_glucose_mgdl / 18.0182, 2)

# -----------------------------------------
# Script to Load, Filter, and Summarize
# -----------------------------------------

# Read the raw sample table from disk
df = pd.read_csv("./Datasets/liver_samples.csv")

# Wrap every row in a LiverSample (validation runs in the constructor),
# then split the results by their validity flag, preserving row order.
all_samples = [LiverSample(record) for _, record in df.iterrows()]
valid_samples = [s for s in all_samples if s.valid]
invalid_sample_ids = [s.sample_id for s in all_samples if not s.valid]

# Mean blood glucose (mM) over the valid samples; None when there are none
avg_glucose_mM = (
    round(
        sum(s.convert_glucose_to_mM() for s in valid_samples) / len(valid_samples),
        2,
    )
    if valid_samples
    else None
)

# Report the summary
summary_lines = (
    "Liver Sample Data Summary",
    f"Total samples: {len(df)}",
    f"Valid samples: {len(valid_samples)}",
    f"Invalid samples: {len(invalid_sample_ids)}",
    f"Average blood glucose (mM): {avg_glucose_mM}",
    f"Invalid Sample IDs: {invalid_sample_ids}",
)
for line in summary_lines:
    print(line)
