In [None]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib
import os

print("=== Preeclampsia Severity Classifier ===")

# ===== DATA LOADING =====
# Read the training CSV, make sure every column used further down is present,
# and print how many rows fall into each severity class. Any failure is
# reported on stdout and then re-raised, so nothing below runs on bad data.
# NOTE(review): the cell output recorded with this notebook lists the CSV's
# columns without a 'Severity' entry (the label there looks like
# 'Preeclampsia_Status') - confirm the label column against the current file.
try:
    df = pd.read_csv("realistic_noisy_preeclampsia_dataset_noisy.csv")
    print(f"✅ Dataset loaded. Shape: {df.shape}")

    # Inputs of the feature matrix plus the label column.
    required_cols = ['Systolic_BP', 'Diastolic_BP', 'Protein_Urine', 'Severity']
    missing = [name for name in required_cols if name not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # Class balance of the label column.
    print("\n📊 Severity Distribution:", df['Severity'].value_counts(), sep="\n")

except Exception as e:
    # Report, then propagate: the rest of the script cannot work without df.
    print(f"❌ Data loading failed: {e!s}")
    raise

# ===== DATA PREPROCESSING =====
# Mean arterial pressure, derived as (systolic + 2 * diastolic) / 3.
# Assigning to df['MAP'] replaces a 'MAP' column if the CSV already has one,
# so training and predict_severity() always use the same formula.
df['MAP'] = (df['Systolic_BP'] + 2 * df['Diastolic_BP']) / 3

# Encode severity: map each distinct label to an integer code.
# severity_labels[code] gives the original label back.
encoder = LabelEncoder()
df['Severity_Encoded'] = encoder.fit_transform(df['Severity'])
severity_labels = encoder.classes_

# Prepare features (column order must match the frame built at predict time)
X = df[['Systolic_BP', 'Diastolic_BP', 'Protein_Urine', 'MAP']]
y = df['Severity_Encoded']

# Train-test split: 80/20, fixed seed so reruns produce the same split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features: fit on the training rows only, then reuse those statistics
# for the held-out rows so no test information leaks into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
# `multi_class` is intentionally not passed: scikit-learn deprecated it in 1.5
# and schedules it for removal, and the default solver already fits a
# multinomial model when the target has three or more classes.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows; previously the test split was built and
# scaled but never used, so model quality was never checked.
test_accuracy = model.score(X_test_scaled, y_test)
print(f"\n📈 Held-out accuracy: {test_accuracy:.1%}")

# Save artifacts (model, scaler and label encoder are all needed to predict)
joblib.dump(model, 'severity_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(encoder, 'severity_encoder.pkl')
print("\n✅ Saved: severity_model.pkl, scaler.pkl, severity_encoder.pkl")

# ===== PREDICTION =====
def predict_severity(input_path="input.txt"):
    """Predict severity for one patient whose readings are stored in a file.

    The file must hold exactly three whitespace-separated numbers, in this
    order: systolic BP, diastolic BP, urine protein. MAP is derived from the
    two pressures, the four features are scaled with the module-level
    ``scaler`` and classified with the module-level ``model``, and the
    predicted label plus per-class probabilities are printed.

    Args:
        input_path: Path of the text file to read. Defaults to "input.txt",
            the location the original hard-coded.

    Returns:
        None. Failures (missing file, malformed content, model errors) are
        reported on stdout instead of being raised.
    """
    try:
        # Load input
        with open(input_path, "r", encoding="utf-8") as f:
            values = f.read().split()

        # Fail with a readable message instead of a tuple-unpacking error.
        if len(values) != 3:
            raise ValueError(
                f"expected 3 values (systolic diastolic protein) in "
                f"{input_path!r}, got {len(values)}"
            )
        systolic, diastolic, protein = map(float, values)

        # Calculate MAP (same formula as the training pipeline)
        map_val = (systolic + 2 * diastolic) / 3

        # Create feature vector; column names/order mirror the training frame
        features = pd.DataFrame([[systolic, diastolic, protein, map_val]],
                              columns=['Systolic_BP', 'Diastolic_BP', 'Protein_Urine', 'MAP'])

        # Scale and predict
        features_scaled = scaler.transform(features)
        pred = model.predict(features_scaled)[0]
        proba = model.predict_proba(features_scaled)[0]

        # Format output
        print("\n" + "="*50)
        print("🔍 Prediction Results:")
        print(f"BP: {systolic}/{diastolic} | Protein: {protein}")
        print(f"Predicted Severity: {severity_labels[pred]}")
        print("Probabilities:")
        # predict_proba columns follow model.classes_ (the encoded labels seen
        # during fit). Going through model.classes_ keeps every probability
        # next to the right label even if a class never reached the training
        # split, where positional indexing into severity_labels would drift.
        for cls, p in zip(model.classes_, proba):
            print(f"  {severity_labels[cls]}: {p:.1%}")
        print("="*50)

    except Exception as e:
        # Deliberate best-effort: report the problem and keep the session alive.
        print(f"❌ Prediction error: {e}")

# Execute prediction: runs once, right after training, every time this
# script/cell is executed; results (or the error) are printed to stdout.
predict_severity()

🎯 Found CSV file at: realistic_noisy_preeclampsia_dataset_noisy.csv
CSV FILE CHECK
✅ FILE FOUND: realistic_noisy_preeclampsia_dataset_noisy.csv
📁 File size: 890698 bytes (869.82 KB)
📖 Reading first 5 lines of the file:
   Line 1: Name,Systolic_BP,Diastolic_BP,MAP,MAP_Status,Protein_Urine,Protein_Status,Preeclampsia_Status
   Line 2: Afia Adjaye,29.59953996648197,93.83738751402589,73.4603726081396,mild,5,high,no_preeclampsia
   Line 3: Yaa Asante,53.895423679490186,95.96556314590312,87.2404216130795,mild,4,high,no_preeclampsia
   Line 4: Adwoa Addo,117.83002986601315,62.64401481064027,82.90921988122848,mild,2,high,no_preeclampsia
   Line 5: Yaa Amoah,109.0794244581381,71.20109823845983,84.33735639522337,mild,2,high,no_preeclampsia

DATASET LOADING
🔄 Attempting to load CSV file...
✅ CSV DATASET LOADED SUCCESSFULLY!
📊 Dataset shape: (10000, 8)
📋 Columns in dataset: ['Name', 'Systolic_BP', 'Diastolic_BP', 'MAP', 'MAP_Status', 'Protein_Urine', 'Protein_Status', 'Preeclampsia_Status']
🔢 Numb

Enter values:  130 150 2



PREDICTION RESULTS
📊 Input values: {'Systolic_BP': 130.0, 'Diastolic_BP': 150.0, 'Protein_Urine': 2.0}
🧮 Calculated MAP: 143.33
🔮 Predicted Preeclampsia Status: severe
📈 Prediction Probabilities:
   mild: 0.000 (0.0%)
   moderate: 0.006 (0.6%)
   no_preeclampsia: 0.000 (0.0%)
   severe: 0.994 (99.4%)
🏷️  Data source used for training: CSV_FILE
