# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the raw CSVs and work on copies so the frames as read stay unmodified.
test_data = pd.read_csv("/content/sample_data/test_genetic_disorders.csv")
train_data = pd.read_csv("/content/sample_data/train_genetic_disorders.csv")
df_train = train_data.copy()
df_test = test_data.copy()

# Columns removed before modelling. 'Disorder Subclass' is removed as well, so
# 'Genetic Disorder' is the only label column left in df_train.
columns_to_drop = [
    'Patient Id', 'Patient First Name', 'Family Name',
    'Father\'s name', 'Mother\'s age', 'Father\'s age',
    'Institute Name', 'Location of Institute',
    'Status', 'Follow-up', 'Place of birth',
    'Blood test result', 'No. of previous abortion',
    'Birth defects', 'Disorder Subclass'
]

# errors='ignore' lets the drop succeed even when a listed column is absent.
df_train.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Shorten the verbose CSV headers; the short names are the ones used below.
df_train.rename(columns={
    'Respiratory Rate (breaths/min)': 'Respiratory Rate',
    'Heart Rate (rates/min)': 'Heart Rate',
    'Autopsy shows birth defect (if applicable)': 'Birth Defect',
    'Folic acid details (peri-conceptional)': 'Folic Acid',
    'Assisted conception IVF/ART': 'IVF/ART',
    'White Blood cell count (thousand per microliter)': 'White Blood Cell Count'
}, inplace=True)

target_column = 'Genetic Disorder'

# Rows without a label cannot supervise training. Drop them here so the
# mode-imputation of text columns below never invents a target value.
df_train = df_train.dropna(subset=[target_column])

# Numeric gaps are filled with the per-column median.
numerical_cols = df_train.select_dtypes(include=['float64', 'int64']).columns
df_train[numerical_cols] = df_train[numerical_cols].fillna(df_train[numerical_cols].median())

# Text gaps are filled with the per-column most frequent value.
# The result is assigned back instead of calling fillna(inplace=True) on
# df_train[col]: that form operates on an intermediate Series, which pandas
# warns about and which stops updating the frame under copy-on-write.
categorical_cols = df_train.select_dtypes(include=['object']).columns
for col in categorical_cols:
    modes = df_train[col].mode()
    # mode() is empty for a column with no observed values; leave it as is.
    if not modes.empty:
        df_train[col] = df_train[col].fillna(modes.iloc[0])

yes_no_columns = [
    'Paternal gene', 'Maternal gene', 'Inherited from father',
    "Genes in mother's side", 'Parental consent', 'Birth asphyxia',
    'H/O serious maternal illness', 'H/O radiation exposure (x-ray)',
    'H/O substance abuse', 'IVF/ART', 'History of anomalies in previous pregnancies'
]

# Encode Yes/No flags as 1/0. Series.map turns every value that is not a key
# of the mapping into NaN, so any other answer present in the data would
# become missing again after the imputation above; those entries are filled
# with the most frequent encoded value of the column (0 if none was mapped).
yes_no_map = {'Yes': 1, 'No': 0}
for col in yes_no_columns:
    encoded = df_train[col].map(yes_no_map)
    if encoded.isna().any():
        encoded_modes = encoded.mode()
        fallback = encoded_modes.iloc[0] if not encoded_modes.empty else 0
        encoded = encoded.fillna(fallback)
    df_train[col] = encoded.astype(int)

X = df_train.drop(columns=[target_column])
y = df_train[target_column]

# One-hot encode the remaining text columns. drop_first=True removes the first
# level of each encoded column, so that level is represented by all-zero dummies.
X = pd.get_dummies(X, drop_first=True)

# 80/20 split with a fixed seed; X_val / y_val are held out and not used for fitting.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well, so repeated runs give the same model as the
# seeded split above already suggests is intended.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

def predict_disease_risk(user_input):
    """Return the most probable disorder class and its probability in percent.

    Args:
        user_input: Mapping of feature name to value for a single patient.
            Numeric values are used as given; string values are one-hot
            encoded into ``"<feature>_<value>"`` columns. Features missing
            from the mapping are treated as 0.

    Returns:
        A ``(disease_name, risk_percentage)`` tuple: the entry of
        ``model.classes_`` with the highest predicted probability, and that
        probability scaled to the 0-100 range.
    """
    input_df = pd.DataFrame([user_input])

    # The training matrix X was built with pd.get_dummies, so its columns for
    # text features are named "<feature>_<value>". Encode the input the same
    # way; otherwise the raw text columns match nothing in X.columns and the
    # reindex below would silently discard them.
    input_df = pd.get_dummies(input_df)

    # Align to the training layout: unknown columns are dropped, absent ones
    # (including the reference level removed by drop_first) become 0.
    input_df = input_df.reindex(columns=X.columns, fill_value=0)

    # Single input row, so only the first row of probabilities is relevant.
    class_probabilities = model.predict_proba(input_df)[0]
    top_index = int(np.argmax(class_probabilities))

    return model.classes_[top_index], class_probabilities[top_index] * 100

def get_user_input():
    """Ask for one patient's features on standard input and return them.

    Each answer is converted with the converter listed for its field, so a
    value that cannot be converted raises ``ValueError``. The two rate fields
    are kept as the text that was typed.

    Returns:
        Dict mapping feature name to the converted answer, in prompt order.
    """
    flag_hint = " (1 for Yes, 0 for No)"

    # (feature name, converter for the typed text, text between name and colon)
    fields = (
        ('Patient Age', float, ""),
        ("Genes in mother's side", int, flag_hint),
        ('Inherited from father', int, flag_hint),
        ('Maternal gene', int, flag_hint),
        ('Paternal gene', int, flag_hint),
        ('Blood cell count (mcL)', float, ""),
        ('Respiratory Rate', str, " (e.g., Normal (30-60))"),
        ('Heart Rate', str, " (e.g., Normal)"),
        ('Test 1', float, " result"),
        ('Test 2', float, " result"),
        ('H/O substance abuse', float, flag_hint),
        ('IVF/ART', int, flag_hint),
        ('History of anomalies in previous pregnancies', int, flag_hint),
        ('White Blood Cell Count', float, ""),
        ('Symptom 1', float, flag_hint),
        ('Symptom 2', float, flag_hint),
        ('Symptom 3', float, flag_hint),
        ('Symptom 4', float, flag_hint),
        ('Symptom 5', float, flag_hint),
    )

    answers = {}
    for feature, convert, suffix in fields:
        answers[feature] = convert(input(f"Enter {feature}{suffix}: "))
    return answers

# Interactive driver: read one patient's features from the prompt, score them
# with the trained model, and report the top class with its probability.
user_input = get_user_input()
disease, risk = predict_disease_risk(user_input)
print(
    f"The disease with the highest risk is '{disease}' "
    f"with a risk percentage of {risk:.2f}%."
)