##7 Construct a model that predicts whether a person is suffering from disease Z, given the x variables blood pressure, fever, vomit and diabetes, and the y variable (suffering: yes/no). Check / predict when:
Blood Pressure (low/normal/high): high
Fever (nofever/mild/high): nofever
Vomit (yes/no): yes
Diabetes (yes/no): yes

In [None]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder


# Hand-entered training records. Each list is one column; position i across
# the five lists describes the same patient.
manual_blood_pressure = [
    'high', 'high', 'low', 'normal', 'normal', 'normal', 'low',
    'high', 'high', 'normal', 'high', 'low', 'low', 'normal',
]
manual_fever = [
    'high', 'high', 'high', 'mild', 'nofever', 'nofever', 'nofever',
    'mild', 'nofever', 'mild', 'mild', 'mild', 'high', 'mild',
]
manual_vomit = [
    'no', 'yes', 'no', 'no', 'no', 'yes', 'yes',
    'no', 'no', 'no', 'yes', 'yes', 'no', 'yes',
]
manual_diabetes = [
    'yes', 'yes', 'yes', 'yes', 'no', 'no', 'no',
    'yes', 'no', 'no', 'no', 'yes', 'no', 'yes',
]
manual_disease_z = [
    'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes',
    'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no',
]

# DataFrame column name -> raw list, in the column order the frame should have.
raw_columns = {
    'blood_pressure': manual_blood_pressure,
    'fever': manual_fever,
    'vomit': manual_vomit,
    'diabetes': manual_diabetes,
    'disease_z': manual_disease_z,
}

# A list that is one entry short or long would misalign patients, so refuse to
# continue unless every column has exactly the same number of entries.
if len({len(values) for values in raw_columns.values()}) != 1:
    raise ValueError("All data lists must have the same length.")

data = pd.DataFrame(raw_columns)

print("Your Input Data Head:")
print(data.head())


# Level orderings for the two ordered features, listed lowest to highest.
fever_categories = ['nofever', 'mild', 'high']
blood_pressure_categories = ['low', 'normal', 'high']

# Columns encoded as ordered integers vs. columns that are one-hot encoded.
ordinal_features = ['fever', 'blood_pressure']
nominal_features = ['vomit', 'diabetes']

# `categories` is matched to the columns by position, so its order must follow
# `ordinal_features` (fever first, then blood pressure).
ordinal_transformer = OrdinalEncoder(
    categories=[fever_categories, blood_pressure_categories],
)
onehot_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    [
        ('ord', ordinal_transformer, ordinal_features),
        ('onehot', onehot_transformer, nominal_features),
    ],
    remainder='passthrough',
)

# Feature matrix and 0/1 target ('no' -> 0, 'yes' -> 1).
feature_columns = ['blood_pressure', 'fever', 'vomit', 'diabetes']
label_codes = {'no': 0, 'yes': 1}
X = data[feature_columns]
y = data['disease_z'].map(label_codes)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42,
)


# Each pipeline gets its own unfitted copy of the preprocessing step.
# Both pipelines used to receive the very same `preprocessor` object, so
# fitting the second pipeline re-fitted the transformer that the first,
# already-trained pipeline relies on. Cloning keeps the two models independent.
rf_model_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

rf_model_pipeline.fit(X_train, y_train)

print("\nRandom Forest Model training complete.")


lr_model_pipeline = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', LogisticRegression(random_state=42))
])

lr_model_pipeline.fit(X_train, y_train)

print("Logistic Regression Model training complete.")



# Score both fitted pipelines on the held-out rows.
rf_predictions = rf_model_pipeline.predict(X_test)
lr_predictions = lr_model_pipeline.predict(X_test)

# (printed label, scoring function) pairs, in the order they are reported.
metric_functions = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)

# (section heading, predictions) pairs, one per model.
model_reports = (
    ("\n--- Random Forest Model Performance ---", rf_predictions),
    ("\n--- Logistic Regression Model Performance ---", lr_predictions),
)

for heading, predicted in model_reports:
    print(heading)
    for label, metric in metric_functions:
        print(label, metric(y_test, predicted))



def _ask_choice(prompt, allowed):
    """Prompt until the user enters one of `allowed`; return that value.

    The answer is stripped and lower-cased before it is compared, so
    " High " is accepted as "high". Invalid answers are reported and the
    question is asked again.
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in allowed:
            return answer
        print(f"  Invalid value {answer!r}; expected one of: {', '.join(allowed)}")


# Validate every answer before it reaches the model. An unknown blood-pressure
# or fever level makes the ordinal encoder raise, and an unknown vomit/diabetes
# answer is silently ignored by the one-hot encoder (handle_unknown='ignore'),
# which would produce a prediction from a meaningless feature row.
yes_no = ('yes', 'no')

print("\nEnter new patient information for classification:")
user_blood_pressure = _ask_choice("Blood Pressure (low/normal/high): ", blood_pressure_categories)
user_fever = _ask_choice("Fever (nofever/mild/high): ", fever_categories)
user_vomit = _ask_choice("Vomit (yes/no): ", yes_no)
user_diabetes = _ask_choice("Diabetes (yes/no): ", yes_no)


# One-row frame with the same column names the pipeline was trained on.
new_input_data = pd.DataFrame({
    'blood_pressure': [user_blood_pressure],
    'fever': [user_fever],
    'vomit': [user_vomit],
    'diabetes': [user_diabetes]
})


prediction = rf_model_pipeline.predict(new_input_data)


# The target was encoded as 'no' -> 0, 'yes' -> 1.
print("\nPrediction Result (using Random Forest):")
if prediction[0] == 1:
    print("  The person is likely SUFFERING from Disease Z.")
else:
    print(" The person is likely NOT suffering from Disease Z.")

Your Input Data Head:
  blood_pressure    fever vomit diabetes disease_z
0           high     high    no      yes        no
1           high     high   yes      yes        no
2            low     high    no      yes       yes
3         normal     mild    no      yes       yes
4         normal  nofever    no       no       yes

Random Forest Model training complete.
Logistic Regression Model training complete.

--- Random Forest Model Performance ---
Accuracy: 0.6666666666666666
Precision: 1.0
Recall: 0.5
F1 Score: 0.6666666666666666

--- Logistic Regression Model Performance ---
Accuracy: 0.6666666666666666
Precision: 0.6666666666666666
Recall: 1.0
F1 Score: 0.8

Enter new patient information for classification:
Blood Pressure (low/normal/high): high 
Fever (nofever/mild/high): nofever
Vomit (yes/no): yes
Diabetes (yes/no): yes

Prediction Result (using Random Forest):
 The person is likely NOT suffering from Disease Z.


In [None]:


from sklearn.model_selection import cross_val_score, StratifiedKFold



# 5-fold stratified splitter; shuffling with a fixed seed keeps the folds
# reproducible. The same splitter object is passed to both evaluations.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

rf_cv_scores = cross_val_score(rf_model_pipeline, X, y, cv=cv, scoring='accuracy')
lr_cv_scores = cross_val_score(lr_model_pipeline, X, y, cv=cv, scoring='accuracy')

print("\n--- Cross-Validation Results (Accuracy) ---")

# (per-fold label, mean label, per-fold scores) for each model, in print order.
cv_report = (
    ("Random Forest:", "Mean Accuracy (Random Forest):", rf_cv_scores),
    ("\nLogistic Regression:", "Mean Accuracy (Logistic Regression):", lr_cv_scores),
)
for scores_label, mean_label, fold_scores in cv_report:
    print(scores_label, fold_scores)
    print(mean_label, np.mean(fold_scores))




--- Cross-Validation Results (Accuracy) ---
Random Forest: [1.         1.         0.33333333 0.33333333 0.5       ]
Mean Accuracy (Random Forest): 0.6333333333333334

Logistic Regression: [1.         1.         0.66666667 0.66666667 0.5       ]
Mean Accuracy (Logistic Regression): 0.7666666666666666
