This is where we will write our Python code.

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib
import sys

In [3]:
# Train two random-forest classifiers (cancer and emergency) on the coded
# dataset, print their evaluation metrics, and persist them with joblib.

# Set random seed for reproducibility
SEED = 42
np.random.seed(SEED)

# Input dataset and output model locations, kept together so they are easy to change.
DATA_PATH = '/workspaces/health_tech/Data/coded_data.csv'
CANCER_MODEL_PATH = 'cancer_model.pkl'
EMERGENCY_MODEL_PATH = 'emergency_model.pkl'


def _train_and_evaluate(X, y, label):
    """Split, fit, and report a class-balanced random forest for one target.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix.
    y : pandas.Series
        Target labels; also used to stratify the train/test split.
    label : str
        Name printed in the performance header (e.g. "Cancer").

    Returns
    -------
    tuple
        ``(clf, (X_train, X_test, y_train, y_test), y_pred, accuracy)`` where
        ``clf`` is the fitted classifier, ``y_pred`` the predictions on the
        test split, and ``accuracy`` the accuracy score on that split.
    """
    # 80/20 split, stratified on the target so both splits keep its class ratio.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=SEED, stratify=y
    )

    clf = RandomForestClassifier(n_estimators=100, random_state=SEED, class_weight='balanced')
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{label} Model Performance:")
    print(f"Accuracy: {accuracy:.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    return clf, (X_train, X_test, y_train, y_test), y_pred, accuracy


# Load the dataset. Only the read is guarded by FileNotFoundError so that a
# missing path raised later (e.g. while saving models) is not misreported as a
# missing dataset.
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"Error: dataset not found at '{DATA_PATH}'. Check that the path exists.",
          file=sys.stderr)
    sys.exit(1)

try:
    print("Dataset loaded successfully:")
    print(data.head())

    # Define features and targets
    features = ['age', 'sex', 'family_history', 'fatigue', 'weight_loss', 'pain', 'fever',
                'night_sweats', 'bleeding', 'lumps', 'cough', 'bowel_bladder_changes',
                'pain_severity', 'weight_loss_amount', 'bleeding_severity', 'vital_sign_abnormalities']

    # Fail early with a readable message if the CSV lacks any required column.
    missing_columns = [col for col in features + ['cancer', 'emergency'] if col not in data.columns]
    if missing_columns:
        raise ValueError(f"dataset is missing required columns: {missing_columns}")

    X = data[features]
    y_cancer = data['cancer']
    y_emergency = data['emergency']

    # Train and evaluate the cancer prediction model
    cancer_clf, cancer_split, y_pred_cancer, accuracy_cancer = _train_and_evaluate(
        X, y_cancer, "Cancer"
    )
    X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer = cancer_split

    # Train and evaluate the emergency prediction model
    emergency_clf, emergency_split, y_pred_emergency, accuracy_emergency = _train_and_evaluate(
        X, y_emergency, "Emergency"
    )
    X_train_emergency, X_test_emergency, y_train_emergency, y_test_emergency = emergency_split

    # Save the models
    joblib.dump(cancer_clf, CANCER_MODEL_PATH)
    joblib.dump(emergency_clf, EMERGENCY_MODEL_PATH)
    print(f"\nModels saved as '{CANCER_MODEL_PATH}' and '{EMERGENCY_MODEL_PATH}'")

    # Optional: Feature importance for cancer model
    feature_importance = pd.DataFrame({
        'feature': features,
        'importance': cancer_clf.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\nFeature Importance for Cancer Model:")
    print(feature_importance)

except Exception as e:
    # Include the exception type: str(e) alone is often uninformative
    # (a KeyError, for instance, prints only the quoted key).
    print(f"An error occurred: {type(e).__name__}: {e}", file=sys.stderr)
    sys.exit(1)

Dataset loaded successfully:
   age  sex  family_history  cancer  fatigue  weight_loss  pain  fever  \
0   57    0               0       0        1            0     0      0   
1   47    0               0       0        1            1     0      1   
2   59    0               1       0        0            0     0      1   
3   72    0               0       0        1            0     0      0   
4   46    1               0       0        1            0     0      0   

   night_sweats  bleeding  lumps  cough  bowel_bladder_changes  pain_severity  \
0             0         0      0      0                      0              3   
1             0         0      0      0                      0              1   
2             0         0      0      0                      0              5   
3             0         0      0      0                      0              2   
4             0         0      0      0                      0              6   

   weight_loss_amount  bleeding_severit