In [None]:
import pandas as pd

# Load the fetal health CSV from the working directory
df = pd.read_csv("fetal_health.csv")

# Tally the null entries in every column
missing_values = df.isna().sum()

# Heading and per-column counts, each on its own line
print("Missing Values:", missing_values, sep="\n")

Missing Values:
baseline value                                            0
accelerations                                             0
fetal_movement                                            0
uterine_contractions                                      0
light_decelerations                                       0
severe_decelerations                                      0
prolongued_decelerations                                  0
abnormal_short_term_variability                           0
mean_value_of_short_term_variability                      0
percentage_of_time_with_abnormal_long_term_variability    0
mean_value_of_long_term_variability                       0
histogram_width                                           0
histogram_min                                             0
histogram_max                                             0
histogram_number_of_peaks                                 0
histogram_number_of_zeroes                                0
histogram_mode          

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def _report_split(split_name, y_true, y_pred):
    """Print and return the evaluation metrics for one data split.

    Parameters
    ----------
    split_name : str
        Prefix used in the printed headings (e.g. "Training" or "Testing").
    y_true : array-like
        Ground-truth labels of the split.
    y_pred : array-like
        Labels predicted by the classifier for the same rows.

    Returns
    -------
    tuple
        ``(accuracy, report, conf_matrix)`` exactly as produced by
        ``accuracy_score``, ``classification_report`` and ``confusion_matrix``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    print(f"{split_name} Accuracy:", accuracy)

    report = classification_report(y_true, y_pred)
    print(f"{split_name} Classification Report:")
    print(report)

    conf_matrix = confusion_matrix(y_true, y_pred)
    print(f"{split_name} Confusion Matrix:")
    print(conf_matrix)

    return accuracy, report, conf_matrix


# Split the data into training and testing sets.
# random_state pins the shuffle so the metrics below are reproducible on re-run;
# stratify=y keeps the class proportions the same in both splits, which matters
# because the label classes are imbalanced (see the support column of the report).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize RandomForestClassifier (seeded so the fitted forest is reproducible)
rf_classifier = RandomForestClassifier(random_state=42)

# Fit the classifier to the training data
rf_classifier.fit(X_train, y_train)

# Predict the labels for the training data
y_train_pred = rf_classifier.predict(X_train)

print('################ For Training Data ##############')
print('\n')

# Accuracy, classification report and confusion matrix for the training data
train_accuracy, train_report, train_conf_matrix = _report_split(
    "Training", y_train, y_train_pred
)

print('\n')
print('################ For Testing Data ##############')
print('\n')

# Predict the labels for the testing data
y_test_pred = rf_classifier.predict(X_test)

# Accuracy, classification report and confusion matrix for the testing data
test_accuracy_rf, test_report, test_conf_matrix = _report_split(
    "Testing", y_test, y_test_pred
)

################ For Training Data ##############


Training Accuracy: 0.9988235294117647
Training Classification Report:
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00      1325
         2.0       1.00      0.99      1.00       231
         3.0       1.00      1.00      1.00       144

    accuracy                           1.00      1700
   macro avg       1.00      1.00      1.00      1700
weighted avg       1.00      1.00      1.00      1700

Training Confusion Matrix:
[[1325    0    0]
 [   2  229    0]
 [   0    0  144]]


################ For Testing Data ##############


Testing Accuracy: 0.9436619718309859
Testing Classification Report:
              precision    recall  f1-score   support

         1.0       0.95      0.99      0.97       330
         2.0       0.96      0.70      0.81        64
         3.0       0.91      0.91      0.91        32

    accuracy                           0.94       426
   macro avg       0.9

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Reuse the train/test split created earlier in the notebook instead of drawing a
# new one. A fresh, unseeded train_test_split here would rebind y_train / y_test to
# a different shuffle while X_train / X_test keep the old one, so any later cell
# that pairs X_train with y_train would fit on misaligned rows and labels.
# x_train / x_test are kept as aliases for code that refers to the lowercase names.
x_train, x_test = X_train, X_test

# Initialize RandomForestClassifier
rfc = RandomForestClassifier(random_state=42)

# Define the parameter grid to search
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_features': ['sqrt', 'log2'],
    'criterion': ['gini', 'entropy']
}

# Initialize GridSearchCV (5-fold cross-validation over every grid combination)
CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5)

# Perform grid search to find the best parameters
CV_rfc.fit(x_train, y_train)

# Print the best parameters found by GridSearchCV
print("Best Parameters:", CV_rfc.best_params_)

# best_score_ is the mean cross-validated score of the best parameter set,
# computed on held-out folds of the training data; it is not the accuracy of
# the fitted model on the training rows themselves.
accuracy = CV_rfc.best_score_ * 100
print("Mean cross-validated accuracy on the training data with tuning is: {:.2f}%".format(accuracy))

# Generate classification report for testing data using the best estimator
print("Classification Report for Testing Data:")
print(classification_report(y_test, CV_rfc.best_estimator_.predict(x_test)))


Best Parameters: {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 100}
Accuracy for our training dataset with tuning is: 93.94%
Classification Report for Testing Data:
              precision    recall  f1-score   support

         1.0       0.97      0.98      0.97       326
         2.0       0.88      0.86      0.87        65
         3.0       0.97      0.89      0.93        35

    accuracy                           0.95       426
   macro avg       0.94      0.91      0.92       426
weighted avg       0.95      0.95      0.95       426



In [None]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Train the classifier that predict_fetal_health() uses by default.
# It is fitted on x_train / y_train, the same feature/label pair the grid search
# above was fitted on, so the rows and their labels are guaranteed to line up.
# random_state makes the fitted forest (and therefore its predictions) reproducible.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(x_train, y_train)

# Map a predicted class label to its human-readable description
def interpret_fetal_health(prediction):
    """Translate a predicted class label into a descriptive string.

    Labels 1.0, 2.0 and 3.0 map to "Normal", "Suspicious - Needs more info"
    and "Pathological" respectively; any other value yields
    "Invalid prediction".
    """
    label_descriptions = (
        (1.0, "Normal"),
        (2.0, "Suspicious - Needs more info"),
        (3.0, "Pathological"),
    )
    # Checked in ascending label order; the first match wins
    for label, description in label_descriptions:
        if prediction == label:
            return description
    return "Invalid prediction"

# Function to prompt user for input and make predictions
def predict_fetal_health(model=None, feature_names=None):
    """Prompt for one value per feature, classify the record and print the result.

    Parameters
    ----------
    model : object with a ``predict`` method, optional
        Fitted classifier used for the prediction. Defaults to the
        notebook-level ``rf_classifier``.
    feature_names : iterable of str, optional
        Names to prompt for, in the column order the model expects.
        Defaults to ``X.columns``.

    Returns
    -------
    None
        The prediction and its interpretation are printed, not returned.
    """
    if model is None:
        model = rf_classifier
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    print("Please enter the following feature values:")

    features = []
    for feature_name in feature_names:
        # Re-prompt on non-numeric input instead of aborting and throwing away
        # the values that were already entered.
        while True:
            value = input(f"{feature_name}: ")
            try:
                features.append(float(value))
                break
            except ValueError:
                print(f"'{value}' is not a valid number, please try again.")

    # Shape the user input as a single-row 2-D array
    input_data = np.array(features).reshape(1, -1)

    # A model fitted on a DataFrame records its column names; give it a DataFrame
    # with the same names so scikit-learn does not warn about missing feature names.
    if hasattr(model, "feature_names_in_"):
        input_data = pd.DataFrame(input_data, columns=feature_names)

    # Make prediction
    prediction = model.predict(input_data)[0]

    # Interpret prediction
    interpretation = interpret_fetal_health(prediction)

    # Print prediction and interpretation
    print(f"The predicted fetal health is: {prediction}")
    print(f"Interpretation: {interpretation}")

# Call the function to make predictions based on user input.
# NOTE: this blocks on input() once per feature, so a "Run All" pauses here until
# every value has been typed in; interrupting the prompt raises KeyboardInterrupt.
predict_fetal_health()

Please enter the following feature values:


KeyboardInterrupt: Interrupted by user