In [None]:
!pip install opendatasets
!pip install pandas
!pip install tpot

In [None]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import opendatasets as od
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


# Ignore all warnings
warnings.filterwarnings("ignore")



In [None]:
# Download the fetal-health dataset from Kaggle; the next cell reads it from
# the `fetal-health-classification` folder.
# NOTE: never paste Kaggle credentials into the notebook. Supply them at the
# opendatasets prompt or through a local kaggle.json that is not committed
# (presumably how opendatasets picks them up; verify against its docs).
od.download("https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification/code")

In [None]:
%%time
data=pd.read_csv("/content/fetal-health-classification/fetal_health.csv")
data=data.dropna()
data.head(10)

In [None]:
# Summary of the loaded frame (DataFrame.info).
data.info()

In [None]:
# dtype of every column.
data.dtypes

In [None]:
# Number of missing values per column (expected to be zero after dropna() at load).
data.isnull().sum()

In [None]:
# Number of rows that exactly duplicate an earlier row.
data.duplicated().sum()

In [None]:
# Number of distinct values in each column.
data.nunique()

In [None]:
# Descriptive statistics for the columns (DataFrame.describe).
data.describe()

In [None]:
# (rows, columns) of the cleaned frame.
data.shape

In [None]:
# Distinct values of the target column, in order of first appearance.
data['fetal_health'].unique()

In [None]:
# Bar chart of how many records fall into each fetal_health class.
ax = sns.countplot(data=data, x='fetal_health')
ax.set_title('fetal health Distribution');

In [None]:
# Features whose distributions are compared across the fetal_health classes.
cols = ['accelerations', 'baseline value', 'uterine_contractions', 'severe_decelerations']

# One panel per feature on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

# Colour for each numeric fetal_health label.
palette = {1.0: 'green', 2.0: 'orange', 3.0: 'red'}

# Flatten the 2D array of axes so it can be paired with the feature list.
axes_flat = axes.flatten()
# The loop variable gets its own name so the `cols` list is not overwritten
# by its last element once the loop finishes.
for ax, col in zip(axes_flat, cols):
    sns.histplot(data=data, x=col, bins=30, ax=ax, hue='fetal_health', kde=True, palette=palette)
    ax.set_title(f'{col} vs Frequency')

# Adjust layout for better spacing
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
sns.set_theme(style="ticks")

# (fetal_health value, legend label, marker colour) for each class.
health_classes = [
    (1.0, "Normal", "Green"),
    (2.0, "Suspect", "Yellow"),
    (3.0, "Pathological", "Red"),
]
for class_value, class_label, class_color in health_classes:
    subset = data[data.fetal_health == class_value]
    # `color=` is seaborn's documented argument; `label=` ties each legend
    # entry to its own points instead of relying on drawing order.
    sns.scatterplot(
        x=subset.prolongued_decelerations,
        y=subset.abnormal_short_term_variability,
        color=class_color,
        label=class_label,
    )

plt.title('Scattering the prolongued_decelerations based on abnormal_short_term_variability')
plt.legend()
# Axis labels follow the plotted data: x is prolongued_decelerations and
# y is abnormal_short_term_variability.
plt.xlabel("prolongued_decelerations")
plt.ylabel("abnormal_short_term_variability");
plt.grid(visible=False)

In [None]:
# Contingency table: rows are fetal_movement values, columns are fetal_health classes.
cross_tab = pd.crosstab(index=data['fetal_movement'], columns=data['fetal_health'])
print("Cross-tabulation of fetal_movement vs. fetal_health:", cross_tab, sep="\n")


In [None]:
# Every column except the last is a feature; the last column is the target.
X = data.iloc[:, :-1].to_numpy()
Y = data.iloc[:, -1].to_numpy()

In [None]:
# Data cleaning
# Rows with missing values are already dropped when the CSV is loaded, so both
# imputers act only as a safeguard here.

# SimpleImputer expects 2-D input, so the label vector becomes a single column.
Y = Y.reshape(-1, 1)

# Features: replace any NaN with the column mean.
imputerX = SimpleImputer(strategy="mean", missing_values=np.nan)
imputerX = imputerX.fit(X)
X = imputerX.transform(X)

# Labels are class codes, so a mean would invent classes that do not exist
# (e.g. 1.3). Fill with the most frequent class instead.
imputerY = SimpleImputer(strategy="most_frequent", missing_values=np.nan)
imputerY = imputerY.fit(Y)
Y = imputerY.transform(Y)

# Back to the 1-D shape expected by the estimators.
Y = np.ravel(Y)

In [None]:
# Hold out 30% of the rows for testing. The fixed seed makes the split (and
# every score below) reproducible; stratifying keeps the class proportions of
# the target the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42, stratify=Y
)

In [None]:
#Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Baseline random forest with default hyperparameters; seeded so that the
# baseline and the searches that clone it are reproducible.
rfc = RandomForestClassifier(random_state=42)

In [None]:
# Train the baseline forest and score it on the held-out rows.
pred = rfc.fit(X_train, y_train).predict(X_test)

baseline_accuracy = accuracy_score(y_true=y_test, y_pred=pred)
print('Accuracy:', baseline_accuracy)

In [None]:
# Series.unique() returns labels in order of first appearance, whereas the
# confusion matrix rows/columns follow the `labels` order. Sort the classes and
# pass them to both calls so the axis labels always match the matrix.
classes = np.sort(data.fetal_health.unique())
cm = confusion_matrix(y_test, pred, labels=classes)
# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
cm_display.plot()

plt.show()

In [None]:
# Text report of the classification metrics for the baseline predictions.
report = classification_report(y_true=y_test, y_pred=pred)
print(report)

Hyperparameter Tuning

In [None]:
# Hyperparameter grid shared by the grid search and the random search.
rfc_params = dict(
    n_estimators=[100, 300, 600, 1000, 2000],
    min_samples_split=[2, 3, 4],
    min_samples_leaf=[1, 2, 3, 4],
    bootstrap=[True, False],
    criterion=['gini', 'entropy'],
)


In [None]:
%%time
rfc_gridSearch=GridSearchCV(rfc, rfc_params, scoring='accuracy', cv=10, n_jobs=-1,verbose=0)
rfc_gridSearch.fit(X_train,y_train)

In [None]:
# Hyperparameter combination selected by the grid search.
rfc_gridSearch.best_params_

In [None]:
# Cross-validated score (scoring='accuracy') of the selected combination.
rfc_gridSearch.best_score_

In [None]:
# Refit a forest with the grid-search winner on the full training split.
# Seeded so the reported test accuracy is reproducible.
rfc_best = RandomForestClassifier(**rfc_gridSearch.best_params_, random_state=42)
rfc_best.fit(X_train, y_train)

In [None]:
# Test-set accuracy of the grid-search model. Arguments follow the
# (y_true, y_pred) order of the scikit-learn signature, as in the other cells.
y_pred = rfc_best.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Series.unique() returns labels in order of first appearance, whereas the
# confusion matrix rows/columns follow the `labels` order. Sort the classes and
# pass them to both calls so the axis labels always match the matrix.
classes = np.sort(data.fetal_health.unique())
cm = confusion_matrix(y_test, y_pred, labels=classes)
# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
cm_display.plot()

plt.show()

In [None]:
# Random search

from sklearn.model_selection import RandomizedSearchCV

# Sample 10 combinations from the same grid used by the grid search.
random_search = RandomizedSearchCV(
    estimator=rfc,
    param_distributions=rfc_params,
    n_iter=10,  # Number of random combinations to try
    scoring='accuracy',  # Same metric as the grid search
    cv=10,  # Number of cross-validation folds
    verbose=1,
    n_jobs=-1,  # Use all available CPU cores
    random_state=42,  # Fix which combinations are sampled so runs are repeatable
)

# Fit the search on the training data
random_search.fit(X_train, y_train)
# Print the best hyperparameters and corresponding score
print("Best parameters:", random_search.best_params_)
print("Best cross-validation score:", random_search.best_score_)

In [None]:
# Refit a forest with the random-search winner on the full training split.
# Seeded so the reported test accuracy is reproducible.
best_rfc_model = RandomForestClassifier(**random_search.best_params_, random_state=42)
best_rfc_model.fit(X_train, y_train)

In [None]:
# Test-set accuracy of the random-search model.
y_pred = best_rfc_model.predict(X_test)
random_search_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('accuracy:', random_search_accuracy)

In [None]:
# Series.unique() returns labels in order of first appearance, whereas the
# confusion matrix rows/columns follow the `labels` order. Sort the classes and
# pass them to both calls so the axis labels always match the matrix.
classes = np.sort(data.fetal_health.unique())
cm = confusion_matrix(y_test, y_pred, labels=classes)
# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
cm_display.plot()

plt.show()

In [None]:
 f#Genetic poppulatiopn generation comparison
!pip install tpot


In [None]:
import numpy as np
from tpot import TPOTClassifier

In [None]:
# TPOT search space for classification: a single operator
# (RandomForestClassifier) with the hyperparameter values listed below.
# NOTE(review): min_samples_leaf stops at 3 here but goes up to 4 in
# rfc_params; confirm whether the search spaces are meant to differ.
tpot_config = {
    'sklearn.ensemble.RandomForestClassifier': dict(
        n_estimators=[100, 300, 600, 1000, 2000],
        min_samples_split=[2, 3, 4],
        min_samples_leaf=[1, 2, 3],
        bootstrap=[True, False],
        criterion=['gini', 'entropy'],
    ),
}

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=10, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test Acc: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=30, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test Acc: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=50, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=70, cv=10, offspring_size = 5, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=100, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=150, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=10, population_size=200, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=10, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=30, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=50, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=70, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=100, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=150, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=20, population_size=200, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=10, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=30, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=50, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=70, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=100, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=150, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")

In [None]:
# Initialize TPOT for classification
tpot = TPOTClassifier(generations=30, population_size=200, offspring_size = 5, cv=10, scoring='accuracy', config_dict=tpot_config, verbosity=2, n_jobs=-1)
# Fit TPOT to the training data
tpot.fit(X_train, y_train)

# Get the best pipeline (including hyperparameters)
best_pipeline = tpot.fitted_pipeline_

# Get the best score (accuracy)
best_score = tpot.score(X_test, y_test)

print(f"Best score (accuracy): {best_score:.4f}")

In [None]:
# Evaluate the best pipeline on the test set using MSE
y_pred = best_pipeline.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Best pipeline: {best_pipeline}")
print(f"Test MSE: {acc:.4f}")