# PCOS Diagnosis Dataset
#### This dataset contains information about patients with Polycystic Ovary Syndrome (PCOS), a common hormonal disorder affecting women of reproductive age. The dataset consists of 1000 entries, each representing a patient, and includes five key features that are typically associated with the diagnosis and risk factors of PCOS. These features provide insight into each patient's health and can be used to predict a PCOS diagnosis.

In [13]:
# Documentation-only cell: the two bare string literals below serve as section
# labels and perform no computation; the last one is echoed as the cell output.
'''Features'''
# Age (years): The age of the patient, ranging from 18 to 45 years.
# BMI (kg/m²): The Body Mass Index, a measure of body fat based on height and weight, ranging from 18 to 35.
# Menstrual Irregularity (binary): Indicates whether the patient has irregular menstrual cycles (0 = No, 1 = Yes).
# Testosterone Level (ng/dL): The level of testosterone in the patient's blood, an important hormonal indicator of PCOS, ranging from 20 to 100 ng/dL.
# Antral Follicle Count: The number of antral follicles detected during an ultrasound, ranging from 5 to 30, which helps in assessing ovarian reserve and PCOS presence.

'''Target Variable'''
# PCOS Diagnosis (binary): Indicates whether the patient has been diagnosed with PCOS (0 = No, 1 = Yes), based on a combination of risk factors.

'Target Variable'

## Import Data

In [14]:
import pandas as pd

# Raw string literal: in a normal string, sequences such as "\D" and "\P" are
# invalid escapes (deprecated, and reported as a warning on current Python).
# The raw form yields exactly the same path without relying on that behavior.
# NOTE(review): this is an absolute, machine-specific location; adjust it (or
# make it relative to the notebook) before running elsewhere.
DATA_PATH = r"E:\DataScience\Projects\DL\pcos_dataset.csv"

# Load the PCOS dataset and preview the first five rows.
df = pd.read_csv(DATA_PATH)
print(df.head())

   Age   BMI  Menstrual_Irregularity  Testosterone_Level(ng/dL)  \
0   24  34.7                       1                       25.2   
1   37  26.4                       0                       57.1   
2   32  23.6                       0                       92.7   
3   28  28.8                       0                       63.1   
4   25  22.1                       1                       59.8   

   Antral_Follicle_Count  PCOS_Diagnosis  
0                     20               0  
1                     25               0  
2                     28               0  
3                     26               0  
4                      8               0  


## Data Cleaning

In [15]:
# Discard rows with any missing value first, then remove exact duplicate rows.
df = df.dropna().drop_duplicates()

## Split Independent and Dependent Variables

In [16]:
# Every column except the last forms the feature matrix X (independent variables);
# the last column is the label vector y (dependent variable).
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [17]:
# Width of the feature matrix = number of input features.
_, n_features = X.shape
print(n_features)

# Echo the target array as the cell's output.
y

5


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,

## Train Test Split

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

## Build Tensorflow Model

In [19]:
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense

# Artificial neural network built as a Keras Sequential model.
model = Sequential()

# Declare the input explicitly: one value per feature column (n_features inputs,
# not "input layers"). Passing input_shape= to Dense instead makes Keras emit a
# UserWarning recommending an Input object as the first layer.
model.add(Input(shape=(n_features,)))

model.add(Dense(128, activation='relu'))   # single hidden layer with 128 units

model.add(Dense(1, activation='sigmoid'))  # 1 neuron for two classes - 0/1 - binary

# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked as the metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Fitting the Model

In [20]:
# Train on the training split for 150 epochs in mini-batches of 32;
# verbose=2 logs one summary line per epoch.
model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=32,
    verbose=2,
)

Epoch 1/150
25/25 - 1s - 40ms/step - accuracy: 0.7538 - loss: 0.5331
Epoch 2/150
25/25 - 0s - 4ms/step - accuracy: 0.8025 - loss: 0.4768
Epoch 3/150
25/25 - 0s - 4ms/step - accuracy: 0.8025 - loss: 0.4365
Epoch 4/150
25/25 - 0s - 4ms/step - accuracy: 0.8112 - loss: 0.4149
Epoch 5/150
25/25 - 0s - 3ms/step - accuracy: 0.8313 - loss: 0.3983
Epoch 6/150
25/25 - 0s - 4ms/step - accuracy: 0.8550 - loss: 0.3811
Epoch 7/150
25/25 - 0s - 4ms/step - accuracy: 0.8500 - loss: 0.3705
Epoch 8/150
25/25 - 0s - 4ms/step - accuracy: 0.8425 - loss: 0.3638
Epoch 9/150
25/25 - 0s - 4ms/step - accuracy: 0.8512 - loss: 0.3630
Epoch 10/150
25/25 - 0s - 4ms/step - accuracy: 0.8600 - loss: 0.3470
Epoch 11/150
25/25 - 0s - 4ms/step - accuracy: 0.8438 - loss: 0.3523
Epoch 12/150
25/25 - 0s - 5ms/step - accuracy: 0.8788 - loss: 0.3224
Epoch 13/150
25/25 - 0s - 5ms/step - accuracy: 0.8512 - loss: 0.3197
Epoch 14/150
25/25 - 0s - 4ms/step - accuracy: 0.8500 - loss: 0.3249
Epoch 15/150
25/25 - 0s - 5ms/step - accur

<keras.src.callbacks.history.History at 0x23f74453590>

## Evaluating the Model

In [21]:
# Score the trained model on the held-out split. With the single 'accuracy'
# metric configured at compile time, evaluate() yields the loss and the accuracy.
loss, acc = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print('Test Accuracy: %.3f ' % (acc,))

Test Accuracy: 0.945 


## Predicting the class

In [22]:
# Optional: persist / reload the trained model (left disabled).
# model.save('pcos.keras')     

# from tensorflow.keras.models import load_model
# model = load_model('pcos.keras')

# Make a prediction with a sample row of data.
# Column order follows the training features:
# Age, BMI, Menstrual_Irregularity, Testosterone_Level(ng/dL), Antral_Follicle_Count
import numpy as np
sample = np.array([[18, 35, 1, 100, 30]])
prediction = model.predict(sample)

# The output layer is a single sigmoid unit, so `prediction` has one column
# holding the probability of class 1. np.argmax over that length-1 axis is
# always 0 regardless of the probability, so the class must be obtained by
# thresholding the probability at 0.5 instead.
predicted_class = (prediction > 0.5).astype(int).ravel()
print('Predicted class:', predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
Predicted class: [0]
