In [12]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [16]:
import pandas as pd
import numpy as np

# Load the dataset
file_path = 'diabetes_dataset.txt' 

with open(file_path, 'r') as file:
    lines = file.readlines()

data = []
for line in lines:
    parts = line.strip().split()
    label = int(parts[0])
    features = [float(p.split(':')[1]) for p in parts[1:]]
    data.append([label] + features)

columns = ['target'] + [f'feature_{i}' for i in range(len(data[0])-1)]
df = pd.DataFrame(data, columns=columns)

# Display basic information about the dataset
print(df.info())
print("\nFirst few rows of the dataset:")
print(df.head())

print("\nMissing values:")
print(df.isnull().sum())

print("\nBasic statistics:")
print(df.describe())

df.to_csv('diabetes.csv', index=False)
print("\nProcessed data saved as 'diabetes.csv'")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   target     768 non-null    int64  
 1   feature_0  768 non-null    float64
 2   feature_1  768 non-null    float64
 3   feature_2  768 non-null    float64
 4   feature_3  768 non-null    float64
 5   feature_4  768 non-null    float64
 6   feature_5  768 non-null    float64
 7   feature_6  768 non-null    float64
 8   feature_7  768 non-null    float64
dtypes: float64(8), int64(1)
memory usage: 54.1 KB
None

First few rows of the dataset:
   target  feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
0      -1        6.0      148.0       72.0       35.0        0.0  33.599998   
1       1        1.0       85.0       66.0       29.0        0.0  26.600000   
2      -1        8.0      183.0       64.0        0.0        0.0  23.299999   
3       1        1.0       89.0       66.0       23.

In [41]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('diabetes.csv')

# Strip whitespace from column names 
df.columns = df.columns.str.strip()

# Print columns to verify
print("Columns in the DataFrame:")
print(df.columns)

# Separate features and labels
X = df.drop('target', axis=1)
y = df['target']

# Feature Standardization (Z-score normalization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training, validation, and test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=42, stratify=y_train_val)

# Implement the Perceptron Model
learning_rates = [0.01, 0.05, 0.1]
max_iterations = [1000]
best_perceptron = None
best_accuracy = 0
best_params = {}

for lr in learning_rates:
    for max_iter in max_iterations:
        print(f"\nTraining Perceptron with learning rate={lr}, max_iter={max_iter}")
        perceptron = Perceptron(
            eta0=lr,
            max_iter=max_iter,
            random_state=42,
            early_stopping=True,
            validation_fraction=0.1,
            n_iter_no_change=10
        )
        perceptron.fit(X_train, y_train)

        # Evaluate the model on the validation set
        y_val_pred = perceptron.predict(X_val)
        val_accuracy = accuracy_score(y_val, y_val_pred)
        print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_perceptron = perceptron
            best_params = {'learning_rate': lr, 'max_iter': max_iter}

# Evaluate the Best Perceptron Model on the Test Set
print(f"\nBest Perceptron Model Parameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")

# Evaluate on the test set
y_test_pred_perceptron = best_perceptron.predict(X_test)
test_accuracy_perceptron = accuracy_score(y_test, y_test_pred_perceptron)
print(f"Perceptron Model Test Accuracy: {test_accuracy_perceptron * 100:.2f}%")

# Print classification report and confusion matrix
print("\nPerceptron Model Classification Report:")
print(classification_report(y_test, y_test_pred_perceptron))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_perceptron))

# Implement the CNN Model
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val_cnn = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build and Train the CNN Model
cnn_model = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(1, activation='sigmoid')
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = cnn_model.fit(
    X_train_cnn, y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_val_cnn, y_val),
    callbacks=[early_stopping]
)

# Evaluate the CNN Model on the Test Set
loss_cnn, accuracy_cnn = cnn_model.evaluate(X_test_cnn, y_test)
print(f"\nCNN Model Test Accuracy: {accuracy_cnn * 100:.2f}%")

# Predict and generate classification report
y_test_pred_cnn = (cnn_model.predict(X_test_cnn) >= 0.5).astype(int).flatten()
print("\nCNN Model Classification Report:")
print(classification_report(y_test, y_test_pred_cnn))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_cnn))

# Compare Model Performance
print(f"\nPerceptron Model Test Accuracy: {test_accuracy_perceptron * 100:.2f}%")
print(f"CNN Model Test Accuracy: {accuracy_cnn * 100:.2f}%") 


Columns in the DataFrame:
Index(['target', 'feature_0', 'feature_1', 'feature_2', 'feature_3',
       'feature_4', 'feature_5', 'feature_6', 'feature_7'],
      dtype='object')

Training Perceptron with learning rate=0.01, max_iter=1000
Validation Accuracy: 69.48%

Training Perceptron with learning rate=0.05, max_iter=1000
Validation Accuracy: 69.48%

Training Perceptron with learning rate=0.1, max_iter=1000
Validation Accuracy: 69.48%

Best Perceptron Model Parameters: {'learning_rate': 0.01, 'max_iter': 1000}
Best Validation Accuracy: 69.48%
Perceptron Model Test Accuracy: 65.58%

Perceptron Model Classification Report:
              precision    recall  f1-score   support

          -1       0.51      0.43      0.46        54
           1       0.72      0.78      0.75       100

    accuracy                           0.66       154
   macro avg       0.61      0.60      0.61       154
weighted avg       0.64      0.66      0.65       154

Confusion Matrix:
[[23 31]
 [22 78]]
Epoch 

  super().__init__(


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.2152 - loss: 0.5841 - val_accuracy: 0.0909 - val_loss: 0.4923
Epoch 2/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1571 - loss: 0.4684 - val_accuracy: 0.0909 - val_loss: 0.4029
Epoch 3/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1414 - loss: 0.3567 - val_accuracy: 0.0909 - val_loss: 0.3255
Epoch 4/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1600 - loss: 0.2974 - val_accuracy: 0.1039 - val_loss: 0.2581
Epoch 5/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1544 - loss: 0.0823 - val_accuracy: 0.1299 - val_loss: 0.1852
Epoch 6/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2045 - loss: 0.1056 - val_accuracy: 0.1558 - val_loss: 0.1134
Epoch 7/100
[1m29/29[0m [32m━━━━━━━━━━━━━━

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [42]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('diabetes.csv')

# Strip whitespace from column names 
df.columns = df.columns.str.strip()

# Print columns to verify
print("Columns in the DataFrame:")
print(df.columns)

# Separate features and labels
X = df.drop('target', axis=1)
y = df['target']

# Feature Standardization (Z-score normalization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training, validation, and test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=42, stratify=y_train_val)


Columns in the DataFrame:
Index(['target', 'feature_0', 'feature_1', 'feature_2', 'feature_3',
       'feature_4', 'feature_5', 'feature_6', 'feature_7'],
      dtype='object')


In [43]:
# Before training, convert labels
y_train = np.where(y_train == -1, 0, y_train)
y_val = np.where(y_val == -1, 0, y_val)
y_test = np.where(y_test == -1, 0, y_test)


In [44]:
# Confirm labels are 0 and 1
print("Unique labels in y_train:", np.unique(y_train))


Unique labels in y_train: [0 1]


In [45]:
# Assuming your labels are originally -1 and 1
# Convert labels to 0 and 1
y = df['target']
y = np.where(y == -1, 0, y)

# Update y in train, validation, and test sets after splitting
# (Ensure to apply the transformation before splitting to avoid data leakage)

In [46]:
# Labels are now 0 and 1
# No need to transform labels within the Perceptron class
# Proceed to train the Perceptron model as before
perceptron.fit(X_train, y_train)

In [47]:
# Implement the Perceptron Model
learning_rates = [0.01, 0.05, 0.1]
max_iterations = [1000]
best_perceptron = None
best_accuracy = 0
best_params = {}

for lr in learning_rates:
    for max_iter in max_iterations:
        print(f"\nTraining Perceptron with learning rate={lr}, max_iter={max_iter}")
        perceptron = Perceptron(
            eta0=lr,
            max_iter=max_iter,
            random_state=42,
            early_stopping=True,
            validation_fraction=0.1,
            n_iter_no_change=10
        )
        perceptron.fit(X_train, y_train)

        # Evaluate the model on the validation set
        y_val_pred = perceptron.predict(X_val)
        val_accuracy = accuracy_score(y_val, y_val_pred)
        print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_perceptron = perceptron
            best_params = {'learning_rate': lr, 'max_iter': max_iter}

# Evaluate the Best Perceptron Model on the Test Set
print(f"\nBest Perceptron Model Parameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")

# Evaluate on the test set
y_test_pred_perceptron = best_perceptron.predict(X_test)
test_accuracy_perceptron = accuracy_score(y_test, y_test_pred_perceptron)
print(f"Perceptron Model Test Accuracy: {test_accuracy_perceptron * 100:.2f}%")

# Print classification report and confusion matrix
print("\nPerceptron Model Classification Report:")
print(classification_report(y_test, y_test_pred_perceptron))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_perceptron))


Training Perceptron with learning rate=0.01, max_iter=1000
Validation Accuracy: 69.48%

Training Perceptron with learning rate=0.05, max_iter=1000
Validation Accuracy: 69.48%

Training Perceptron with learning rate=0.1, max_iter=1000
Validation Accuracy: 69.48%

Best Perceptron Model Parameters: {'learning_rate': 0.01, 'max_iter': 1000}
Best Validation Accuracy: 69.48%
Perceptron Model Test Accuracy: 65.58%

Perceptron Model Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.43      0.46        54
           1       0.72      0.78      0.75       100

    accuracy                           0.66       154
   macro avg       0.61      0.60      0.61       154
weighted avg       0.64      0.66      0.65       154

Confusion Matrix:
[[23 31]
 [22 78]]


In [49]:
# Implement the CNN Model
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val_cnn = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build and Train the CNN Model
cnn_model = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(1, activation='sigmoid')
])

# Labels are 0 and 1
# Compile and train the CNN model as before
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = cnn_model.fit(
    X_train_cnn, y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_val_cnn, y_val),
    callbacks=[early_stopping]
)

# Evaluate the CNN Model on the Test Set
loss_cnn, accuracy_cnn = cnn_model.evaluate(X_test_cnn, y_test)
print(f"\nCNN Model Test Accuracy: {accuracy_cnn * 100:.2f}%")

# Predict and generate classification report
y_test_pred_cnn = (cnn_model.predict(X_test_cnn) >= 0.5).astype(int).flatten()
print("\nCNN Model Classification Report:")
print(classification_report(y_test, y_test_pred_cnn))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_cnn))

Epoch 1/100


  super().__init__(


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.4250 - loss: 0.7256 - val_accuracy: 0.6623 - val_loss: 0.6726
Epoch 2/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6648 - loss: 0.6487 - val_accuracy: 0.6948 - val_loss: 0.6351
Epoch 3/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6945 - loss: 0.6049 - val_accuracy: 0.7078 - val_loss: 0.6086
Epoch 4/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7623 - loss: 0.5571 - val_accuracy: 0.7273 - val_loss: 0.5892
Epoch 5/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7398 - loss: 0.5523 - val_accuracy: 0.7468 - val_loss: 0.5749
Epoch 6/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7758 - loss: 0.5179 - val_accuracy: 0.7468 - val_loss: 0.5635
Epoch 7/100
[1m29/29[0m [32m━━━━━━━━━━━━━━

In [51]:
# For both models, predictions will be 0 or 1
# Generate classification reports and confusion matrices
from sklearn.metrics import classification_report, confusion_matrix

# Perceptron Model
y_test_pred_perceptron = perceptron.predict(X_test)
print("Perceptron Model Classification Report:")
print(classification_report(y_test, y_test_pred_perceptron))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_perceptron))
print(f"\nPerceptron Model Test Accuracy: {test_accuracy_perceptron * 100:.2f}%")

# CNN Model
y_test_pred_cnn = (cnn_model.predict(X_test_cnn) >= 0.5).astype(int).flatten()
print("CNN Model Classification Report:")
print(classification_report(y_test, y_test_pred_cnn))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred_cnn))
print(f"CNN Model Test Accuracy: {accuracy_cnn * 100:.2f}%") 

Perceptron Model Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.43      0.46        54
           1       0.72      0.78      0.75       100

    accuracy                           0.66       154
   macro avg       0.61      0.60      0.61       154
weighted avg       0.64      0.66      0.65       154

Confusion Matrix:
[[23 31]
 [22 78]]

Perceptron Model Test Accuracy: 65.58%
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
CNN Model Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.54      0.60        54
           1       0.77      0.86      0.82       100

    accuracy                           0.75       154
   macro avg       0.72      0.70      0.71       154
weighted avg       0.74      0.75      0.74       154

Confusion Matrix:
[[29 25]
 [14 86]]
CNN Model Test Accuracy: 74.68%
