<a href="https://colab.research.google.com/github/Muhammad-Roshaan-Idrees/Artificial_Intelligence/blob/main/Diabeties_Prediction_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Upload diabetes.csv into the Colab runtime, but only when it is not already
# on disk. This keeps "Restart & Run All" from re-prompting for the file and
# lets the notebook run outside Colab when the CSV sits next to it.
import os

if not os.path.exists('diabetes.csv'):
    from google.colab import files

    # Bind the return value: as a bare last expression, the uploaded file
    # contents returned by files.upload() would be echoed into the cell output.
    uploaded = files.upload()

In [None]:
# Import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Read the diabetes CSV from the working directory into a DataFrame
df = pd.read_csv('diabetes.csv')

# Show a header line followed by the first five records
print("First 5 rows of the dataset:", df.head(), sep="\n")

In [None]:
# Data Exploration and Preprocessing
print("\nData Exploration")
print("Dataset shape:", df.shape)
print("\nData types:")
print(df.dtypes)
print("\nStatistical summary:")
print(df.describe())

# Columns in which this notebook treats a value of 0 as "missing"
zero_cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
print("\nNumber of zeros in each column (Before):")
for col in zero_cols:
    zero_count = (df[col] == 0).sum()
    print(f"{col}: {zero_count} zeros ({zero_count/len(df)*100:.2f}%)")

# Replace zeros with NaN and then with the column mean.
# The result is assigned back to df[col] rather than calling
# fillna(..., inplace=True) on the selected column: that chained in-place form
# acts on an intermediate object, is deprecated in pandas 2.x, and does not
# update df at all under Copy-on-Write.
# NOTE(review): the mean is computed over the full dataset, i.e. before the
# train/test split, so test rows influence the imputed values.
for col in zero_cols:
    df[col] = df[col].replace(0, np.nan)
    df[col] = df[col].fillna(df[col].mean())

# Fail loudly if imputation left gaps (e.g. a column that was entirely zero)
assert not df[zero_cols].isna().any().any(), "NaNs remain after mean imputation"

# Verify zeros
print("\nAfter handling zeros:")
for col in zero_cols:
    zero_count = (df[col] == 0).sum()
    print(f"{col}: {zero_count} zeros")

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap
correlation_matrix = df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,        # write the coefficient inside each cell
    cmap='coolwarm',
    center=0,          # centre the colour scale on zero correlation
)
plt.title('Correlation Heatmap of Diabetes Dataset')
plt.tight_layout()
plt.show()

In [None]:
# Split data into features and target
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Split into training and test set (80/20, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data: fit the scaler on the training rows only, then apply the
# same transformation to the test rows so no test statistics leak into scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Report the shapes; interpolating the arrays themselves would dump every
# scaled value under a label that says "shape"
print(f"\nTraining set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")


Training set shape: [[-0.52639686 -1.25828206  0.01321033 ...  0.01501323 -0.49073479
  -1.03594038]
 [ 1.58804586 -0.32735374  0.8068672  ... -0.59935041  2.41502991
   1.48710085]
 [-0.82846011  0.57032714 -2.17095414 ... -0.52719904  0.54916055
  -0.94893896]
 ...
 [ 1.8901091  -0.69307558  1.13773624 ...  1.91151712  1.981245
   0.44308379]
 [-1.13052335  0.63682202  0.01321033 ...  1.44974838 -0.78487662
  -0.33992901]
 [-1.13052335  0.10486298  1.96490883 ... -1.42187598 -0.61552223
  -1.03594038]]
Test set shape: [[ 0.68185612 -0.7928179  -1.17834702 ...  0.23760544 -0.11637247
   0.87809089]
 [-0.52639686 -0.32735374  0.22784639 ...  0.48292008 -0.954231
  -1.03594038]
 [-0.52639686 -0.4603435  -0.68204347 ... -0.22416331 -0.9245197
  -1.03594038]
 ...
 [ 1.28598261 -0.89256022 -0.02030539 ...  0.64165309  0.04703966
   2.0961108 ]
 [-0.52639686  0.80305922 -0.18573991 ... -0.62821095 -0.39268751
  -0.33992901]
 [ 1.28598261 -1.59075646 -0.18573991 ...  0.42519899  0.70068816


In [None]:
# Building the Neural Network Model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable across "Restart & Run All"
tf.keras.utils.set_random_seed(42)

model = Sequential()
# Declare the input with an explicit Input layer instead of passing
# `input_dim` to the first Dense layer, which Keras 3 warns against.
# The width is taken from the scaled training matrix.
model.add(tf.keras.Input(shape=(X_train.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(12, activation='relu'))
model.add(Dropout(0.2))
# Single sigmoid unit: outputs a value in (0, 1) for the binary target
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display model architecture
print("\nModel Architecture")
model.summary()


Model Architecture


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Stop training early if validation loss has not improved for 10 epochs,
# and roll the model back to the weights of the best epoch
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Training and Evaluation
print("\nTraining the Model")
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,    # hold out part of the training data for val_loss
    epochs=100,              # upper bound; early stopping may end training sooner
    batch_size=10,
    callbacks=[early_stop],  # the callback has no effect unless passed to fit()
)


In [None]:
# Display the architecture of the trained model.
# The model is deliberately NOT rebuilt or recompiled here: doing so after
# model.fit() would replace the trained network with a freshly initialised one,
# and every later model.predict() call would run on untrained weights.
print("\nModel Architecture")
model.summary()

In [None]:
# Training vs. validation curves, one figure per metric.
# Each row: (train key, validation key, train label, validation label, title, y-axis label)
curve_specs = [
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', "Model Accuracy", "Accuracy"),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', "Model Loss", "Loss"),
]

for train_key, val_key, train_label, val_label, title, y_label in curve_specs:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend()
    plt.show()  # finish this figure before the next metric is drawn

In [None]:
# Predict on the held-out test set and threshold the sigmoid outputs at 0.5
# to obtain hard 0/1 class labels
y_pred_prob = model.predict(X_test)
y_pred = np.greater(y_pred_prob, 0.5).astype(int)

# Report accuracy, confusion matrix and per-class metrics on the test set
print("\nModel Evaluation")

acc = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {acc}')

conf_matrix = confusion_matrix(y_test, y_pred)
print(f'Confusion Matrix:\n {conf_matrix}')

class_report = classification_report(y_test, y_pred)
print(f'Classification Report:\n {class_report}')