<a href="https://colab.research.google.com/github/2303a52192/GENERATIVE_AI_2025/blob/main/2303A52192_GEN_AI_ass_7_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adadelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load Dataset
dataset_url = "https://drive.google.com/uc?id=1AcdENlVm5dccNyo_vgdMbneX8YVvH5R3"
df = pd.read_csv(dataset_url)

# Print Data Info
print("Dataset Info:")
df.info()  # info() prints its own report and returns None, so it is not wrapped in print()
print("\nSample Data:")
print(df.head())

# Identify categorical columns
categorical_cols = df.select_dtypes(include=['object']).columns
print("\nCategorical Columns:", list(categorical_cols))

# Convert categorical columns to numeric using LabelEncoder
label_encoders = {}  # Store encoders for future use
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Convert categories to numbers
    label_encoders[col] = le  # Save encoder for deployment

# Separate features and target
X = df.iloc[:, :-1].values  # All columns except the last are features
# The last column is treated as the class label. Re-index its distinct values
# to 0..n-1 so they are valid targets for both the binary and multi-class losses.
class_labels, y = np.unique(df.iloc[:, -1].values, return_inverse=True)
num_classes = len(class_labels)
is_multiclass = num_classes > 2
print("\nNumber of target classes:", num_classes)

# Split dataset into Training and Testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize Features
# Fit the scaler on the training split only so that test-set statistics do not
# leak into training; the test split is transformed with the training statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Pick the output layer and loss that match the number of target classes.
# A single sigmoid unit is only valid for a two-class target.
if is_multiclass:
    output_units = num_classes
    output_activation = 'softmax'
    loss_name = 'sparse_categorical_crossentropy'
else:
    output_units = 1
    output_activation = 'sigmoid'
    loss_name = 'binary_crossentropy'

# Build ANN Model
model = Sequential([
    Dense(8, activation='relu', input_shape=(X_train.shape[1],)),  # Hidden Layer 1
    Dense(16, activation='relu'),  # Hidden Layer 2
    Dense(20, activation='relu'),  # Hidden Layer 3
    Dense(10, activation='relu'),  # Hidden Layer 4
    Dense(output_units, activation=output_activation)  # Output Layer
])

# Compile Model
# Keras' default Adadelta learning rate (0.001) makes training extremely slow;
# 1.0 matches the formulation in the original Adadelta paper.
model.compile(optimizer=Adadelta(learning_rate=1.0), loss=loss_name, metrics=['accuracy'])

# Train Model
model.fit(X_train, y_train, epochs=150, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Save Model
model.save("diabetes_ann_model.h5")

# Predict on Training and Test Data
train_scores = model.predict(X_train)
test_scores = model.predict(X_test)
if is_multiclass:
    # Softmax output: the predicted class is the index of the highest probability
    y_pred_train = train_scores.argmax(axis=1)
    y_pred_test = test_scores.argmax(axis=1)
else:
    # Sigmoid output: threshold the single probability at 0.5
    y_pred_train = (train_scores > 0.5).astype(int).ravel()
    y_pred_test = (test_scores > 0.5).astype(int).ravel()

# Calculate Performance Metrics
train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)
# zero_division=0 reports 0 (without warnings) for classes that were never predicted
report = classification_report(y_test, y_pred_test, zero_division=0)

# Display Results
print(f"\nTraining Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy: {test_acc:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)

# Load the Model for Deployment
loaded_model = keras.models.load_model("diabetes_ann_model.h5")

# Function to Predict Diabetes
def predict_diabetes(input_data):
    """
    Classify a single, unscaled feature row with the saved model.

    The row is reshaped to a (1, n_features) array, standardized with the
    module-level ``scaler`` and passed to the module-level ``loaded_model``.
    Element [0, 0] of the model output is compared against 0.5.

    Returns "Diabetic" when that value is greater than 0.5, otherwise
    "Non-Diabetic".
    """
    # The scaler and the model both expect a 2D batch, so wrap the row in one
    single_row = np.array(input_data).reshape(1, -1)

    # Standardize, then score; only the first output value of the first row is used
    score = loaded_model.predict(scaler.transform(single_row))[0, 0]

    if score > 0.5:
        return "Diabetic"
    return "Non-Diabetic"

# Example Prediction
# X_test holds standardized features, while predict_diabetes() expects a raw row
# and applies the scaler itself. Undo the scaling first so the sample is not
# standardized twice.
sample_patient = scaler.inverse_transform(X_test[:1])[0]  # Raw version of the first test sample
prediction_result = predict_diabetes(sample_patient)
print(f"\nPredicted Diagnosis: {prediction_result}")

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
None

Sample Data:
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2    

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 147ms/step - accuracy: 0.3745 - loss: 0.5483 - val_accuracy: 0.3486 - val_loss: 0.5449
Epoch 2/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4006 - loss: 0.5489 - val_accuracy: 0.3486 - val_loss: 0.5448
Epoch 3/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.4035 - loss: 0.5674 - val_accuracy: 0.3486 - val_loss: 0.5447
Epoch 4/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.4167 - loss: 0.5554 - val_accuracy: 0.3486 - val_loss: 0.5445
Epoch 5/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - accuracy: 0.4029 - loss: 0.5465 - val_accuracy: 0.3486 - val_loss: 0.5444
Epoch 6/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3895 - loss: 0.5421 - val_accuracy: 0.3486 - val_loss: 0.5443
Epoch 7/150
[1m7/7[0m [32m━━━━━━━━━━



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 

Training Accuracy: 0.4243
Testing Accuracy: 0.3578

Confusion Matrix:
[[ 1 28  0]
 [ 1 38  0]
 [ 4 37  0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.17      0.03      0.06        29
           1       0.37      0.97      0.54        39
           2       0.00      0.00      0.00        41

    accuracy                           0.36       109
   macro avg       0.18      0.34      0.20       109
weighted avg       0.18      0.36      0.21       109



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step

Predicted Diagnosis: Diabetic
