<a href="https://colab.research.google.com/github/2303a52332/Generative_AI_2025/blob/main/lab_7_1_gen_ai_2303A52332ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adadelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load Dataset
dataset_url = "https://drive.google.com/uc?id=1AcdENlVm5dccNyo_vgdMbneX8YVvH5R3"
df = pd.read_csv(dataset_url)

# Print Data Info
print("Dataset Info:")
print(df.info())
print("\nSample Data:")
print(df.head())

# Identify categorical columns
categorical_cols = df.select_dtypes(include=['object']).columns
print("\nCategorical Columns:", list(categorical_cols))

# Convert categorical columns to numeric using LabelEncoder
label_encoders = {}  # Store encoders for future use
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Convert categories to numbers
    label_encoders[col] = le  # Save encoder for deployment

# Separate features and target
label_encoders[col] = le  # Save encoder for deployment

# Separate features and target
y = df.iloc[:, -1].values  # Last column as target (Diabetes Diagnosis)
X = df.iloc[:, :-1].values  # All other columns as features

# Normalize Features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset into Training and Testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build ANN Model
model = Sequential([
    Dense(8, activation='relu', input_shape=(X_train.shape[1],)),  # Hidden Layer 1
    Dense(16, activation='relu'),  # Hidden Layer 2
    Dense(20, activation='relu'),  # Hidden Layer 3
    Dense(10, activation='relu'),  # Hidden Layer 4
    Dense(1, activation='sigmoid')  # Output Layer for binary classification
])

# Compile Model
model.compile(optimizer=Adadelta(), loss='binary_crossentropy', metrics=['accuracy'])

# Train Model
model.fit(X_train, y_train, epochs=150, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Save Model
model.save("diabetes_ann_model.h5")

# Predict on Training and Test Data
y_pred_train = (model.predict(X_train) > 0.5).astype(int)
y_pred_test = (model.predict(X_test) > 0.5).astype(int)

# Calculate Performance Metrics
train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)
report = classification_report(y_test, y_pred_test)

# Display Results
print(f"\nTraining Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy: {test_acc:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)

# Load the Model for Deployment
loaded_model = keras.models.load_model("diabetes_ann_model.h5")

# Function to Predict Diabetes
def predict_diabetes(input_data):
    """
    This function takes a raw input data row, processes it, and predicts diabetes.
    """
    # Ensure input data is a 2D array
    input_data = np.array(input_data).reshape(1, -1)

    # Apply Standard Scaling
    input_scaled = scaler.transform(input_data)
# Make Prediction
    prediction = loaded_model.predict(input_scaled)[0, 0]

    return "Diabetic" if prediction > 0.5 else "Non-Diabetic"

# Example Prediction
sample_patient = X_test[0]  # Get a sample from test data
prediction_result = predict_diabetes(sample_patient)
print(f"\nPredicted Diagnosis: {prediction_result}")


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
None

Sample Data:
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2    

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 47ms/step - accuracy: 0.4003 - loss: 0.5539 - val_accuracy: 0.3578 - val_loss: 0.5327
Epoch 2/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.4297 - loss: 0.5624 - val_accuracy: 0.3578 - val_loss: 0.5326
Epoch 3/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.4237 - loss: 0.5360 - val_accuracy: 0.3578 - val_loss: 0.5324
Epoch 4/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4053 - loss: 0.5519 - val_accuracy: 0.3578 - val_loss: 0.5323
Epoch 5/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4157 - loss: 0.5548 - val_accuracy: 0.3578 - val_loss: 0.5322
Epoch 6/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4324 - loss: 0.5519 - val_accuracy: 0.3578 - val_loss: 0.5321
Epoch 7/150
[1m7/7[0m [32m━━━━━━━━━━━



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Training Accuracy: 0.4312
Testing Accuracy: 0.3578

Confusion Matrix:
[[ 0 29  0]
 [ 0 39  0]
 [ 0 41  0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        29
           1       0.36      1.00      0.53        39
           2       0.00      0.00      0.00        41

    accuracy                           0.36       109
   macro avg       0.12      0.33      0.18       109
weighted avg       0.13      0.36      0.19       109

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step

Predicted Diagnosis: Diabetic
