<a href="https://colab.research.google.com/github/Amma-Anjali/Generative_AI_2025/blob/main/7_1_gen_ai_2303A52385.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adadelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load Dataset
# The CSV is fetched over the network from a Google Drive direct-download link.
dataset_url = "https://drive.google.com/uc?id=1AcdENlVm5dccNyo_vgdMbneX8YVvH5R3"
df = pd.read_csv(dataset_url)

# Print Data Info
print("Dataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\nSample Data:")
print(df.head())

# Identify categorical columns (pandas stores string columns as dtype 'object')
categorical_cols = df.select_dtypes(include=['object']).columns
print("\nCategorical Columns:", list(categorical_cols))

# Convert categorical columns to numeric using LabelEncoder.
# One encoder is fitted per column and kept in `label_encoders`, keyed by
# column name, so the same mapping can be re-applied later.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Replace categories with integer codes
    label_encoders[col] = le
# NOTE: no encoder bookkeeping happens outside the loop. `col` and `le` only
# exist when at least one categorical column was found, so touching them here
# would raise NameError on an all-numeric dataset.

# Separate features and target
y = df.iloc[:, -1].values  # Last column as target (Diabetes Diagnosis)
X = df.iloc[:, :-1].values  # All other columns as features

# Split dataset into Training and Testing sets
# The split happens BEFORE scaling so that the held-out rows never contribute
# to the scaler's mean/variance (fitting on the full data leaks test-set
# statistics into training). With a fixed random_state the row assignment is
# the same as splitting after scaling.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize Features
# Fit on the training rows only, then apply the same transform to the test
# rows. `X` itself is left unscaled; only X_train / X_test are standardized.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build ANN Model
# Fully connected network: four ReLU hidden layers (8 -> 16 -> 20 -> 10 units)
# followed by a single sigmoid unit for binary classification.
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(8, activation='relu', input_shape=(n_features,)))  # Hidden Layer 1
for units in (16, 20, 10):  # Hidden Layers 2-4
    model.add(Dense(units, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output Layer

# Compile Model
# NOTE(review): Adadelta() is used with its library-default learning rate;
# confirm that default is intended, as it can make convergence slow.
model.compile(
    optimizer=Adadelta(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train Model
# The test split doubles as the validation set reported after each epoch.
model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=64,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Save Model
model.save("diabetes_ann_model.h5")

# Predict on Training and Test Data
# The sigmoid outputs are thresholded at 0.5 to obtain hard 0/1 labels.
train_probabilities = model.predict(X_train)
y_pred_train = (train_probabilities > 0.5).astype(int)
test_probabilities = model.predict(X_test)
y_pred_test = (test_probabilities > 0.5).astype(int)

# Calculate Performance Metrics
# Accuracy is computed on both splits; the confusion matrix and the
# classification report describe the test split only.
train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)
report = classification_report(y_test, y_pred_test)

# Display Results
print(f"\nTraining Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy: {test_acc:.4f}")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", report, sep="\n")

# Load the Model for Deployment
# Read the network back from the HDF5 file written by model.save() so that
# inference goes through the persisted artifact rather than the in-memory model.
saved_model_path = "diabetes_ann_model.h5"
loaded_model = keras.models.load_model(saved_model_path)

# Function to Predict Diabetes
def predict_diabetes(input_data):
    """
    Classify one patient record as "Diabetic" or "Non-Diabetic".

    The record is reshaped into a single-row 2D array, standardized with the
    module-level ``scaler`` and passed to the module-level ``loaded_model``.

    Args:
        input_data: Feature values for one patient. Because this function
            applies ``scaler.transform`` itself, the values are expected to be
            unscaled. No label encoding is performed here, so the values must
            already be numeric.

    Returns:
        "Diabetic" when the model's output is greater than 0.5, otherwise
        "Non-Diabetic".
    """
    # The scaler and the model both expect a 2D (rows, features) array.
    single_row = np.array(input_data).reshape(1, -1)

    # Standardize with the scaler fitted earlier in the script.
    scaled_row = scaler.transform(single_row)

    # predict() returns a 2D array; [0, 0] picks the only output value.
    probability = loaded_model.predict(scaled_row)[0, 0]

    if probability > 0.5:
        return "Diabetic"
    return "Non-Diabetic"

# Example Prediction
# X_test holds standardized rows, whereas predict_diabetes() expects an
# unscaled row and applies the scaler itself. Undo the scaling first so the
# sample is not standardized twice (which would distort the prediction).
sample_patient = scaler.inverse_transform(X_test[0].reshape(1, -1))[0]
prediction_result = predict_diabetes(sample_patient)
print(f"\nPredicted Diagnosis: {prediction_result}")