In [15]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import gradio as gr

# Try to import google.colab; if it fails, assume we're not in Colab
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# Check if files exist
file1 = "/content/full_data (4).csv"
file2 = "/content/full_filled_stroke_data (1) (1).csv"

# If in Colab and files don't exist, prompt for upload
if IN_COLAB and (not os.path.exists(file1) or not os.path.exists(file2)):
    from google.colab import files
    print(f"Error: '{file1}' or '{file2}' not found! Please upload them.")
    uploaded = files.upload()
    # Check if uploaded files match expected names
    if file1 not in uploaded or file2 not in uploaded:
        raise FileNotFoundError(f"Uploaded files do not match expected names: '{file1}', '{file2}'")
elif not os.path.exists(file1) or not os.path.exists(file2):
    # If not in Colab, raise the FileNotFoundError
    raise FileNotFoundError(f"Error: '{file1}' or '{file2}' not found! Please check file paths.")

# Load datasets
df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Load datasets
df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Data preprocessing
df1 = pd.get_dummies(df1, columns=['work_type', 'smoking_status'])
df1['gender'] = df1['gender'].apply(lambda x: 1 if x == 'Male' else 0)
df1['ever_married'] = df1['ever_married'].apply(lambda x: 1 if x == 'Yes' else 0)
df1['Residence_type'] = df1['Residence_type'].apply(lambda x: 1 if x == 'Urban' else 0)

# Features and labels
X = df1.drop(columns='stroke')
y = df1['stroke']

# Feature scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Check class balance
stroke_counts = y.value_counts()
print("Stroke class distribution:\n", stroke_counts)

if len(stroke_counts) < 2:
    raise ValueError("The target 'stroke' needs both 0s and 1s. Found only one class.")

# Oversampling using SMOTE
smt = SMOTE(random_state=42)
X_smt, y_smt = smt.fit_resample(X_scaled, y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_smt, y_smt, test_size=0.1, random_state=42)

# Building the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.2),
    BatchNormalization(),

    Dense(32, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),

    Dense(1, activation='sigmoid')
])

# Compile the model
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping
callback = EarlyStopping(monitor="val_loss", patience=20, verbose=1, restore_best_weights=True)

# Training the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[callback], verbose=1)

# Plot accuracy and loss
def plot_model_performance(history):
    plt.figure(figsize=(12,5))

    # Accuracy plot
    plt.subplot(1,2,1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Test Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss plot
    plt.subplot(1,2,2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Test Loss')
    plt.title('Model Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

# Predict and evaluate
def evaluate_model():
    y_pred = (model.predict(X_test[:500]) > 0.5).astype("int32")
    accuracy = accuracy_score(y_test[:500], y_pred) * 100
    classification_rep = classification_report(y_test[:500], y_pred)

    plot_model_performance(history)

    return f"Accuracy: {accuracy:.2f}%\n\nClassification Report:\n{classification_rep}"

# Gradio interface
interface = gr.Interface(
    fn=evaluate_model,
    inputs=[],
    outputs="text",
    title="Stroke Prediction Results",
    description="Click the button below to view the stroke prediction analysis based on deep learning."
)

# Launch Gradio
interface.launch(share=True)  # For Colab, use share=True


Stroke class distribution:
 stroke
0    4733
1     248
Name: count, dtype: int64
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.6948 - loss: 0.5985 - val_accuracy: 0.7793 - val_loss: 0.5054
Epoch 2/10
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7871 - loss: 0.4622 - val_accuracy: 0.7878 - val_loss: 0.4484
Epoch 3/10
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7807 - loss: 0.4692 - val_accuracy: 0.7888 - val_loss: 0.4396
Epoch 4/10
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7924 - loss: 0.4489 - val_accuracy: 0.7962 - val_loss: 0.4311
Epoch 5/10
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7980 - loss: 0.4340 - val_accuracy: 0.8036 - val_loss: 0.4068
Epoch 6/10
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8006 - loss: 0.4349 - val_accuracy: 0.8004 - val_loss: 0.4106
Epoch 7/10
[1m267/267[0m [32m━━━━━━━

