# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load dataset
df = pd.read_csv('kidney_disease.csv')

# Clean column names (remove surrounding spaces) so that lookups such as
# df['classification'] do not fail on a padded header.
df.columns = df.columns.str.strip()

# Strip stray whitespace/tabs from every string cell. Without this a padded
# label such as 'ckd\t' is not matched by the target mapping below, turns into
# NaN and the row is silently dropped; padded feature values such as ' yes'
# would likewise be coerced to NaN further down.
for col in df.select_dtypes(include='object').columns:
    # Only str values are touched; NaN and other non-string cells pass through.
    df[col] = df[col].map(lambda v: v.strip() if isinstance(v, str) else v)

# Convert target to binary; any label other than 'ckd'/'notckd' becomes NaN
df['classification'] = df['classification'].map({'ckd': 1, 'notckd': 0})

# Drop rows where the target is NaN (copy so later column assignments work on
# an independent frame rather than a possible view)
df = df.dropna(subset=['classification']).copy()

# Encode the known yes/no style answers as 0/1. The tab-prefixed variants
# ('\tno', '\tyes') no longer need their own entries because every string
# cell was stripped above.
df = df.replace({'normal': 0, 'abnormal': 1, 'present': 1, 'notpresent': 0,
                 'yes': 1, 'no': 0, 'good': 0, 'poor': 1})

# Force the remaining object columns to numeric. errors='coerce' means any
# value that still is not a number becomes NaN (imputed later) instead of
# raising.
for col in df.select_dtypes(include='object').columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Attach a human-readable label next to the binary target
status_labels = {1: 'Chronic Kidney Disease', 0: 'No Kidney Disease'}
df['Kidney Disease Status'] = df['classification'].map(status_labels)

# The report is just the target column and its label
report_columns = ['classification', 'Kidney Disease Status']
kidney_report = df[report_columns]

# Show the report on stdout
print("Kidney Disease Status Report:")
print(kidney_report)

# Persist the report without the DataFrame index
report_path = 'kidney_disease_status_report.csv'
kidney_report.to_csv(report_path, index=False)
print("\nReport saved to 'kidney_disease_status_report.csv'.")

# Features are everything except the target and its text label
# NOTE(review): any row-identifier column present in the CSV (e.g. an 'id'
# column) is kept as a feature here - confirm that is intended.
non_feature_cols = ['classification', 'Kidney Disease Status']
X = df.drop(columns=non_feature_cols)
y = df['classification'].to_numpy()

# Partition the feature columns by dtype: numeric vs. everything else
numeric_cols = X.select_dtypes(include=np.number).columns
categorical_cols = X.select_dtypes(exclude=np.number).columns

# Numeric branch: fill gaps with the column median, then standardise
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode (categories unseen at fit time are ignored at transform time)
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own branch
column_routes = [
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_routes)

# Split dataset BEFORE fitting the preprocessor. Fitting on the full data set
# would let the held-out rows influence the imputation values, scaling
# statistics and one-hot vocabulary (data leakage), which inflates the test
# metrics. Same random_state/stratify as before, so the row partition is
# unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn preprocessing parameters from the training rows only, then apply the
# fitted transformation to the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# ColumnTransformer can return a scipy sparse matrix when the one-hot output
# dominates; sparse matrices cannot be reshaped to 3-D, so densify first.
if hasattr(X_train, 'toarray'):
    X_train = X_train.toarray()
if hasattr(X_test, 'toarray'):
    X_test = X_test.toarray()

# Reshape for Conv1D (samples, features, channels)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the CNN layer by layer; the input is (features, 1 channel)
n_features = X_train.shape[1]
model = Sequential()

# Two convolutional stages that differ only in filter count (64, then 128).
# Only the first layer declares the input shape.
for stage_index, n_filters in enumerate((64, 128)):
    first_layer_kwargs = {'input_shape': (n_features, 1)} if stage_index == 0 else {}
    model.add(Conv1D(n_filters, kernel_size=2, activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization())
    # pool_size=1 leaves the sequence length unchanged
    model.add(MaxPooling1D(pool_size=1))
    model.add(Dropout(0.3))

# Collapse (steps, filters) into one vector for the dense head
model.add(Flatten())

# Dense head
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

# Single sigmoid unit for the binary target
model.add(Dense(1, activation='sigmoid'))

# Compile model
optimizer = Adam(learning_rate=0.001)
tracked_metrics = ['accuracy', 'AUC']
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=tracked_metrics)

# Stop once validation loss has not improved for 15 epochs and roll the model
# back to the best weights seen
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Training configuration; 20% of the training data is used for validation
fit_options = {
    'epochs': 100,
    'batch_size': 32,
    'validation_split': 0.2,
    'callbacks': [early_stop],
    'verbose': 1,
}
history = model.fit(X_train, y_train, **fit_options)

# Evaluate on the held-out test split; values come back in compile order
# (loss, then the two metrics)
test_scores = model.evaluate(X_test, y_test)
loss, accuracy, auc = test_scores
print(f"\nTest Accuracy: {accuracy:.4f}, AUC: {auc:.4f}")

# Output of the last run: ModuleNotFoundError: No module named 'numpy'