In [17]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from scikeras.wrappers import KerasClassifier

In [18]:
file_path = '../datasets/diagnosis_treatment.csv'
data = pd.read_csv(file_path)

In [19]:
# Encode categorical variables
label_encoders = {
    'Gender': LabelEncoder(),
    'Diagnosis': LabelEncoder(),
    'Treatment': LabelEncoder()
}

data['Gender'] = label_encoders['Gender'].fit_transform(data['Gender'])
data['Diagnosis'] = label_encoders['Diagnosis'].fit_transform(data['Diagnosis'])
data['Treatment'] = label_encoders['Treatment'].fit_transform(data['Treatment'])

In [20]:
# Feature Engineering
data['Age_Bin'] = pd.cut(data['Age'], bins=[0, 18, 35, 50, 65, 100], labels=[0, 1, 2, 3, 4])

# Advanced Collaborative Filtering using SVD
interaction_matrix = data.pivot_table(index='PatientID', columns='Treatment', aggfunc='size', fill_value=0)

# SVD for dimensionality reduction
n_components = 20
svd = TruncatedSVD(n_components=n_components, random_state=42)
patient_features = svd.fit_transform(interaction_matrix)
treatment_features = svd.components_

In [21]:
# Create patient feature matrix
patient_feature_df = pd.DataFrame(patient_features, index=interaction_matrix.index)

# Merge patient features with original data
enhanced_data = pd.merge(data, patient_feature_df, left_on='PatientID', right_index=True)
X = enhanced_data.drop(['Treatment', 'PatientID', 'Notes'], axis=1)
y = enhanced_data['Treatment']

# Convert all column names to strings to avoid mixed types
X.columns = X.columns.astype(str)

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [22]:
# Define a function to create the neural network model
def create_model(optimizer='adam'):
    model = Sequential()
    model.add(Dense(128, input_dim=X_train.shape[1], kernel_initializer='he_uniform', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(len(label_encoders['Treatment'].classes_), activation='softmax'))
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model using KerasClassifier
model = KerasClassifier(model=create_model, verbose=0)

In [23]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'batch_size': [32],
    'epochs': [50],
    'optimizer': ['adam', 'rmsprop']
}

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
best_model = grid_result.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best model accuracy: {accuracy}")

Best model accuracy: 1.0


In [29]:
new_patient = pd.DataFrame([[25, 0, label_encoders['Diagnosis'].transform(['Acne'])[0], 0]], 
                           columns=['Age', 'Gender', 'Diagnosis', 'Age_Bin'])
new_patient_scaled = scaler.transform(new_patient)
predicted_treatment = best_model.predict(new_patient)
predicted_treatment_label = label_encoders['Treatment'].inverse_transform(predicted_treatment)
print(f"Recommended treatment: {predicted_treatment_label[0]}")

ValueError: X has shape (4,), but this KerasClassifier is expecting X of shape (24,)