In [9]:
import pandas as pd

# Load the cleaned data
CSV_PATH = 'D:\\Softwarica\\Thesis File\\Advanced_medical_healthcare_recommendation\\data\\raw\\'
df = pd.read_csv(CSV_PATH + 'cleaned_ED2013.csv')

# Take a random sample of the data
df_sample = df.sample(n=15000, random_state=42)  # Adjust the n value based on your memory capacity

# Separate features and target variables
X = df_sample.drop(['Diagnosis1', 'Medication1'], axis=1)  # Replace with your actual target columns
y_diagnosis = df_sample['Diagnosis1']  # Target for diagnosis prediction
y_medication = df_sample['Medication1']  # Target for medication recommendation


In [10]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns.tolist()

# Create transformers for numerical and categorical data
numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())  # Normalize numerical data
])

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))  # One-hot encode categorical data
])

# Combine transformers into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Split data into training and testing sets for diagnosis prediction
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(X, y_diagnosis, test_size=0.2, random_state=42)

# Split data into training and testing sets for medication recommendation
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X, y_medication, test_size=0.2, random_state=42)

# Apply the preprocessing pipeline to the data
X_train_d = preprocessor.fit_transform(X_train_d)
X_test_d = preprocessor.transform(X_test_d)

X_train_m = preprocessor.fit_transform(X_train_m)
X_test_m = preprocessor.transform(X_test_m)

print('Data preprocessing complete.')



Data preprocessing complete.


In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a Random Forest classifier for diagnosis prediction
rf_model_d = RandomForestClassifier(n_estimators=50, random_state=42)
rf_model_d.fit(X_train_d, y_train_d)

# Evaluate the model on the test set
y_pred_d_rf = rf_model_d.predict(X_test_d)
print("Random Forest Diagnosis Accuracy:", accuracy_score(y_test_d, y_pred_d_rf))
print("Random Forest Diagnosis Classification Report:\n", classification_report(y_test_d, y_pred_d_rf))



Random Forest Diagnosis Accuracy: 0.16433333333333333


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Random Forest Diagnosis Classification Report:
               precision    recall  f1-score   support

          -9       0.75      0.94      0.83        16
       0059-       0.00      0.00      0.00         2
       00845       0.00      0.00      0.00         1
       0088-       0.00      0.00      0.00         3
       0090-       0.00      0.00      0.00         3
       0340-       0.09      0.08      0.09        12
       0341-       1.00      1.00      1.00         1
       0389-       0.57      0.67      0.62         6
       04100       0.00      0.00      0.00         2
       0412-       0.00      0.00      0.00         1
       042--       0.00      0.00      0.00         1
       0529-       0.00      0.00      0.00         1
       0539-       0.00      0.00      0.00         6
       05410       0.00      0.00      0.00         1
       0546-       0.00      0.00      0.00         1
       0549-       0.00      0.00      0.00         2
       0579-       0.00      0.00

In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build a simple NN model
nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_d_resampled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
nn_model.fit(X_train_d_resampled, y_train_d_resampled, epochs=10, batch_size=32, validation_data=(X_test_d, y_test_d))

# Evaluate the model
nn_accuracy = nn_model.evaluate(X_test_d, y_test_d)[1]
print("Neural Network Diagnosis Accuracy:", nn_accuracy)


ModuleNotFoundError: No module named 'tensorflow'

In [5]:
from tensorflow.keras.layers import Conv1D, Flatten

# Build a simple CNN model
cnn_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_d_resampled.shape[1], 1)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Reshape data for CNN
X_train_d_cnn = X_train_d_resampled.reshape(-1, X_train_d_resampled.shape[1], 1)
X_test_d_cnn = X_test_d.reshape(-1, X_test_d.shape[1], 1)

# Train the model
cnn_model.fit(X_train_d_cnn, y_train_d_resampled, epochs=10, batch_size=32, validation_data=(X_test_d_cnn, y_test_d))

# Evaluate the model
cnn_accuracy = cnn_model.evaluate(X_test_d_cnn, y_test_d)[1]
print("CNN Diagnosis Accuracy:", cnn_accuracy)


ModuleNotFoundError: No module named 'tensorflow'

In [6]:
from tensorflow.keras.layers import SimpleRNN

# Build a simple RNN model
rnn_model = Sequential([
    SimpleRNN(64, activation='relu', input_shape=(X_train_d_resampled.shape[1], 1)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
rnn_model.fit(X_train_d_cnn, y_train_d_resampled, epochs=10, batch_size=32, validation_data=(X_test_d_cnn, y_test_d))

# Evaluate the model
rnn_accuracy = rnn_model.evaluate(X_test_d_cnn, y_test_d)[1]
print("RNN Diagnosis Accuracy:", rnn_accuracy)


ModuleNotFoundError: No module named 'tensorflow'

In [7]:
# Assuming you have collected all the accuracy scores
print(f"Random Forest Diagnosis Accuracy: {rf_accuracy}")
print(f"Neural Network Diagnosis Accuracy: {nn_accuracy}")
print(f"CNN Diagnosis Accuracy: {cnn_accuracy}")
print(f"RNN Diagnosis Accuracy: {rnn_accuracy}")


NameError: name 'rf_accuracy' is not defined