In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

# ---------------------------------------------------------------------------
# Patient risk classifier (feed-forward network).
#
# Pipeline: load CSV -> drop non-feature columns -> drop incomplete rows ->
# integer-encode categoricals -> stratified train/val/test split -> scale the
# continuous columns with statistics from the training rows only -> train with
# class weights, early stopping on the validation split -> report on the
# untouched test split -> save the model.
# ---------------------------------------------------------------------------

# Fix Python / NumPy / TensorFlow seeds so weight init, dropout and shuffling
# are repeatable across Restart & Run All.
SEED = 42
keras.utils.set_random_seed(SEED)

# Load dataset
df = pd.read_csv("Improved_Patient_Risk_Calculator_Dataset.csv")

# Remove the non-feature columns (the same ones the Random Forest cells in
# this notebook exclude). Left in, they would be factorized below into
# arbitrary, unscaled integer features. errors="ignore" keeps the cell usable
# if a column is already absent.
df = df.drop(columns=["Patient ID", "Patient Name", "Suggestions"], errors="ignore")

# Drop rows with missing values. Done after removing the unused columns so a
# gap in one of those columns does not discard an otherwise usable row.
df = df.dropna()

# Encode categorical variables
label_encoders = {}
categorical_cols = ["Gender", "Smoking Status", "Drinking Status", "Disease Name", "Lab Tests", "Policy Plan"]
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))  # Ensure string type before encoding
    label_encoders[col] = le  # kept so predictions/inputs can be mapped back later

# Convert all object type columns to string and encode if any left
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].astype(str).factorize()[0]

# Define target and the columns that get standardized
target = "Risk Score"
continuous_cols = ["Age", "BMI", "Claim Amount", "Number of Days Admitted"]
y = np.array(df[target].astype(int) - 1, dtype=np.int32)  # Risk Score is from 1-5, shifting to 0-4 for model training

# Split on row positions first, so the scaler below can be fitted on training
# rows only. 80/20 train+val/test, then 80/20 train/val, both stratified.
row_idx = np.arange(len(df))
trainval_idx, test_idx = train_test_split(
    row_idx, test_size=0.2, random_state=SEED, stratify=y
)
train_idx, val_idx = train_test_split(
    trainval_idx, test_size=0.2, random_state=SEED, stratify=y[trainval_idx]
)

# Normalize continuous variables: fit on the training rows, apply to all rows.
# Fitting on the full frame would leak validation/test statistics into training.
scaler = StandardScaler()
scaler.fit(df.iloc[train_idx][continuous_cols])
df[continuous_cols] = scaler.transform(df[continuous_cols])

# Convert to NumPy arrays
X = np.array(df.drop(columns=[target]), dtype=np.float32)
X_train, y_train = X[train_idx], y[train_idx]  # rows the network is fitted on
X_val, y_val = X[val_idx], y[val_idx]          # drives early stopping / LR schedule
X_test, y_test = X[test_idx], y[test_idx]      # held out until the final evaluation

# Compute class weights to handle imbalance. Key the dict by the actual class
# labels instead of enumerate(): the two only coincide when the labels present
# in y_train are exactly 0..k-1.
classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
class_weights_dict = {int(label): float(weight) for label, weight in zip(classes, class_weights)}

# Build deep learning model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(5, activation='softmax')  # 5 risk score classes
])

# Compile model (sparse loss: targets are integer class ids, not one-hot)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model with early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

# Validation uses its own split. Monitoring the test set here would let early
# stopping and best-weight restoration select the model on the very data the
# final score is reported on.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32,
                    class_weight=class_weights_dict, callbacks=[early_stopping, reduce_lr])

# Evaluate model on the held-out test split
eval_results = model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

# Save model
model.save("patient_risk_model.h5")

Epoch 1/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.1956 - loss: 3.7089 - val_accuracy: 0.2623 - val_loss: 2.6862 - learning_rate: 0.0010
Epoch 2/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.2079 - loss: 2.8901 - val_accuracy: 0.2348 - val_loss: 2.4729 - learning_rate: 0.0010
Epoch 3/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.1971 - loss: 2.6194 - val_accuracy: 0.2147 - val_loss: 2.2915 - learning_rate: 0.0010
Epoch 4/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.1890 - loss: 2.3722 - val_accuracy: 0.2511 - val_loss: 2.1456 - learning_rate: 0.0010
Epoch 5/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.1991 - loss: 2.2006 - val_accuracy: 0.2541 - val_loss: 2.0303 - learning_rate: 0.0010
Epoch 6/50
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [



Test Loss: 0.5903410911560059, Test Accuracy: 0.5950965881347656


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------------------------------------------------------
# Random Forest baseline for the Risk Score classifier.
# Loads the CSV, integer-encodes the text columns, makes a stratified 80/20
# split, standardizes the features using training statistics only, fits the
# forest and prints accuracy plus a per-class report.
# ---------------------------------------------------------------------------

# Load dataset
file_path = "/content/Improved_Patient_Risk_Calculator_Dataset.csv"
df = pd.read_csv(file_path)

# Drop non-essential columns for model training
df = df.drop(columns=["Patient ID", "Patient Name", "Suggestions"])

# Encode categorical variables (one fitted encoder per column is kept so the
# integer codes can be mapped back to the original labels)
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into features and target
X = df.drop(columns=["Risk Score"])
y = df["Risk Score"]

# Split into training and testing sets BEFORE scaling, stratified so both
# parts keep the (imbalanced) class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize numerical features: the scaler learns mean/std from the training
# rows only and is then applied unchanged to the test rows, so no test-set
# statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_scaled = scaler.transform(X)  # full matrix, scaled with the train-only statistics

# Train Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
# NOTE(review): the recorded run of this cell reports 1.00 on every class.
# A perfect score often means some feature encodes the target - confirm
# before trusting the number.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Model Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       199
           2       1.00      1.00      1.00       618
           3       1.00      1.00      1.00       423
           4       1.00      1.00      1.00       384
           5       1.00      1.00      1.00       376

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------------------------------------------------------
# Random Forest with moderate regularization (depth / leaf-size limits) and
# 5-fold cross-validation on the training split.
# Loads the CSV, integer-encodes the text columns, makes a stratified 80/20
# split, standardizes with training statistics only, then reports CV accuracy
# and held-out test metrics.
# ---------------------------------------------------------------------------

# Load dataset
file_path = "/content/Improved_Patient_Risk_Calculator_Dataset.csv"
df = pd.read_csv(file_path)

# Drop non-essential columns for model training
df = df.drop(columns=["Patient ID", "Patient Name", "Suggestions"])

# Encode categorical variables (fitted encoders are kept for inverse mapping)
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into features and target
X = df.drop(columns=["Risk Score"])
y = df["Risk Score"]

# Split into training and testing sets BEFORE scaling, stratified so both
# parts keep the (imbalanced) class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize numerical features: fit on the training rows only, then apply the
# same transform to the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_scaled = scaler.transform(X)  # full matrix, scaled with the train-only statistics

# Train Random Forest model with regularization
model = RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=5, min_samples_leaf=2, random_state=42)
model.fit(X_train, y_train)

# Cross-validation (cross_val_score fits fresh clones of `model` per fold; the
# estimator fitted above is left untouched)
cv_scores = cross_val_score(model, X_train, y_train, cv=5)
print(f"Cross-validation Accuracy: {np.mean(cv_scores):.2f}")

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
# NOTE(review): the recorded run of this cell reports 1.00 for both CV and
# test. A perfect score that survives regularization often means some feature
# encodes the target - confirm before trusting the number.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Cross-validation Accuracy: 1.00
Test Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       199
           2       1.00      1.00      1.00       618
           3       1.00      1.00      1.00       423
           4       1.00      1.00      1.00       384
           5       1.00      1.00      1.00       376

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------------------------------------------------------
# Random Forest with strong regularization (50 shallow trees) and 5-fold
# cross-validation on the training split.
# Loads the CSV, integer-encodes the text columns, makes a stratified 80/20
# split, standardizes with training statistics only, reports CV accuracy and
# held-out test metrics, and lists the most influential features.
# ---------------------------------------------------------------------------

# Load dataset
file_path = "/content/Improved_Patient_Risk_Calculator_Dataset.csv"
df = pd.read_csv(file_path)

# Drop non-essential columns for model training
df = df.drop(columns=["Patient ID", "Patient Name", "Suggestions"])

# Encode categorical variables (fitted encoders are kept for inverse mapping)
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into features and target
X = df.drop(columns=["Risk Score"])
y = df["Risk Score"]

# Split into training and testing sets BEFORE scaling, stratified so both
# parts keep the (imbalanced) class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize numerical features: fit on the training rows only, then apply the
# same transform to the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_scaled = scaler.transform(X)  # full matrix, scaled with the train-only statistics

# Train Random Forest model with stronger regularization to reduce overfitting
model = RandomForestClassifier(n_estimators=50, max_depth=5, min_samples_split=10, min_samples_leaf=5, random_state=42)
model.fit(X_train, y_train)

# Cross-validation (cross_val_score fits fresh clones of `model` per fold; the
# estimator fitted above is left untouched)
cv_scores = cross_val_score(model, X_train, y_train, cv=5)
print(f"Cross-validation Accuracy: {np.mean(cv_scores):.2f}")

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# NOTE(review): the recorded run of this cell still reports 1.00 with
# depth-5 trees. That pattern points to a feature that (nearly) determines
# Risk Score rather than to overfitting - TODO confirm. The ranking below
# shows which columns the forest leans on, as a starting point.
feature_importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Top feature importances:")
print(feature_importances.head(10))


Cross-validation Accuracy: 1.00
Test Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       199
           2       1.00      1.00      1.00       618
           3       1.00      1.00      1.00       423
           4       1.00      1.00      1.00       384
           5       1.00      1.00      1.00       376

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

