<a href="https://colab.research.google.com/github/KMSH-JWL/NEWSandLABAI/blob/main/2_sets_of_NEWS%2BLAB_for_training_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

news1 and lab1 contain total 5233 cases with NEWS score and LAB score coded with only Na, K, CRP, Hb.

In [16]:
import pandas as pd
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the dataset (replace 'dataset.csv' with the actual file name and path)
data = pd.read_csv('news1_lab1.csv')

# Split the data into features (NEWS and LAB scores) and labels (clinically critical or stable)
X = data[['NEWS1', 'LAB1']]
y = data['status']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Print the number of samples in each class after applying SMOTE
print("Before SMOTE:")
print(y.value_counts())

print("\nAfter SMOTE:")
print(y_resampled.value_counts())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a decision tree classifier
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

# Perform k-fold cross-validation for decision tree classifier
scores_dt = cross_val_score(decision_tree, X_train, y_train, cv=5)

# Train a random forest classifier
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)

# Perform k-fold cross-validation for random forest classifier
scores_rf = cross_val_score(random_forest, X_train, y_train, cv=5)

# Print cross-validation scores
print("Decision Tree Cross-Validation Scores:", scores_dt)
print("Mean Decision Tree Cross-Validation Score:", np.mean(scores_dt))
print("\nRandom Forest Cross-Validation Scores:", scores_rf)
print("Mean Random Forest Cross-Validation Score:", np.mean(scores_rf))

# Make predictions on the test set using the decision tree classifier
y_pred_dt = decision_tree.predict(X_test)

# Calculate evaluation metrics for the decision tree classifier
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# Make predictions on the test set using the random forest classifier
y_pred_rf = random_forest.predict(X_test)

# Calculate evaluation metrics for the random forest classifier
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

# Print the evaluation metrics and confusion matrices
print("\nDecision Tree Results:")
print("Accuracy:", accuracy_dt)
print("Precision Score:", precision_dt)
print("Recall Score:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

print("\nRandom Forest Results:")
print("Accuracy:", accuracy_rf)
print("Precision Score:", precision_rf)
print("Recall Score:", recall_rf)
print("Confusion Matrix:\n", confusion_matrix_rf)



Before SMOTE:
s    4645
c     588
Name: status, dtype: int64

After SMOTE:
c    4645
s    4645
Name: status, dtype: int64
Decision Tree Cross-Validation Scores: [0.70073974 0.7061197  0.71063257 0.68236878 0.69246299]
Mean Decision Tree Cross-Validation Score: 0.6984647564672202

Random Forest Cross-Validation Scores: [0.70006725 0.7067922  0.70995962 0.68034993 0.69179004]
Mean Random Forest Cross-Validation Score: 0.697791808957126

Decision Tree Results:
Accuracy: 0.6969860064585576
Precision Score: 0.697543985891809
Recall Score: 0.6969860064585576
Confusion Matrix:
 [[631 304]
 [259 664]]

Random Forest Results:
Accuracy: 0.6953713670613563
Precision Score: 0.6959958684424401
Recall Score: 0.6953713670613563
Confusion Matrix:
 [[628 307]
 [259 664]]


If training is done only with NEWS score of 5233 cases (news1).

In [17]:
import pandas as pd
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the dataset (replace 'dataset.csv' with the actual file name and path)
data = pd.read_csv('news1_lab1.csv')

# Split the data into features (NEWS and LAB scores) and labels (clinically critical or stable)
X = data[['NEWS1']]
y = data['status']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Print the number of samples in each class after applying SMOTE
print("Before SMOTE:")
print(y.value_counts())

print("\nAfter SMOTE:")
print(y_resampled.value_counts())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a decision tree classifier
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

# Perform k-fold cross-validation for decision tree classifier
scores_dt = cross_val_score(decision_tree, X_train, y_train, cv=5)

# Train a random forest classifier
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)

# Perform k-fold cross-validation for random forest classifier
scores_rf = cross_val_score(random_forest, X_train, y_train, cv=5)

# Print cross-validation scores
print("Decision Tree Cross-Validation Scores:", scores_dt)
print("Mean Decision Tree Cross-Validation Score:", np.mean(scores_dt))
print("\nRandom Forest Cross-Validation Scores:", scores_rf)
print("Mean Random Forest Cross-Validation Score:", np.mean(scores_rf))

# Make predictions on the test set using the decision tree classifier
y_pred_dt = decision_tree.predict(X_test)

# Calculate evaluation metrics for the decision tree classifier
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# Make predictions on the test set using the random forest classifier
y_pred_rf = random_forest.predict(X_test)

# Calculate evaluation metrics for the random forest classifier
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

# Print the evaluation metrics and confusion matrices
print("\nDecision Tree Results:")
print("Accuracy:", accuracy_dt)
print("Precision Score:", precision_dt)
print("Recall Score:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

print("\nRandom Forest Results:")
print("Accuracy:", accuracy_rf)
print("Precision Score:", precision_rf)
print("Recall Score:", recall_rf)
print("Confusion Matrix:\n", confusion_matrix_rf)



Before SMOTE:
s    4645
c     588
Name: status, dtype: int64

After SMOTE:
c    4645
s    4645
Name: status, dtype: int64
Decision Tree Cross-Validation Scores: [0.67316745 0.6664425  0.67631225 0.67092867 0.67362046]
Mean Decision Tree Cross-Validation Score: 0.6720942651476547

Random Forest Cross-Validation Scores: [0.67316745 0.6664425  0.67631225 0.67092867 0.67362046]
Mean Random Forest Cross-Validation Score: 0.6720942651476547

Decision Tree Results:
Accuracy: 0.6636167922497309
Precision Score: 0.6637709201633951
Recall Score: 0.6636167922497309
Confusion Matrix:
 [[611 324]
 [301 622]]

Random Forest Results:
Accuracy: 0.6636167922497309
Precision Score: 0.6637709201633951
Recall Score: 0.6636167922497309
Confusion Matrix:
 [[611 324]
 [301 622]]


news2 and lab2 contain total 541 cases with NEWS score and LAB score coded with pH, Lactate, Na, K, CRP, Hb.

In [18]:
import pandas as pd
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the dataset (replace 'dataset.csv' with the actual file name and path)
data = pd.read_csv('new2_lab2.csv')

# Split the data into features (NEWS and LAB scores) and labels (clinically critical or stable)
X = data[['NEWS2', 'LAB2']]
y = data['status']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Print the number of samples in each class after applying SMOTE
print("Before SMOTE:")
print(y.value_counts())

print("\nAfter SMOTE:")
print(y_resampled.value_counts())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a decision tree classifier
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

# Perform k-fold cross-validation for decision tree classifier
scores_dt = cross_val_score(decision_tree, X_train, y_train, cv=5)

# Train a random forest classifier
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)

# Perform k-fold cross-validation for random forest classifier
scores_rf = cross_val_score(random_forest, X_train, y_train, cv=5)

# Print cross-validation scores
print("Decision Tree Cross-Validation Scores:", scores_dt)
print("Mean Decision Tree Cross-Validation Score:", np.mean(scores_dt))
print("\nRandom Forest Cross-Validation Scores:", scores_rf)
print("Mean Random Forest Cross-Validation Score:", np.mean(scores_rf))

# Make predictions on the test set using the decision tree classifier
y_pred_dt = decision_tree.predict(X_test)

# Calculate evaluation metrics for the decision tree classifier
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# Make predictions on the test set using the random forest classifier
y_pred_rf = random_forest.predict(X_test)

# Calculate evaluation metrics for the random forest classifier
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

# Print the evaluation metrics and confusion matrices
print("\nDecision Tree Results:")
print("Accuracy:", accuracy_dt)
print("Precision Score:", precision_dt)
print("Recall Score:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

print("\nRandom Forest Results:")
print("Accuracy:", accuracy_rf)
print("Precision Score:", precision_rf)
print("Recall Score:", recall_rf)
print("Confusion Matrix:\n", confusion_matrix_rf)



Before SMOTE:
s    404
c    137
Name: status, dtype: int64

After SMOTE:
c    404
s    404
Name: status, dtype: int64
Decision Tree Cross-Validation Scores: [0.66923077 0.62015504 0.6744186  0.74418605 0.6744186 ]
Mean Decision Tree Cross-Validation Score: 0.6764818127608825

Random Forest Cross-Validation Scores: [0.70769231 0.65891473 0.72093023 0.76744186 0.68217054]
Mean Random Forest Cross-Validation Score: 0.7074299344066786

Decision Tree Results:
Accuracy: 0.7160493827160493
Precision Score: 0.7301565584875831
Recall Score: 0.7160493827160493
Confusion Matrix:
 [[58 15]
 [31 58]]

Random Forest Results:
Accuracy: 0.7407407407407407
Precision Score: 0.7456180460295687
Recall Score: 0.7407407407407407
Confusion Matrix:
 [[56 17]
 [25 64]]


if NEWS scores of 541 cases (news2) are given for training...

In [19]:
import pandas as pd
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the dataset (replace 'dataset.csv' with the actual file name and path)
data = pd.read_csv('new2_lab2.csv')

# Split the data into features (NEWS and LAB scores) and labels (clinically critical or stable)
X = data[['NEWS2']]
y = data['status']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Print the number of samples in each class after applying SMOTE
print("Before SMOTE:")
print(y.value_counts())

print("\nAfter SMOTE:")
print(y_resampled.value_counts())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a decision tree classifier
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

# Perform k-fold cross-validation for decision tree classifier
scores_dt = cross_val_score(decision_tree, X_train, y_train, cv=5)

# Train a random forest classifier
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)

# Perform k-fold cross-validation for random forest classifier
scores_rf = cross_val_score(random_forest, X_train, y_train, cv=5)

# Print cross-validation scores
print("Decision Tree Cross-Validation Scores:", scores_dt)
print("Mean Decision Tree Cross-Validation Score:", np.mean(scores_dt))
print("\nRandom Forest Cross-Validation Scores:", scores_rf)
print("Mean Random Forest Cross-Validation Score:", np.mean(scores_rf))

# Make predictions on the test set using the decision tree classifier
y_pred_dt = decision_tree.predict(X_test)

# Calculate evaluation metrics for the decision tree classifier
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# Make predictions on the test set using the random forest classifier
y_pred_rf = random_forest.predict(X_test)

# Calculate evaluation metrics for the random forest classifier
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

# Print the evaluation metrics and confusion matrices
print("\nDecision Tree Results:")
print("Accuracy:", accuracy_dt)
print("Precision Score:", precision_dt)
print("Recall Score:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

print("\nRandom Forest Results:")
print("Accuracy:", accuracy_rf)
print("Precision Score:", precision_rf)
print("Recall Score:", recall_rf)
print("Confusion Matrix:\n", confusion_matrix_rf)



Before SMOTE:
s    404
c    137
Name: status, dtype: int64

After SMOTE:
c    404
s    404
Name: status, dtype: int64
Decision Tree Cross-Validation Scores: [0.66153846 0.60465116 0.58139535 0.65891473 0.5503876 ]
Mean Decision Tree Cross-Validation Score: 0.6113774597495528

Random Forest Cross-Validation Scores: [0.66153846 0.60465116 0.6124031  0.64341085 0.5503876 ]
Mean Random Forest Cross-Validation Score: 0.6144782349433513

Decision Tree Results:
Accuracy: 0.6049382716049383
Precision Score: 0.6015534534053053
Recall Score: 0.6049382716049383
Confusion Matrix:
 [[36 37]
 [27 62]]

Random Forest Results:
Accuracy: 0.6049382716049383
Precision Score: 0.6015534534053053
Recall Score: 0.6049382716049383
Confusion Matrix:
 [[36 37]
 [27 62]]
