In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv('healthcare_dataset.csv')

# Display the first 5 rows
print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

# Display information about the dataset
print(df.info())

| Name          | Age   | Gender   | Blood Type   | Medical Condition   | Date of Admission   | Doctor           | Hospital                   | Insurance Provider   | Billing Amount   | Room Number   | Admission Type   | Discharge Date   | Medication   | Test Results   |
|:--------------|:------|:---------|:-------------|:--------------------|:--------------------|:-----------------|:---------------------------|:---------------------|:-----------------|:--------------|:-----------------|:-----------------|:-------------|:---------------|
| Bobby JacksOn | 30    | Male     | B-           | Cancer              | 2024-01-31          | Matthew Smith    | Sons and Miller            | Blue Cross           | 18856.3          | 328           | Urgent           | 2024-02-02       | Paracetamol  | Normal         |
| LesLie TErRy  | 62    | Male     | A+           | Obesity             | 2019-08-20          | Samantha Davies  | Kim Inc                    | Medicare             | 33643.3          

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Convert 'Date of Admission' and 'Discharge Date' to datetime objects
df['Date of Admission'] = pd.to_datetime(df['Date of Admission'])
df['Discharge Date'] = pd.to_datetime(df['Discharge Date'])

# Calculate 'Length of Stay' in days
df['Length of Stay'] = (df['Discharge Date'] - df['Date of Admission']).dt.days

# Create 'Readmission' target variable (1 for readmission, 0 otherwise)
# Define readmission as a hospital stay of 7 days or less
df['Readmission'] = (df['Length of Stay'] <= 7).astype(int)

# Select features for the model
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Length of Stay']
target = 'Readmission'

X = df[features]
y = df[target]

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Length of Stay']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and Logistic Regression model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', LogisticRegression(random_state=42, solver='liblinear'))])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Explore key risk factors
# Get the trained classifier from the pipeline
trained_classifier = model.named_steps['classifier']
feature_names = model.named_steps['preprocessor'].get_feature_names_out()

# Get coefficients for Logistic Regression (risk factors)
if isinstance(trained_classifier, LogisticRegression):
    coefficients = trained_classifier.coef_[0]
    feature_importance = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})
    feature_importance['Absolute Coefficient'] = np.abs(feature_importance['Coefficient'])
    feature_importance = feature_importance.sort_values(by='Absolute Coefficient', ascending=False)
    print("\nTop 10 Risk Factors for Readmission:")
    print(feature_importance.head(10).to_markdown(index=False, numalign="left", stralign="left"))

Model Accuracy: 1.0000

Top 10 Risk Factors for Readmission:
| Feature                             | Coefficient   | Absolute Coefficient   |
|:------------------------------------|:--------------|:-----------------------|
| num__Length of Stay                 | -25.9732      | 25.9732                |
| cat__Gender_Male                    | -6.02325      | 6.02325                |
| cat__Gender_Female                  | -5.98416      | 5.98416                |
| cat__Admission Type_Elective        | -4.04792      | 4.04792                |
| cat__Admission Type_Emergency       | -4.01565      | 4.01565                |
| cat__Admission Type_Urgent          | -3.94384      | 3.94384                |
| cat__Medical Condition_Cancer       | -2.07719      | 2.07719                |
| cat__Medical Condition_Obesity      | -2.05092      | 2.05092                |
| cat__Medical Condition_Arthritis    | -2.00539      | 2.00539                |
| cat__Medical Condition_Hypertension | -1.96621

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Re-create 'Length of Stay' (it's fine to keep it for defining the new target)
df['Length of Stay'] = (pd.to_datetime(df['Discharge Date']) - pd.to_datetime(df['Date of Admission'])).dt.days

# Redefine the target variable: Predict if Length of Stay is > 7 days
df['Length_of_Stay_GT_7'] = (df['Length of Stay'] > 7).astype(int)

# Select features for the model (excluding 'Length of Stay' from features)
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Billing Amount']
target = 'Length_of_Stay_GT_7'

X = df[features]
y = df[target]

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Billing Amount']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and Logistic Regression model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', LogisticRegression(random_state=42, solver='liblinear'))])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy (predicting Length of Stay > 7 days): {accuracy:.4f}")

# Explore key risk factors for Longer Stays
trained_classifier = model.named_steps['classifier']
feature_names = model.named_steps['preprocessor'].get_feature_names_out()

if isinstance(trained_classifier, LogisticRegression):
    coefficients = trained_classifier.coef_[0]
    feature_importance = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})
    feature_importance['Absolute Coefficient'] = np.abs(feature_importance['Coefficient'])
    feature_importance = feature_importance.sort_values(by='Absolute Coefficient', ascending=False)
    print("\nTop 10 Risk Factors for Longer Stays (> 7 days):")
    print(feature_importance.head(10).to_markdown(index=False, numalign="left", stralign="left"))

Model Accuracy (predicting Length of Stay > 7 days): 0.7674

Top 10 Risk Factors for Longer Stays (> 7 days):
| Feature                             | Coefficient   | Absolute Coefficient   |
|:------------------------------------|:--------------|:-----------------------|
| cat__Gender_Male                    | 0.310727      | 0.310727               |
| cat__Gender_Female                  | 0.281435      | 0.281435               |
| cat__Admission Type_Elective        | 0.233668      | 0.233668               |
| cat__Admission Type_Emergency       | 0.199009      | 0.199009               |
| cat__Admission Type_Urgent          | 0.159485      | 0.159485               |
| cat__Medical Condition_Cancer       | 0.123222      | 0.123222               |
| cat__Medical Condition_Arthritis    | 0.114284      | 0.114284               |
| cat__Medical Condition_Asthma       | 0.100321      | 0.100321               |
| cat__Medical Condition_Hypertension | 0.0905567     | 0.0905567              |

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Re-create 'Length of Stay'
df['Length of Stay'] = (pd.to_datetime(df['Discharge Date']) - pd.to_datetime(df['Date of Admission'])).dt.days

# Redefine the target variable: Predict if Length of Stay is > 7 days
df['Length_of_Stay_GT_7'] = (df['Length of Stay'] > 7).astype(int)

# Select features for the model (excluding 'Length of Stay' from features)
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Billing Amount']
target = 'Length_of_Stay_GT_7'

X = df[features]
y = df[target]

# Check class balance
print("Class distribution for 'Length_of_Stay_GT_7':")
print(y.value_counts().to_markdown(numalign="left", stralign="left"))

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Billing Amount']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and RandomForestClassifier model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', RandomForestClassifier(random_state=42, n_estimators=100))]) # Increased n_estimators

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) # Stratify to maintain class balance

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy (RandomForestClassifier, predicting Length of Stay > 7 days): {accuracy:.4f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Explore key risk factors for Longer Stays using feature importance
trained_classifier = model.named_steps['classifier']
feature_names = model.named_steps['preprocessor'].get_feature_names_out()

if isinstance(trained_classifier, RandomForestClassifier):
    importances = trained_classifier.feature_importances_
    feature_importance_rf = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    feature_importance_rf = feature_importance_rf.sort_values(by='Importance', ascending=False)
    print("\nTop 10 Risk Factors for Longer Stays (> 7 days) (RandomForestClassifier):")
    print(feature_importance_rf.head(10).to_markdown(index=False, numalign="left", stralign="left"))

Class distribution for 'Length_of_Stay_GT_7':
| Length_of_Stay_GT_7   | count   |
|:----------------------|:--------|
| 1                     | 42514   |
| 0                     | 12986   |

Model Accuracy (RandomForestClassifier, predicting Length of Stay > 7 days): 0.7040

Classification Report:
              precision    recall  f1-score   support

           0       0.27      0.15      0.20      2597
           1       0.77      0.87      0.82      8503

    accuracy                           0.70     11100
   macro avg       0.52      0.51      0.51     11100
weighted avg       0.65      0.70      0.67     11100


Top 10 Risk Factors for Longer Stays (> 7 days) (RandomForestClassifier):
| Feature                             | Importance   |
|:------------------------------------|:-------------|
| num__Billing Amount                 | 0.702028     |
| num__Age                            | 0.280379     |
| cat__Admission Type_Emergency       | 0.00170633   |
| cat__Medical Condition

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import numpy as np

# Re-create 'Length of Stay'
df['Length of Stay'] = (pd.to_datetime(df['Discharge Date']) - pd.to_datetime(df['Date of Admission'])).dt.days

# Redefine the target variable: Predict if Length of Stay is > 7 days
df['Length_of_Stay_GT_7'] = (df['Length of Stay'] > 7).astype(int)

# Select features for the model (excluding 'Length of Stay' from features)
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Billing Amount']
target = 'Length_of_Stay_GT_7'

X = df[features]
y = df[target]

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Billing Amount']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create an imbalanced-learn pipeline with SMOTE and GradientBoostingClassifier
# SMOTE is applied only to the training data after preprocessing
model = ImbPipeline(steps=[('preprocessor', preprocessor),
                           ('smote', SMOTE(random_state=42)),
                           ('classifier', GradientBoostingClassifier(random_state=42, n_estimators=100))])

# Split the data into training and testing sets, stratifying to maintain class balance
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy (GradientBoostingClassifier with SMOTE, predicting Length of Stay > 7 days): {accuracy:.4f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Explore key risk factors for Longer Stays using feature importance
# Note: Feature importance from GradientBoostingClassifier is directly available
trained_classifier = model.named_steps['classifier']
preprocessor_fitted = model.named_steps['preprocessor']
feature_names = preprocessor_fitted.get_feature_names_out()

if isinstance(trained_classifier, GradientBoostingClassifier):
    importances = trained_classifier.feature_importances_
    feature_importance_gb = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    feature_importance_gb = feature_importance_gb.sort_values(by='Importance', ascending=False)
    print("\nTop 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier with SMOTE):")
    print(feature_importance_gb.head(10).to_markdown(index=False, numalign="left", stralign="left"))


Model Accuracy (GradientBoostingClassifier with SMOTE, predicting Length of Stay > 7 days): 0.5382

Classification Report:
              precision    recall  f1-score   support

           0       0.23      0.43      0.30      2597
           1       0.77      0.57      0.66      8503

    accuracy                           0.54     11100
   macro avg       0.50      0.50      0.48     11100
weighted avg       0.64      0.54      0.57     11100


Top 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier with SMOTE):
| Feature                             | Importance   |
|:------------------------------------|:-------------|
| num__Age                            | 0.974237     |
| num__Billing Amount                 | 0.0169246    |
| cat__Medical Condition_Cancer       | 0.00232831   |
| cat__Medical Condition_Diabetes     | 0.00195835   |
| cat__Medical Condition_Obesity      | 0.0011447    |
| cat__Medical Condition_Asthma       | 0.000906984  |
| cat__Admission Ty

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Re-create 'Length of Stay'
df['Length of Stay'] = (pd.to_datetime(df['Discharge Date']) - pd.to_datetime(df['Date of Admission'])).dt.days

# Redefine the target variable: Predict if Length of Stay is > 7 days
df['Length_of_Stay_GT_7'] = (df['Length of Stay'] > 7).astype(int)

# Select features for the model (excluding 'Length of Stay' from features)
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Billing Amount']
target = 'Length_of_Stay_GT_7'

X = df[features]
y = df[target]

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Billing Amount']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and GradientBoostingClassifier model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', GradientBoostingClassifier(random_state=42, n_estimators=100))])

# Split the data into training and testing sets, stratifying to maintain class balance
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy (GradientBoostingClassifier, predicting Length of Stay > 7 days): {accuracy:.4f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Explore key risk factors for Longer Stays using feature importance
trained_classifier = model.named_steps['classifier']
preprocessor_fitted = model.named_steps['preprocessor']
feature_names = preprocessor_fitted.get_feature_names_out()

if isinstance(trained_classifier, GradientBoostingClassifier):
    importances = trained_classifier.feature_importances_
    feature_importance_gb = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    feature_importance_gb = feature_importance_gb.sort_values(by='Importance', ascending=False)
    print("\nTop 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier):")
    print(feature_importance_gb.head(10).to_markdown(index=False, numalign="left", stralign="left"))


Model Accuracy (GradientBoostingClassifier, predicting Length of Stay > 7 days): 0.7659

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2597
           1       0.77      1.00      0.87      8503

    accuracy                           0.77     11100
   macro avg       0.38      0.50      0.43     11100
weighted avg       0.59      0.77      0.66     11100


Top 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier):
| Feature                          | Importance   |
|:---------------------------------|:-------------|
| num__Billing Amount              | 0.80446      |
| num__Age                         | 0.115339     |
| cat__Medical Condition_Cancer    | 0.0167565    |
| cat__Admission Type_Urgent       | 0.0106972    |
| cat__Admission Type_Elective     | 0.00993771   |
| cat__Medical Condition_Obesity   | 0.00780037   |
| cat__Medical Condition_Diabetes  | 0.00723072   |
| cat__Gender

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Re-create 'Length of Stay'
df['Date of Admission'] = pd.to_datetime(df['Date of Admission'])
df['Discharge Date'] = pd.to_datetime(df['Discharge Date'])
df['Length of Stay'] = (df['Discharge Date'] - df['Date of Admission']).dt.days

# Redefine the target variable: Predict if Length of Stay is > 7 days
df['Length_of_Stay_GT_7'] = (df['Length of Stay'] > 7).astype(int)

# Select features for the model (excluding 'Length of Stay' from features)
features = ['Age', 'Gender', 'Medical Condition', 'Admission Type', 'Billing Amount']
target = 'Length_of_Stay_GT_7'

X = df[features]
y = df[target]

# Define categorical and numerical features
categorical_features = ['Gender', 'Medical Condition', 'Admission Type']
numerical_features = ['Age', 'Billing Amount']

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and GradientBoostingClassifier model
# Increased n_estimators to 200, learning_rate adjusted
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', GradientBoostingClassifier(random_state=42, n_estimators=200, learning_rate=0.05))])

# Split the data into training and testing sets, stratifying to maintain class balance
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy (GradientBoostingClassifier, n_estimators=200, predicting Length of Stay > 7 days): {accuracy:.4f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Explore key risk factors for Longer Stays using feature importance
trained_classifier = model.named_steps['classifier']
preprocessor_fitted = model.named_steps['preprocessor']
feature_names = preprocessor_fitted.get_feature_names_out()

if isinstance(trained_classifier, GradientBoostingClassifier):
    importances = trained_classifier.feature_importances_
    feature_importance_gb = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    feature_importance_gb = feature_importance_gb.sort_values(by='Importance', ascending=False)
    print("\nTop 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier, n_estimators=200):")
    print(feature_importance_gb.head(10).to_markdown(index=False, numalign="left", stralign="left"))


Model Accuracy (GradientBoostingClassifier, n_estimators=200, predicting Length of Stay > 7 days): 0.7659

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2597
           1       0.77      1.00      0.87      8503

    accuracy                           0.77     11100
   macro avg       0.38      0.50      0.43     11100
weighted avg       0.59      0.77      0.66     11100


Top 10 Risk Factors for Longer Stays (> 7 days) (GradientBoostingClassifier, n_estimators=200):
| Feature                          | Importance   |
|:---------------------------------|:-------------|
| num__Billing Amount              | 0.804002     |
| num__Age                         | 0.112549     |
| cat__Medical Condition_Cancer    | 0.0204282    |
| cat__Admission Type_Urgent       | 0.0104911    |
| cat__Admission Type_Elective     | 0.00925717   |
| cat__Admission Type_Emergency    | 0.0083238    |
| cat__Medical Condition_Diab

In [8]:
from sklearn.metrics import accuracy_score

# y_test: true labels
# y_pred: predicted labels
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7659459459459459


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data
iris = load_iris()
X = iris.data
y = iris.target

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train model
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [10]:
import pickle

# Save the trained model to 'my_model.pkl'
with open('my_model.pkl', 'wb') as file:
    pickle.dump(model, file)

print("Model saved successfully.")


Model saved successfully.


In [11]:
import os

# Check if the file exists
print(os.path.exists('my_model.pkl'))  # Output: True if saved


True


In [12]:
# Use the loaded model to make predictions
y_pred = loaded_model.predict(X_test)

print("Predictions:", y_pred)


NameError: name 'loaded_model' is not defined