In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import pickle


In [14]:
def load_data_from_pickle(obj_name):
    '''
    load_data_from_pickle function loads and deserializes an object from a pickle file.

    Args:
        obj_name (str): The path to the pickle file containing the serialized object. 

    Returns:
        obj (pd.DataFrame): The deserialized object that was stored in the pickle file. 

    '''
    with open(obj_name, 'rb') as file:
        obj = pickle.load(file)
    return obj

In [15]:
X_train = load_data_from_pickle("/Users/sharanyasenthil/Downloads/data_processed_data_X_train.pkl")
y_train = load_data_from_pickle("/Users/sharanyasenthil/Downloads/data_processed_data_y_train.pkl")

In [16]:
X_test = load_data_from_pickle("/Users/sharanyasenthil/Downloads/data_processed_data_X_test.pkl")
y_test = load_data_from_pickle("/Users/sharanyasenthil/Downloads/data_processed_data_y_test.pkl")

In [17]:
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

# Decision Tree
dec_tree = DecisionTreeClassifier()
dec_tree.fit(X_train, y_train)
y_pred_dec_tree = dec_tree.predict(X_test)

# Random Forest
rand_forest = RandomForestClassifier()
rand_forest.fit(X_train, y_train)
y_pred_rand_forest = rand_forest.predict(X_test)

# Evaluation Function
def evaluate_model(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    cm = confusion_matrix(y_test, y_pred)
    return accuracy, precision, recall, f1, cm

# Evaluating the models
log_reg_results = evaluate_model(y_test, y_pred_log_reg)
dec_tree_results = evaluate_model(y_test, y_pred_dec_tree)
rand_forest_results = evaluate_model(y_test, y_pred_rand_forest)

# Displaying the results
print("Logistic Regression Results: ", log_reg_results)
print("Decision Tree Results: ", dec_tree_results)
print("Random Forest Results: ", rand_forest_results)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Results:  (0.9793066754825945, 0.23728813559322035, 0.007099391480730223, 0.013786312161496799, array([[189555,     90],
       [  3916,     28]]))
Decision Tree Results:  (0.9533547877203766, 0.07360831656606305, 0.11130831643002029, 0.08861526039563988, array([[184120,   5525],
       [  3505,    439]]))
Random Forest Results:  (0.9791413768344276, 0.1780821917808219, 0.006592292089249493, 0.012713936430317848, array([[189525,    120],
       [  3918,     26]]))


In [23]:
import mlflow
import mlflow.sklearn

mlflow.set_experiment("Sepsis Prediction Models")

# Set the tracking URI to the local server
mlflow.set_tracking_uri("http://127.0.0.1:5000")



2024/06/13 15:10:59 INFO mlflow.tracking.fluent: Experiment with name 'Sepsis Prediction Models' does not exist. Creating a new experiment.


In [24]:
def evaluate_model(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    cm = confusion_matrix(y_test, y_pred)
    return accuracy, precision, recall, f1, cm

# Logistic Regression
with mlflow.start_run(run_name="Logistic Regression"):
    log_reg = LogisticRegression(class_weight='balanced')
    log_reg.fit(X_train, y_train)
    y_pred_log_reg = log_reg.predict(X_test)
    log_reg_results = evaluate_model(y_test, y_pred_log_reg)
    
    mlflow.log_param("model", "Logistic Regression")
    mlflow.log_metric("accuracy", log_reg_results[0])
    mlflow.log_metric("precision", log_reg_results[1])
    mlflow.log_metric("recall", log_reg_results[2])
    mlflow.log_metric("f1", log_reg_results[3])
    mlflow.sklearn.log_model(log_reg, "model")
    
    print("Logistic Regression Results: ", log_reg_results)

# Decision Tree
with mlflow.start_run(run_name="Decision Tree"):
    dec_tree = DecisionTreeClassifier(class_weight='balanced')
    dec_tree.fit(X_train, y_train)
    y_pred_dec_tree = dec_tree.predict(X_test)
    dec_tree_results = evaluate_model(y_test, y_pred_dec_tree)
    
    mlflow.log_param("model", "Decision Tree")
    mlflow.log_metric("accuracy", dec_tree_results[0])
    mlflow.log_metric("precision", dec_tree_results[1])
    mlflow.log_metric("recall", dec_tree_results[2])
    mlflow.log_metric("f1", dec_tree_results[3])
    mlflow.sklearn.log_model(dec_tree, "model")
    
    print("Decision Tree Results: ", dec_tree_results)

# Random Forest
with mlflow.start_run(run_name="Random Forest"):
    rand_forest = RandomForestClassifier(class_weight='balanced')
    rand_forest.fit(X_train, y_train)
    y_pred_rand_forest = rand_forest.predict(X_test)
    rand_forest_results = evaluate_model(y_test, y_pred_rand_forest)
    
    mlflow.log_param("model", "Random Forest")
    mlflow.log_metric("accuracy", rand_forest_results[0])
    mlflow.log_metric("precision", rand_forest_results[1])
    mlflow.log_metric("recall", rand_forest_results[2])
    mlflow.log_metric("f1", rand_forest_results[3])
    mlflow.sklearn.log_model(rand_forest, "model")
    
    print("Random Forest Results: ", rand_forest_results)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Results:  (0.7788665678318499, 0.0556152667566146, 0.6166328600405679, 0.10202840182073711, array([[148348,  41297],
       [  1512,   2432]]))
Decision Tree Results:  (0.9604936230880887, 0.05841678588459704, 0.06211967545638945, 0.0602113541410666, array([[185696,   3949],
       [  3699,    245]]))
Random Forest Results:  (0.9796321071961733, 1.0, 0.0002535496957403651, 0.0005069708491761723, array([[189645,      0],
       [  3943,      1]]))


In [12]:
X_test.shape, y_test.shape, X_train.shape, y_train.shape

((585664, 18), (193589,), (585664, 18), (585664,))

In [29]:
from google.cloud import storage

# Path to your service account key file
key_path = "/Users/sharanyasenthil/.gcp.json"

# Initialize the client using the service account key
client = storage.Client.from_service_account_json(key_path)

# Example: List all buckets
buckets = list(client.list_buckets())
for bucket in buckets:
    print(bucket.name)


cloud-ai-platform-a0cf14d4-4df3-4f3d-94fb-f9fe06a2b67c
dataproc-staging-us-central1-1035581784142-eeq8irji
dataproc-temp-us-central1-1035581784142-f1cpbm4t
gcf-v2-sources-1035581784142-us-central1
gcf-v2-uploads-1035581784142-us-central1
leafy-sunrise-425218-h4_cloudbuild
sepsis-prediction-mlops
sepsis-prediction-outputs
us-central1-mlops-airflow-0eb255c2-bucket
us-central1-mlops-airflow-2426135b-bucket
us-central1-mlops-airflow-5a16547c-bucket
us-central1-mlops-airflow-83834e06-bucket
us-central1-mlops-airflow-a6ca8f0c-bucket
us-central1-mlops-airflow-ae3f3e99-bucket
us-central1-mlops-airflow-d11424fb-bucket
us-central1-mlops-airflow-f7614037-bucket
us-central1-mlops-airflow-i-7e7733c7-bucket
us-central1-mlops-airflow-i-8417136d-bucket
us-central1-mlops-airflow-i-d875dfd1-bucket
us-central1-mlops-airflow-n-b55e3862-bucket
us-central1-sepsis-mlops-ai-1c8b31e6-bucket
us-central1-sepsis-mlops-ai-2d4ead58-bucket
us-east1-mlops-airflow-8ff0f727-bucket
