In [2]:
# Move the working directory up one level so that the relative paths used
# later in the notebook (e.g. 'data/creditcard.csv') resolve from the parent
# directory.
# NOTE(review): the target is relative, so re-running this cell climbs one
# more level each time -- run it exactly once per kernel session.
import os 
os.chdir("../")

In [3]:
import mlflow
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.layers import (
    LSTM,
    Conv2D,
    Dense,
    Flatten,
    MaxPooling2D,
    SimpleRNN,
)
from tensorflow.keras.models import Sequential

2024/10/21 23:36:03 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.
2024/10/21 23:36:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


In [4]:
# Load the credit-card dataset and separate the features from the 'Class' label.
creditcard_data = pd.read_csv('data/creditcard.csv')
X_creditcard = creditcard_data.drop('Class', axis=1)
y_creditcard = creditcard_data['Class']

# Hold out 20% of the rows for evaluation. `stratify` keeps the class ratio
# identical in both splits; without it a rare class can end up over- or
# under-represented in the test set purely by chance.
# NOTE(review): presumably 'Class' is a heavily imbalanced fraud flag -- confirm.
(
    X_train_creditcard,
    X_test_creditcard,
    y_train_creditcard,
    y_test_creditcard,
) = train_test_split(
    X_creditcard,
    y_creditcard,
    test_size=0.2,
    random_state=42,
    stratify=y_creditcard,
)


In [None]:
# Train, evaluate and log a logistic-regression baseline as one MLflow run.
# `mlflow` and the sklearn names come from the import cell at the top.
mlflow.autolog()

with mlflow.start_run(run_name="My Logistic Regression Model(credit card based) v1") as run:
    # Fit on the training split
    lr_model = LogisticRegression()
    lr_model.fit(X_train_creditcard, y_train_creditcard)

    # Evaluate on the held-out split
    y_pred_lr = lr_model.predict(X_test_creditcard)
    accuracy_lr = accuracy_score(y_test_creditcard, y_pred_lr)
    print(f"Accuracy: {accuracy_lr}")

    # Accuracy alone says little when one class dominates, so the
    # positive-class metrics are logged as well.
    # NOTE(review): assumes 'Class' is binary with 1 as the positive label.
    mlflow.log_param("model", "logistic_regression")
    mlflow.log_metrics({
        "accuracy": accuracy_lr,
        "precision": precision_score(y_test_creditcard, y_pred_lr, zero_division=0),
        "recall": recall_score(y_test_creditcard, y_pred_lr, zero_division=0),
        "f1_score": f1_score(y_test_creditcard, y_pred_lr, zero_division=0),
    })

    # Log the trained model; .iloc makes the one-row input example explicitly positional
    mlflow.sklearn.log_model(lr_model, "lr_model.pkl", input_example=X_train_creditcard.iloc[:1])

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log a decision tree as one MLflow run.
# `mlflow` and the sklearn names come from the import cell at the top.
mlflow.autolog()

with mlflow.start_run(run_name="My Decision Tree Model(credit card based) v1") as run:
    # Seeded so repeated runs grow the same tree (same seed as the data split)
    dt_model = DecisionTreeClassifier(random_state=42)
    dt_model.fit(X_train_creditcard, y_train_creditcard)

    # Evaluate on the held-out split
    y_pred_dt = dt_model.predict(X_test_creditcard)
    accuracy_dt = accuracy_score(y_test_creditcard, y_pred_dt)
    print(f"Accuracy: {accuracy_dt}")

    # Accuracy alone says little when one class dominates, so the
    # positive-class metrics are logged as well.
    # NOTE(review): assumes 'Class' is binary with 1 as the positive label.
    mlflow.log_param("model", "decision_tree")
    mlflow.log_metrics({
        "accuracy": accuracy_dt,
        "precision": precision_score(y_test_creditcard, y_pred_dt, zero_division=0),
        "recall": recall_score(y_test_creditcard, y_pred_dt, zero_division=0),
        "f1_score": f1_score(y_test_creditcard, y_pred_dt, zero_division=0),
    })

    # Log the trained model; .iloc makes the one-row input example explicitly positional
    mlflow.sklearn.log_model(dt_model, "dt_model.pkl", input_example=X_train_creditcard.iloc[:1])

    run_id = run.info.run_id
    print(f"run_id {run_id}")

2024/10/21 23:41:07 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.


2024/10/21 23:41:07 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/10/21 23:41:07 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.


In [None]:
# Train, evaluate and log a random forest as one MLflow run.
# `mlflow` and the sklearn names come from the import cell at the top.
mlflow.autolog()

with mlflow.start_run(run_name="My Random Forest Model(credit card based) v1") as run:
    # Seeded so bootstrapping and feature sampling repeat across runs
    rf_model = RandomForestClassifier(random_state=42)
    rf_model.fit(X_train_creditcard, y_train_creditcard)

    # Evaluate on the held-out split
    y_pred_rf = rf_model.predict(X_test_creditcard)
    accuracy_rf = accuracy_score(y_test_creditcard, y_pred_rf)
    print(f"Accuracy: {accuracy_rf}")

    # Accuracy alone says little when one class dominates, so the
    # positive-class metrics are logged as well.
    # NOTE(review): assumes 'Class' is binary with 1 as the positive label.
    mlflow.log_param("model", "random_forest")
    mlflow.log_metrics({
        "accuracy": accuracy_rf,
        "precision": precision_score(y_test_creditcard, y_pred_rf, zero_division=0),
        "recall": recall_score(y_test_creditcard, y_pred_rf, zero_division=0),
        "f1_score": f1_score(y_test_creditcard, y_pred_rf, zero_division=0),
    })

    # Log the trained model; .iloc makes the one-row input example explicitly positional
    mlflow.sklearn.log_model(rf_model, "rf_model.pkl", input_example=X_train_creditcard.iloc[:1])

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log a gradient-boosting classifier as one MLflow run.
# `mlflow` and the sklearn names come from the import cell at the top.
mlflow.autolog()

with mlflow.start_run(run_name="My Gradient Boosting Model(credit card based) v1") as run:
    # Seeded so repeated runs build the same ensemble
    gb_model = GradientBoostingClassifier(random_state=42)
    gb_model.fit(X_train_creditcard, y_train_creditcard)

    # Evaluate on the held-out split
    y_pred_gb = gb_model.predict(X_test_creditcard)
    accuracy_gb = accuracy_score(y_test_creditcard, y_pred_gb)
    print(f"Accuracy: {accuracy_gb}")

    # Accuracy alone says little when one class dominates, so the
    # positive-class metrics are logged as well.
    # NOTE(review): assumes 'Class' is binary with 1 as the positive label.
    mlflow.log_param("model", "gradient_boosting")
    mlflow.log_metrics({
        "accuracy": accuracy_gb,
        "precision": precision_score(y_test_creditcard, y_pred_gb, zero_division=0),
        "recall": recall_score(y_test_creditcard, y_pred_gb, zero_division=0),
        "f1_score": f1_score(y_test_creditcard, y_pred_gb, zero_division=0),
    })

    # Log the trained model; .iloc makes the one-row input example explicitly positional
    mlflow.sklearn.log_model(gb_model, "gb_model.pkl", input_example=X_train_creditcard.iloc[:1])

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log a scikit-learn MLP as one MLflow run.
# `mlflow` and the sklearn names come from the import cell at the top.
mlflow.autolog()

with mlflow.start_run(run_name="My MLP Model(credit card based) v1") as run:
    # Seeded so weight initialisation and batch shuffling repeat across runs
    mlp_model = MLPClassifier(random_state=42)
    mlp_model.fit(X_train_creditcard, y_train_creditcard)

    # Evaluate on the held-out split
    y_pred_mlp = mlp_model.predict(X_test_creditcard)
    accuracy_mlp = accuracy_score(y_test_creditcard, y_pred_mlp)
    print(f"Accuracy: {accuracy_mlp}")

    # Accuracy alone says little when one class dominates, so the
    # positive-class metrics are logged as well.
    # NOTE(review): assumes 'Class' is binary with 1 as the positive label.
    mlflow.log_param("model", "mlp")
    mlflow.log_metrics({
        "accuracy": accuracy_mlp,
        "precision": precision_score(y_test_creditcard, y_pred_mlp, zero_division=0),
        "recall": recall_score(y_test_creditcard, y_pred_mlp, zero_division=0),
        "f1_score": f1_score(y_test_creditcard, y_pred_mlp, zero_division=0),
    })

    # Log the trained model; .iloc makes the one-row input example explicitly positional
    mlflow.sklearn.log_model(mlp_model, "mlp_model.pkl", input_example=X_train_creditcard.iloc[:1])

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log a small convolutional network as one MLflow run.
mlflow.autolog()

# The data is tabular (rows x features), so the previous 28x28x1 image input
# shape could never match it. Each row is reshaped into a (n_features, 1, 1)
# "image" and the kernels slide along the feature axis only.
# NOTE(review): needs at least 4 feature columns so the conv + pool output is non-empty.
n_features = X_train_creditcard.shape[1]
X_train_cnn = X_train_creditcard.to_numpy(dtype="float32").reshape(-1, n_features, 1, 1)
X_test_cnn = X_test_creditcard.to_numpy(dtype="float32").reshape(-1, n_features, 1, 1)
y_train_cnn = y_train_creditcard.to_numpy()
y_test_cnn = y_test_creditcard.to_numpy()

with mlflow.start_run(run_name="My CNN Model(credit card based) v1") as run:
    # CNN model
    cnn_model = Sequential([
        Conv2D(32, (3, 1), activation='relu', input_shape=(n_features, 1, 1)),
        MaxPooling2D((2, 1)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    cnn_model.fit(X_train_cnn, y_train_cnn, epochs=10, batch_size=32)

    # Evaluate on the held-out split
    loss, accuracy = cnn_model.evaluate(X_test_cnn, y_test_cnn)
    print(f"Accuracy: {accuracy}")

    # Threshold the sigmoid output at 0.5 to get hard labels for the
    # positive-class metrics.
    y_pred_cnn = (cnn_model.predict(X_test_cnn).ravel() >= 0.5).astype(int)

    # Log parameters and metrics
    mlflow.log_param("model", "cnn")
    mlflow.log_metrics({
        "accuracy": accuracy,
        "precision": precision_score(y_test_cnn, y_pred_cnn, zero_division=0),
        "recall": recall_score(y_test_cnn, y_pred_cnn, zero_division=0),
        "f1_score": f1_score(y_test_cnn, y_pred_cnn, zero_division=0),
    })

    # Log trained model
    mlflow.keras.log_model(cnn_model, "cnn_model.h5")

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log a simple recurrent network as one MLflow run.
# `SimpleRNN` must be imported from tensorflow.keras.layers in the import cell.
mlflow.autolog()

# The layer below is declared for 3-D input (batch, timesteps, features), but
# the DataFrame is 2-D. Each row is treated as a sequence of one timestep.
n_features = X_train_creditcard.shape[1]
X_train_rnn = X_train_creditcard.to_numpy(dtype="float32").reshape(-1, 1, n_features)
X_test_rnn = X_test_creditcard.to_numpy(dtype="float32").reshape(-1, 1, n_features)
y_train_rnn = y_train_creditcard.to_numpy()
y_test_rnn = y_test_creditcard.to_numpy()

with mlflow.start_run(run_name="My RNN Model(credit card based) v1") as run:
    # RNN model
    rnn_model = Sequential([
        SimpleRNN(units=64, activation='relu', input_shape=(None, n_features)),
        Dense(1, activation='sigmoid'),
    ])

    rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    rnn_model.fit(X_train_rnn, y_train_rnn, epochs=10, batch_size=32)

    # Evaluate on the held-out split
    loss, accuracy = rnn_model.evaluate(X_test_rnn, y_test_rnn)
    print(f"Accuracy: {accuracy}")

    # Threshold the sigmoid output at 0.5 to get hard labels for the
    # positive-class metrics.
    y_pred_rnn = (rnn_model.predict(X_test_rnn).ravel() >= 0.5).astype(int)

    # Log parameters and metrics
    mlflow.log_param("model", "rnn")
    mlflow.log_metrics({
        "accuracy": accuracy,
        "precision": precision_score(y_test_rnn, y_pred_rnn, zero_division=0),
        "recall": recall_score(y_test_rnn, y_pred_rnn, zero_division=0),
        "f1_score": f1_score(y_test_rnn, y_pred_rnn, zero_division=0),
    })

    # Log trained model
    mlflow.keras.log_model(rnn_model, "rnn_model.h5")

    run_id = run.info.run_id
    print(f"run_id {run_id}")

In [None]:
# Train, evaluate and log an LSTM network as one MLflow run.
# `LSTM` must be imported from tensorflow.keras.layers in the import cell.
mlflow.autolog()

# The layer below is declared for 3-D input (batch, timesteps, features), but
# the DataFrame is 2-D. Each row is treated as a sequence of one timestep.
n_features = X_train_creditcard.shape[1]
X_train_lstm = X_train_creditcard.to_numpy(dtype="float32").reshape(-1, 1, n_features)
X_test_lstm = X_test_creditcard.to_numpy(dtype="float32").reshape(-1, 1, n_features)
y_train_lstm = y_train_creditcard.to_numpy()
y_test_lstm = y_test_creditcard.to_numpy()

with mlflow.start_run(run_name="My LSTM Model(credit card based) v1") as run:
    # LSTM model
    lstm_model = Sequential([
        LSTM(units=64, activation='relu', input_shape=(None, n_features)),
        Dense(1, activation='sigmoid'),
    ])

    lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    lstm_model.fit(X_train_lstm, y_train_lstm, epochs=10, batch_size=32)

    # Evaluate on the held-out split
    loss, accuracy = lstm_model.evaluate(X_test_lstm, y_test_lstm)
    print(f"Accuracy: {accuracy}")

    # Threshold the sigmoid output at 0.5 to get hard labels for the
    # positive-class metrics.
    y_pred_lstm = (lstm_model.predict(X_test_lstm).ravel() >= 0.5).astype(int)

    # Log parameters and metrics
    mlflow.log_param("model", "lstm")
    mlflow.log_metrics({
        "accuracy": accuracy,
        "precision": precision_score(y_test_lstm, y_pred_lstm, zero_division=0),
        "recall": recall_score(y_test_lstm, y_pred_lstm, zero_division=0),
        "f1_score": f1_score(y_test_lstm, y_pred_lstm, zero_division=0),
    })

    # Log trained model
    mlflow.keras.log_model(lstm_model, "lstm_model.h5")

    run_id = run.info.run_id
    print(f"run_id {run_id}")