In [None]:
# Step 1
# Read the churn dataset from the default Lakehouse into a pandas DataFrame.
import pandas as pd

lakehouse_root = "/lakehouse/default/"
churn_csv_path = lakehouse_root + "Files/churn.csv"

df = pd.read_csv(churn_csv_path)
display(df)

In [None]:
#Step 2:
# Build the feature matrix and label vector from `df`, then hold out 30% of
# the rows as a test set (fixed random_state so the split is repeatable).

from sklearn.model_selection import train_test_split

feature_columns = [
    'years_with_company',
    'total_day_calls',
    'total_eve_calls',
    'total_night_calls',
    'total_intl_calls',
    'average_call_minutes',
    'total_customer_service_calls',
    'age',
]
label_column = 'churn'

print("Splitting data...")
X, y = (
    df[feature_columns].values,
    df[label_column].values,
)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

In [None]:
#Step 3:
# Make "experiment-churn" the active MLflow experiment.

import mlflow

experiment_name = "experiment-churn"
mlflow.set_experiment(experiment_name=experiment_name)

In [None]:
#Step 4:
# Train a logistic regression inside its own MLflow run.

from sklearn.linear_model import LogisticRegression

# Regularization rate; scikit-learn expects its inverse as `C`.
reg_rate = 0.1

with mlflow.start_run():
    # Enable autologging before fitting so the fit() call below is captured
    # in the active run.
    mlflow.autolog()

    model = LogisticRegression(C=1 / reg_rate, solver="liblinear").fit(X_train, y_train)

    # Custom parameter used to tell the runs apart when comparing estimators.
    mlflow.log_param("estimator", "LogisticRegression")

    # The held-out split from Step 2 is otherwise never scored; record its
    # accuracy under an explicit, stable metric name so runs can be compared
    # on unseen data rather than on training accuracy alone.
    mlflow.log_metric("test_accuracy_score", model.score(X_test, y_test))

In [None]:
#Step 5:
# Train a decision tree inside its own MLflow run.

from sklearn.tree import DecisionTreeClassifier

with mlflow.start_run():
    # Enable autologging before fitting so the fit() call below is captured
    # in the active run.
    mlflow.autolog()

    # Seed the tree (same seed as the train/test split in Step 2) so that
    # re-running the notebook yields the same model: tie-breaking between
    # equally good splits is otherwise random.
    model = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)

    # Custom parameter used to tell the runs apart when comparing estimators.
    mlflow.log_param("estimator", "DecisionTreeClassifier")

    # The held-out split from Step 2 is otherwise never scored; record its
    # accuracy under an explicit, stable metric name so runs can be compared
    # on unseen data rather than on training accuracy alone.
    mlflow.log_metric("test_accuracy_score", model.score(X_test, y_test))

In [None]:
#Step 6:
# Print the name of every MLflow experiment visible to this notebook.

import mlflow

experiments = mlflow.search_experiments()
# The loop variable is deliberately NOT called `exp`: loop variables persist
# in the notebook namespace, and `exp` is the experiment handle that the
# later run-query cells read. Reusing the name here would leave `exp` bound
# to whichever experiment happened to be listed last.
for listed_experiment in experiments:
    print(listed_experiment.name)

In [None]:
#Step 7:
# Look up the experiment by name; `exp` is used by the run queries that follow.

experiment_name = "experiment-churn"
exp = mlflow.get_experiment_by_name(experiment_name)

# The lookup yields None when no experiment has this name. Fail here with a
# clear message instead of letting a later `exp.experiment_id` access raise
# an AttributeError.
if exp is None:
    raise ValueError(
        f"MLflow experiment '{experiment_name}' was not found; "
        "run Step 3 to create it before querying its runs."
    )

print(exp)

In [None]:
#Step 8:
# Show all runs recorded for the experiment looked up in Step 7.

# `experiment_ids` is documented as a list of IDs, so wrap the single ID
# rather than relying on a bare string being accepted.
mlflow.search_runs(experiment_ids=[exp.experiment_id])

In [None]:
#Step 9:
# Show only the two most recently started runs of the experiment.

# `experiment_ids` is documented as a list of IDs, so wrap the single ID
# rather than relying on a bare string being accepted.
mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["start_time DESC"],
    max_results=2,
)

In [None]:
#Step 10:
# Bar chart comparing the two most recent runs by their training accuracy.

import matplotlib.pyplot as plt

metric_column = "metrics.training_accuracy_score"
label_column = "params.estimator"

# `experiment_ids` is documented as a list of IDs, so wrap the single ID.
recent_runs = mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["start_time DESC"],
    max_results=2,
)

# Keep only the plotted columns and skip runs missing either value: a run
# without the metric or the estimator label cannot be drawn as a labelled bar.
df_results = recent_runs[[metric_column, label_column]].dropna()

fig, ax = plt.subplots()
ax.bar(df_results[label_column], df_results[metric_column])
ax.set_xlabel("Estimator")
# The plotted metric is accuracy on the training data, so label it as such
# to avoid it being read as held-out performance.
ax.set_ylabel("Training accuracy")
ax.set_title("Training Accuracy by Estimator")
# Annotate each bar with its value rounded to two decimals.
for i, v in enumerate(df_results[metric_column]):
    ax.text(i, v, str(round(v, 2)), ha='center', va='bottom', fontweight='bold')
plt.show()