In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC ## Demo_train_Notebook1
# MAGIC This notebook is a minimal prototype for training a sample ML model on Databricks.
# MAGIC 
# MAGIC **Purpose:** Demonstrate the full notebook lifecycle for automation and job integration.
# MAGIC 
# MAGIC - Uses mock data
# MAGIC - Trains fast
# MAGIC - Logs to MLflow
# MAGIC - Follows staff-level architecture best practices


In [0]:
# Install the libraries this notebook imports (pandas, scikit-learn, mlflow)
# into the notebook environment; --quiet suppresses most installer output.
# NOTE(review): no versions are pinned here, so the resolved versions may differ
# between runs — consider pinning (pkg==x.y.z) for reproducible jobs.
%pip install pandas scikit-learn mlflow --quiet


In [0]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn


In [0]:
# Build a small mock dataset from a seeded generator so that every run
# (including Restart & Run All) produces exactly the same rows.
SEED = 42
N_ROWS = 100
rng = np.random.default_rng(SEED)

# Two uniform [0, 1) features and a binary label drawn independently of them.
# Draw order matters for reproducibility: feature1, then feature2, then label.
data = pd.DataFrame({
    "feature1": rng.random(N_ROWS),
    "feature2": rng.random(N_ROWS),
    "label": rng.integers(0, 2, N_ROWS),  # upper bound is exclusive -> values in {0, 1}
})


In [0]:
# Demo only: no scaling, encoding, or cleaning — just separate the two
# feature columns (X) from the target column (y).
X, y = data[["feature1", "feature2"]], data["label"]


In [0]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# train/test partition the same on every run for a given X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [0]:
# Fit a logistic-regression classifier on the training split, using the
# estimator's default hyperparameters.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)


In [0]:
# Predict on the held-out split and report the fraction of correct labels.
predictions = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"Accuracy: {acc}")


In [0]:
# Record the run in MLflow: the model type as a parameter, the held-out
# accuracy as a metric, and the fitted estimator as an artifact named "model".
# The context manager ends the run when the block exits.
with mlflow.start_run():
    mlflow.log_params({"model_type": "LogisticRegression"})
    mlflow.log_metrics({"accuracy": acc})
    mlflow.sklearn.log_model(sk_model=model, artifact_path="model")


In [0]:
# Final status line so job logs show that the notebook ran to the end.
completion_message = "✅ Training complete and model logged."
print(completion_message)
