# MLflow setup test

This notebook tests the MLflow setup (with tracking hosted on DagsHub) by running a dummy classification experiment.

In [2]:
import mlflow
import dagshub
from mlflow.tracking import MlflowClient
import os

from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file (searching parent directories) and load it
# into the process environment so the credentials below can be read.
load_dotenv(find_dotenv())

DAGSHUB_USER_NAME = os.getenv("DAGSHUB_USER_NAME")
DAGSHUB_TOKEN = os.getenv("DAGSHUB_TOKEN")
DAGSHUB_REPO = os.getenv("DAGSHUB_REPO")

# Fail early with a readable message: assigning None into os.environ would
# otherwise raise an opaque "TypeError: str expected, not NoneType".
if DAGSHUB_TOKEN is None:
    raise RuntimeError(
        "DAGSHUB_TOKEN is not set. Define it in your .env file or in the "
        "environment before running this notebook."
    )

# Expose the token to MLflow through its tracking-password variable.
os.environ["MLFLOW_TRACKING_PASSWORD"] = DAGSHUB_TOKEN

# Initialize DagsHub tracking for the configured repository.
dagshub.init(repo_owner=DAGSHUB_USER_NAME, repo_name=DAGSHUB_REPO, mlflow=True)

# Create or get the experiment that the runs below are logged under.
experiment_name = "Classification_Conflicts"
mlflow.set_experiment(experiment_name)

# Sanity-check the loaded configuration. The token itself is deliberately
# NOT printed: cell outputs are saved with the notebook and would leak it.
print("DAGSHUB_TOKEN: loaded (value hidden)")
print(DAGSHUB_USER_NAME)
print(DAGSHUB_REPO)

[REDACTED: access token removed from saved output; rotate this credential]
bab-git
SDS-social-sphere


In [None]:
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import mlflow.sklearn
import shutil

from mlflow.models.signature import infer_signature

# Tracking target and experiment are expected to be configured before this
# cell runs; nothing here points MLflow at a specific server.

# Four hand-written rows: just enough data to split in half and fit a baseline.
X = pd.DataFrame(
    {
        "feature1": [1, 0, 1, 0],
        "feature2": [0, 1, 1, 0],
    }
)
y = [1, 0, 1, 0]

# Fixed seed so the 50/50 split is the same on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

with mlflow.start_run():
    # Baseline model that always predicts the majority class seen in training.
    clf = DummyClassifier(strategy="most_frequent").fit(X_train, y_train)

    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)

    # Input/output schema derived from the held-out rows and their predictions.
    signature = infer_signature(X_test, preds)

    mlflow.log_param("strategy", "most_frequent")
    mlflow.log_metric("accuracy", acc)

    # Clear any leftover local export directory from earlier experiments;
    # a missing directory is not an error.
    shutil.rmtree("tmp_model", ignore_errors=True)

    # Log the fitted model, with a single-row input example, to the active run.
    mlflow.sklearn.log_model(
        clf,
        artifact_path="dummy_model",
        signature=signature,
        input_example=X_test.iloc[:1],
    )




🏃 View run charming-yak-137 at: https://dagshub.com/bab-git/SDS-social-sphere.mlflow/#/experiments/2/runs/6f6ace86cfda4e73af2e5625c7107754
🧪 View experiment at: https://dagshub.com/bab-git/SDS-social-sphere.mlflow/#/experiments/2
