In [None]:
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Location of the raw breast-cancer CSV. Kept in one named constant so the
# notebook can be pointed at another copy of the data without hunting
# through the loading code.
DATA_PATH = "C:/Users/admin/Downloads/data.csv"

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Dropping columns that are not needed
df = df.drop(columns=['id', 'Unnamed: 32'])

# Map the target to binary values: 'M' to 1 (malignant), 'B' to 0 (benign)
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

# Series.map turns any label outside {'M', 'B'} into NaN without complaint;
# fail here with a clear message instead of deep inside model fitting.
assert df['diagnosis'].notna().all(), "diagnosis column contains labels other than 'M'/'B'"

# Separate features and target datasets
X = df.drop(columns=['diagnosis'])
y = df['diagnosis']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=102)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split, so test statistics never influence
# the preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model hyperparameters (also logged to MLflow by a later cell)
params = {
    "solver": "lbfgs",
    "max_iter": 10000,
    "multi_class": "auto",
    "random_state": 8888,
}

# Train the model
model = LogisticRegression(**params)
model.fit(X_train, y_train)


In [None]:

# Predict and evaluate the model
y_pred = model.predict(X_test)
print("Logistic Regression:")
print(classification_report(y_test, y_pred))

# Same report as a nested dict so individual metrics can be logged below.
class_report = classification_report(y_test, y_pred, output_dict=True)

# Set up MLflow experiment.
# The tracking URI must be configured BEFORE MlflowClient() is constructed:
# the client resolves its tracking URI at construction time, so a client
# created earlier would talk to the default store instead of this server.
experiment_name = "breastcancer_data"
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

# Check existing experiments
client = MlflowClient()
print("Listing all experiments:")
experiments = client.search_experiments()
for exp in experiments:
    print(f"Experiment ID: {exp.experiment_id}, Name: {exp.name}")

# set_experiment activates the experiment and creates it when it does not
# exist yet, so no create/except dance is needed (the previous version
# treated every MlflowException, e.g. a connection failure, as
# "experiment already exists").
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

# Log params, metrics and the fitted model to MLflow.
# The run is named so it can be told apart from other models' runs in the UI.
with mlflow.start_run(run_name="Logistic Regression Model"):
    mlflow.log_params(params)
    mlflow.log_metrics({
        'accuracy': class_report['accuracy'],
        'recall_class_0': class_report['0']['recall'],
        'recall_class_1': class_report['1']['recall'],
        'f1_score': class_report['macro avg']['f1-score']
    })
    mlflow.sklearn.log_model(model, "Logistic Regression")



In [None]:

# Hyperparameters for the random forest. Kept separate from the logistic
# regression `params` so each MLflow run records the settings of the model
# it actually describes.
rf_params = {
    "n_estimators": 100,
    "random_state": 8888,
}

# Train the random forest on the same (scaled) training split.
# Previously this cell trained nothing and re-reported / re-logged the
# logistic regression model and predictions under the RandomForest name.
rf_model = RandomForestClassifier(**rf_params)
rf_model.fit(X_train, y_train)

# Predict and evaluate the model
rf_pred = rf_model.predict(X_test)
print("RandomForestClassifier:")
print(classification_report(y_test, rf_pred))

# Same report as a nested dict so individual metrics can be logged below.
class_report = classification_report(y_test, rf_pred, output_dict=True)

# Set up MLflow experiment.
# The tracking URI must be configured BEFORE MlflowClient() is constructed:
# the client resolves its tracking URI at construction time, so a client
# created earlier would talk to the default store instead of this server.
experiment_name = "breastcancer_data"
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

# Check existing experiments
client = MlflowClient()
print("Listing all experiments:")
experiments = client.search_experiments()
for exp in experiments:
    print(f"Experiment ID: {exp.experiment_id}, Name: {exp.name}")

# set_experiment activates the experiment and creates it when it does not
# exist yet, so no create/except fallback is needed.
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

# Log params, metrics and the fitted random forest to MLflow
with mlflow.start_run(run_name="RandomForestClassifier Model"):
    mlflow.log_params(rf_params)
    mlflow.log_metrics({
        'accuracy': class_report['accuracy'],
        'recall_class_0': class_report['0']['recall'],
        'recall_class_1': class_report['1']['recall'],
        'f1_score': class_report['macro avg']['f1-score']
    })
    mlflow.sklearn.log_model(rf_model, "RandomForestClassifier")