In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import mlflow
import mlflow.pyfunc


# Load the white-wine quality dataset; the file is semicolon-delimited.
df = pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)

# Log every plot below as an artifact of a single MLflow run.
mlflow.set_experiment("Wine Quality Visualization")
with mlflow.start_run():
    # --- Histograms of every column ---------------------------------------
    # The plt.* calls below act on the current pyplot figure, i.e. the one
    # DataFrame.hist has just drawn into.
    histograms = df.hist(figsize=(10, 10), bins=20)
    plt.tight_layout()
    plt.suptitle("Histogram of Wine Quality Features")
    # Leave headroom so the suptitle does not sit on top of the first row.
    plt.subplots_adjust(top=0.95)

    hist_plot_path = "histograms.png"
    plt.savefig(hist_plot_path)
    mlflow.log_artifact(hist_plot_path)

    # --- Pair plot, coloured by quality -----------------------------------
    pair_plot = sns.pairplot(df, hue="quality")
    # y > 1 places the title just above the grid instead of over it.
    pair_plot.fig.suptitle("Pair Plot of Wine Quality Dataset", y=1.02)

    # Save through the grid object (not plt.savefig) so the grid's own
    # savefig handling is used for this figure.
    pair_plot_path = "pair_plot.png"
    pair_plot.savefig(pair_plot_path)
    mlflow.log_artifact(pair_plot_path)

    # --- Scatter plot of alcohol vs density, coloured by quality ----------
    # Draw on a dedicated figure/axes. Without an explicit `ax`, seaborn
    # draws on the current axes, which at this point is a panel of the pair
    # plot; the saved "scatter plot" would then be the whole pair-plot grid
    # with the scatter overlaid on one panel.
    scatter_fig, scatter_ax = plt.subplots()
    scatter_plot = sns.scatterplot(
        data=df,
        x="alcohol",
        y="density",
        hue="quality",
        palette="coolwarm",
        ax=scatter_ax,
    )
    scatter_ax.set_title("Scatter Plot of Alcohol vs Density")

    # Save via the figure object so the output does not depend on which
    # figure pyplot currently considers "current".
    scatter_plot_path = "scatter_plot.png"
    scatter_fig.savefig(scatter_plot_path)
    mlflow.log_artifact(scatter_plot_path)

    # Describe the run: a free-form tag plus the dataset's shape
    # (the value of df.shape) as a parameter.
    mlflow.set_tag("Visualization", "Wine Quality Dataset")
    mlflow.log_param("data_shape", df.shape)

print("Visualizations logged to MLflow.")
