# In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
import yaml
import pickle

# Load the wine dataset and pull out the feature matrix and class labels
wine = load_wine()
X, y = wine.data, wine.target

# Hold out 30% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize features: the scaling statistics are learned from the training
# split only and then applied unchanged to the test split
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Candidate classifiers, keyed by the name used when reporting scores.
# Every estimator is given the same seed.
_seed = 42
models = dict(
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=_seed),
    SVM=SVC(kernel='linear', random_state=_seed),
    LogisticRegression=LogisticRegression(random_state=_seed),
)

# Train every candidate, score it on the held-out split, and keep the winner.
#
# Results left in module scope:
#   model_scores        -- {model name: weighted F1 on the test split}
#   best_model          -- name of the highest-scoring model
#   best_f1_score       -- that model's weighted F1
#   best_model_instance -- the fitted estimator object itself
#
# NOTE(review): the winner is picked by its score on the test split, so the
# reported best F1 is an optimistic estimate. A separate validation split or
# cross-validation would give an unbiased comparison -- confirm whether that
# matters for how this number is used.
best_model = None
best_f1_score = 0
model_scores = {}
best_model_instance = None

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Cast to a built-in float: depending on the scikit-learn version,
    # f1_score may return a NumPy scalar, which yaml.dump would serialize with
    # Python-specific object tags instead of a plain number.
    f1 = float(f1_score(y_test, y_pred, average='weighted'))
    model_scores[model_name] = f1
    # Always accept the first model so a winner exists even when every score
    # is 0.0 (otherwise None would be reported and pickled). After that, only
    # a strictly higher score replaces it, so ties keep the earlier model.
    if best_model_instance is None or f1 > best_f1_score:
        best_f1_score = f1
        best_model = model_name
        best_model_instance = model

# Print the best model and its F1 score
print(f"Best Model: {best_model} with F1 Score: {best_f1_score}")

# Write a summary of the comparison to YAML: the winner's name, its F1 score,
# and the score of every model that was evaluated
model_details = dict(
    best_model=best_model,
    best_f1_score=best_f1_score,
    model_scores=model_scores,
)

with open('best_model_details.yaml', mode='w') as details_out:
    yaml.dump(model_details, stream=details_out)

# Serialize the winning fitted estimator with pickle
# NOTE(review): only the estimator is saved here. It was trained on features
# transformed by `scaler`, so whoever loads this file needs the same fitted
# scaler -- confirm whether it is persisted elsewhere.
with open('best_model.pkl', mode='wb') as pickle_out:
    pickle.dump(best_model_instance, pickle_out)
