In [None]:
import pandas as pd
import yaml
import ast
import re
import sklearn
from openai import OpenAI
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Registry translating the model names used in the config (and in LLM
# replies) into the estimator classes that get instantiated for training.
model_mapping = dict(
    LogisticRegression=LogisticRegression,
    DecisionTreeClassifier=DecisionTreeClassifier,
    RandomForestClassifier=RandomForestClassifier,
    DecisionTreeRegressor=DecisionTreeRegressor,
    LinearRegression=LinearRegression,
)

def load_config(config_path='/Users/serdarc/Desktop/ml-zoomcamp/midterm_project/config.yaml'):
    """Read the YAML configuration file and return its parsed content.

    Args:
        config_path: Location of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.
    """
    with open(config_path, 'r') as handle:
        return yaml.safe_load(handle)

def load_data(dataset_path):
    """Load the CSV found at ``dataset_path`` into a pandas DataFrame.

    Args:
        dataset_path: Anything ``pandas.read_csv`` accepts as its first
            argument.

    Returns:
        The DataFrame parsed from the CSV.
    """
    frame = pd.read_csv(dataset_path)
    return frame

def preprocess_data(df):
    """Label-encode every object-dtype column of ``df`` in place.

    Args:
        df: DataFrame to encode. It is modified in place; columns of any
            other dtype are left untouched.

    Returns:
        A ``(df, encoders)`` tuple, where ``df`` is the same (mutated)
        DataFrame and ``encoders`` maps each encoded column name to the
        ``LabelEncoder`` fitted on it.
    """
    encoders = {}
    object_columns = df.select_dtypes(include=['object']).columns
    for name in object_columns:
        encoder = LabelEncoder()
        # Overwrite the column with its integer codes.
        df[name] = encoder.fit_transform(df[name])
        encoders[name] = encoder
    return df, encoders

def call_llm(prompt, api_key):
    """Send ``prompt`` to the OpenAI chat completions API and return the reply.

    Args:
        prompt: Text sent as the user message.
        api_key: OpenAI API key used to build the client.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped, or an empty string when the reply carries no text content.
    """
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert in machine learning and able to evaluate the model well."},
            {"role": "user", "content": prompt}
        ]
    )
    # `choices` is a list of completion choices; the message lives on each
    # element, not on the list itself.
    content = response.choices[0].message.content
    # `content` may be None, so guard before stripping.
    return (content or "").strip()

def _find_matching_brace(text, start):
    """Return the index of the ``}`` closing the ``{`` at ``start``, or -1."""
    depth = 0
    quote = None
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if quote is not None:
            # Inside a quoted string: braces are literal text, not structure.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
            continue
        if char in ('"', "'"):
            quote = char
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return index
    return -1


def clean_hyperparameter_suggestion(suggestion):
    """Extract the first complete ``{...}`` dictionary literal from LLM text.

    Braces are matched by nesting depth, so nested dictionaries and braces
    inside quoted strings are returned intact. A non-greedy regex would stop
    at the first ``}`` and truncate them.

    Args:
        suggestion: Raw text returned by the LLM; may be empty or ``None``.

    Returns:
        The substring spanning the first balanced ``{...}`` group, or
        ``None`` (after printing a message) when no such group exists.
    """
    if suggestion:
        start = suggestion.find('{')
        while start != -1:
            end = _find_matching_brace(suggestion, start)
            if end != -1:
                return suggestion[start:end + 1]
            # This opening brace is never closed; try the next candidate.
            start = suggestion.find('{', start + 1)
    print("Could not find a dictionary in the hyperparameter suggestion.")
    return None

def extract_model_name(llm_response, available_models):
    """Return the first available model whose name appears in the response.

    Candidates are checked in the order given by ``available_models``; each
    name must appear as a whole word, compared case-insensitively.

    Args:
        llm_response: Text returned by the LLM.
        available_models: Iterable of candidate model names.

    Returns:
        The first matching name from ``available_models``, or ``None`` when
        no candidate is mentioned.
    """
    mentioned = (
        candidate
        for candidate in available_models
        if re.search(r'\b' + re.escape(candidate) + r'\b', llm_response, re.IGNORECASE)
    )
    return next(mentioned, None)

def validate_hyperparameters(model_class, hyperparameters):
    """Check the hyperparameters against what ``model_class`` accepts.

    A parameter is rejected when its name is not among the keys returned by
    ``model_class().get_params()``, or when it is ``max_features`` with the
    value ``'auto'``.

    Args:
        model_class: Estimator class; instantiated with no arguments to read
            its parameter names.
        hyperparameters: Mapping of parameter name to proposed value.

    Returns:
        ``True`` when every parameter is accepted; otherwise ``False`` after
        printing the rejected names.
    """
    known = model_class().get_params()
    rejected = []
    for name, value in hyperparameters.items():
        if name in known and not (name == 'max_features' and value == 'auto'):
            continue
        if name in known:
            # Known parameter, but carrying the disallowed 'auto' value.
            print(f"Invalid value for parameter '{name}': '{value}'")
        rejected.append(name)
    if not rejected:
        return True
    print(f"Invalid hyperparameters for {model_class.__name__}: {rejected}")
    return False

def correct_hyperparameters(hyperparameters, model_name):
    """Rewrite a known-bad hyperparameter value before the model is built.

    Only one correction is applied: for ``RandomForestClassifier``,
    ``max_features='auto'`` is replaced with ``'sqrt'``.

    Args:
        hyperparameters: Mapping of parameter name to value; modified in
            place when a correction applies.
        model_name: Name of the model the parameters are meant for.

    Returns:
        A ``(hyperparameters, corrected)`` tuple: the same mapping object and
        a flag telling whether a value was changed.
    """
    needs_fix = (
        model_name == "RandomForestClassifier"
        and 'max_features' in hyperparameters
        and hyperparameters['max_features'] == 'auto'
    )
    if not needs_fix:
        return hyperparameters, False
    print("Correcting 'max_features' from 'auto' to 'sqrt' for RandomForestClassifier.")
    hyperparameters['max_features'] = 'sqrt'
    return hyperparameters, True

def train_and_evaluate(X_train, X_test, y_train, y_test, model_name, hyperparameters=None):
    """Fit the named model and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
        model_name: Key into ``model_mapping`` selecting the estimator class.
        hyperparameters: Optional mapping of constructor arguments. It is
            copied before correction, so the caller's mapping is never
            modified. An empty or ``None`` value builds the model with its
            defaults.

    Returns:
        ``(metrics, model)`` where ``metrics`` holds weighted accuracy,
        precision, recall and F1. ``(None, None)`` is returned, after a
        printed message, when the name is unknown, the hyperparameters are
        rejected, or instantiation, fitting or evaluation raises.
    """
    model_class = model_mapping.get(model_name)
    if model_class is None:
        print(f"Valid model names are: {list(model_mapping.keys())}")
        return None, None
    try:
        if hyperparameters:
            # Correct a copy: correct_hyperparameters edits its argument in
            # place and the caller may still use the original mapping.
            hyperparameters, _ = correct_hyperparameters(dict(hyperparameters), model_name)
            if not validate_hyperparameters(model_class, hyperparameters):
                return None, None
            model = model_class(**hyperparameters)
        else:
            model = model_class()
    except Exception as e:
        print(f"Error instantiating model with hyperparameters: {e}")
        return None, None
    try:
        model.fit(X_train, y_train)
    except Exception as e:
        print(f"Error during model fitting: {e}")
        return None, None
    # Prediction and scoring can fail too: the classification metrics raise
    # on continuous predictions from the regressors in model_mapping. Report
    # it like the other failures instead of aborting the caller's loop.
    try:
        y_pred = model.predict(X_test)
        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average='weighted', zero_division=0),
            "recall": recall_score(y_test, y_pred, average='weighted', zero_division=0),
            "f1_score": f1_score(y_test, y_pred, average='weighted', zero_division=0)
        }
    except Exception as e:
        print(f"Error during model evaluation: {e}")
        return None, None
    return metrics, model

def run_llm_based_model_selection_experiment(df, config):
    """Train the configured models, let the LLM pick one, then tune it.

    Steps:
        1. Split ``df`` into train/test sets and train every model listed in
           ``config['default_models']`` with default settings.
        2. Send the collected metrics to the LLM and extract the model it
           names as best.
        3. Ask the LLM for hyperparameters for that model, parse the reply
           and, if it is a valid dictionary, retrain with them.

    Args:
        df: Preprocessed DataFrame containing the features and the target.
        config: Mapping with ``default_models`` and ``llm_api_key``. An
            optional ``target_column`` names the target and defaults to
            ``"PASSENGERS"``.

    Returns:
        None. Progress and results are printed.
    """
    #Model Training
    target_column = config.get('target_column', 'PASSENGERS')
    # Rows without a target value cannot be used for fitting or scoring.
    df = df.dropna(subset=[target_column])
    X = df.drop(target_column, axis=1)
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Several of the estimators reject NaN inputs. Fill gaps with medians
    # computed on the training split only, so nothing leaks from the test set.
    fill_values = X_train.median(numeric_only=True)
    X_train = X_train.fillna(fill_values)
    X_test = X_test.fillna(fill_values)

    available_models = config['default_models']
    model_performance = {}
    for model_name in available_models:
        print(f"Training model: {model_name}")
        metrics, _ = train_and_evaluate(X_train, X_test, y_train, y_test, model_name)
        print(f"Model: {model_name} | Metrics: {metrics}")
        # Failed runs have no metrics; keep them away from the LLM so it
        # cannot select a model that never trained.
        if metrics is not None:
            model_performance[model_name] = metrics
    if not model_performance:
        print("Error: No model could be trained successfully; skipping LLM model selection.")
        return

    #LLM selecting the best model
    prompt = (
        f"I have trained the following models with these metrics: {model_performance}. "
        "Which model should I select based on the best performance?"
    )
    best_model_response = call_llm(prompt, config['llm_api_key'])
    print(f"LLM response for best model selection:\n{best_model_response}")
    # Only models that actually produced metrics are valid answers.
    best_model = extract_model_name(best_model_response, list(model_performance))
    if not best_model:
        print("Error: Could not extract a valid model name from LLM response.")
        return
    print(f"LLM selected the best model: {best_model}")

    #Check for hyperparameter tuning
    sklearn_version = sklearn.__version__
    prompt_tuning = (
        f"The selected model is {best_model}. Can you suggest hyperparameters for better performance? "
        "Please provide them in Python dictionary format, like {'max_depth': 5, 'min_samples_split': 4}. "
        f"Ensure that all suggested hyperparameters are valid for scikit-learn version {sklearn_version}, "
        "and avoid using deprecated or invalid values such as 'max_features': 'auto'. "
        "Don't provide any explanation or return in any other format."
    )
    tuning_suggestion = call_llm(prompt_tuning, config['llm_api_key'])
    print(f"Hyperparameter tuning suggestion received:\n{tuning_suggestion}")
    cleaned_suggestion = clean_hyperparameter_suggestion(tuning_suggestion)
    suggested_params = None
    if cleaned_suggestion is not None:
        try:
            # literal_eval only evaluates Python literals, so the LLM text is
            # never executed as code.
            suggested_params = ast.literal_eval(cleaned_suggestion)
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing hyperparameter suggestion: {e}")
            suggested_params = None
        else:
            if not isinstance(suggested_params, dict):
                print("Hyperparameter suggestion is not a valid dictionary.")
                suggested_params = None

    #Automatically run hyperparameter tuning if suggested
    if suggested_params:
        print(f"Running {best_model} with suggested hyperparameters: {suggested_params}")
        tuned_metrics, _ = train_and_evaluate(
            X_train, X_test, y_train, y_test, best_model, hyperparameters=suggested_params
        )
        print(f"Metrics after tuning: {tuned_metrics}")
    else:
        print("No valid hyperparameters were provided for tuning.")

def main():
    """Run the whole pipeline: load config and data, encode, run the experiment."""
    settings = load_config()
    raw_frame = load_data(settings['dataset_path'])
    # The fitted encoders are not needed beyond preprocessing here.
    encoded_frame, _encoders = preprocess_data(raw_frame)
    run_llm_based_model_selection_experiment(encoded_frame, settings)

# Run the experiment only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Training model: LogisticRegression
Error during model fitting: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
Model: LogisticRegression | Metrics: None
Training model: DecisionTreeClassifier
