In [None]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix
)
from openpyxl import Workbook, load_workbook
import os

# Function to calculate classification metrics
def calculate_classification_metrics(y_true, y_pred, y_score=None):
    """Compute binary-classification metrics and the confusion matrix.

    Args:
        y_true: Ground-truth labels, expected to be encoded as 0/1.
        y_pred: Hard predicted labels (0/1), e.g. scores after thresholding.
        y_score: Optional continuous scores or probabilities for the positive
            class. When supplied, AUC and GINI are computed from these
            values; when omitted they are computed from ``y_pred``, which
            keeps the previous behaviour for existing callers.

    Returns:
        A ``(metrics, confusion)`` tuple. ``metrics`` is a dict with two
        parallel lists under ``'Metric'`` and ``'Value'``. ``confusion`` is
        a 2x2 array (rows = actual class, columns = predicted class).

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when ``y_true``
            contains a single class and AUC is therefore undefined.
    """
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Ranking metrics are only meaningful on continuous scores; fall back to
    # the hard labels when the caller has nothing better to offer.
    ranking_input = y_pred if y_score is None else y_score
    auc = roc_auc_score(y_true, ranking_input)
    gini = 2 * auc - 1

    # Pin the label set so the matrix is always 2x2, even when one class is
    # absent from both y_true and y_pred (otherwise sklearn returns 1x1 and
    # the fixed 'Actual 0/1' x 'Predicted 0/1' labelling downstream fails).
    confusion = confusion_matrix(y_true, y_pred, labels=[0, 1])

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'AUC ROC Score', 'GINI'],
        'Value': [precision, recall, f1, auc, gini],
    }
    return metrics, confusion

# Function to save classification results to Excel
def save_classification_results_to_excel(directory, model_name, sheet_name, results, confusion):
    """Append metrics and the confusion matrix to the model's Excel workbook.

    The workbook is ``<directory>/<model_name>_classification_results.xlsx``.
    It is created if missing; otherwise it is loaded and extended. Results
    are appended below any existing rows of the target sheet.

    Args:
        directory: Folder in which the workbook lives.
        model_name: Used as the workbook file-name prefix.
        sheet_name: Requested worksheet title. Characters Excel forbids in
            titles are replaced with ``_`` and the title is truncated to 31
            characters; an empty result falls back to ``'Results'``.
        results: Dict with parallel ``'Metric'`` and ``'Value'`` lists.
        confusion: 2x2 confusion matrix (rows = actual, columns = predicted).
    """
    file_name = os.path.join(directory, f'{model_name}_classification_results.xlsx')

    # Excel rejects \ / ? * [ ] : in sheet titles and limits them to 31
    # characters; openpyxl raises on the former and only warns on the latter.
    forbidden = set('\\/?*[]:')
    cleaned = ''.join('_' if ch in forbidden else ch for ch in str(sheet_name))
    safe_title = cleaned[:31] or 'Results'

    try:
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        workbook = Workbook()
        # Drop the default empty sheet so the workbook only holds result sheets.
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    if safe_title in workbook.sheetnames:
        sheet = workbook[safe_title]
    else:
        sheet = workbook.create_sheet(title=safe_title)

    sheet.append(['Metric', 'Value'])
    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    sheet.append([])
    sheet.append(['Confusion Matrix'])
    # Building the frame also validates that the matrix really is 2x2.
    confusion_df = pd.DataFrame(
        confusion,
        index=['Actual 0', 'Actual 1'],
        columns=['Predicted 0', 'Predicted 1'],
    )
    sheet.append([''] + confusion_df.columns.tolist())
    for idx, row in confusion_df.iterrows():
        sheet.append([idx] + row.tolist())

    workbook.save(file_name)
    print(f'Classification results saved to {file_name}, sheet: {safe_title}')


# Main function
def run_classification_metrics():
    """Show an interactive ipywidgets UI that scores an uploaded prediction file.

    Flow: the user confirms a folder that must contain a ``sampled_y_train``
    CSV and a ``y_test`` CSV, enters a model name, picks the dataset type,
    uploads a CSV whose first column holds predicted scores, chooses a
    threshold and clicks "Run Metrics". The metrics and confusion matrix are
    displayed and appended to the model's Excel workbook in the confirmed
    folder, on a sheet named after the uploaded file.
    """
    import io  # local import: only needed to wrap the uploaded bytes

    # Widgets
    directory_selector = widgets.Text(description='Folder Path:', layout=widgets.Layout(width='500px'))
    dir_confirm_button = widgets.Button(description='Confirm Folder', button_style='success')

    model_name_widget = widgets.Text(
        description='Model Name:',
        layout=widgets.Layout(width='500px')
    )
    dataset_type_widget = widgets.Dropdown(
        options=[('Select One', 'select_one'), ('Train', 'train'), ('Test', 'test')],
        description='Dataset Type:',
    )
    file_selector_pred = widgets.FileUpload(description='Upload y_pred')
    uploaded_file_label = widgets.Label()
    threshold_slider = widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01, description='Threshold:')
    run_button = widgets.Button(description='Run Metrics', button_style='primary')
    output = widgets.Output()

    # State shared between the callbacks below
    selected_directory = {'path': None}
    y_files = {'train': None, 'test': None}

    def first_upload():
        """Return ``(file_name, content_bytes)`` for the first uploaded file."""
        uploads = file_selector_pred.value
        if isinstance(uploads, dict):
            # ipywidgets 7: {file_name: {'metadata': {...}, 'content': bytes}}
            name, entry = next(iter(uploads.items()))
        else:
            # ipywidgets 8: tuple of {'name': ..., 'content': memoryview, ...}
            entry = uploads[0]
            name = entry['name']
        return name, bytes(entry['content'])

    # Directory confirmation logic
    def on_dir_confirm_clicked(b):
        clear_output(wait=True)
        display(directory_selector, dir_confirm_button)

        # Forget files found in a previously confirmed folder so a failed
        # confirmation cannot leave stale paths behind.
        y_files['train'] = None
        y_files['test'] = None

        folder = directory_selector.value.strip()
        selected_directory['path'] = folder
        if not os.path.isdir(folder):
            print("❌ Invalid directory. Please enter a valid path.")
            return

        # Look for required files (sorted so the pick is deterministic)
        csv_files = [f for f in sorted(os.listdir(folder)) if f.endswith('.csv')]
        train_csv = next((f for f in csv_files if 'sampled_y_train' in f), None)
        test_csv = next((f for f in csv_files if 'y_test' in f), None)
        if train_csv is None or test_csv is None:
            print("❌ Could not find both 'sampled_y_train' and 'y_test' CSV files in the directory.")
            return

        y_files['train'] = os.path.join(folder, train_csv)
        y_files['test'] = os.path.join(folder, test_csv)
        display(model_name_widget, dataset_type_widget)

    # Dataset type selection logic
    def on_dataset_type_change(change):
        clear_output(wait=True)
        display(directory_selector, dir_confirm_button, model_name_widget, dataset_type_widget)
        if dataset_type_widget.value in ['train', 'test']:
            display(file_selector_pred, uploaded_file_label, threshold_slider, run_button, output)

    # File upload display logic
    def on_file_upload_change(change):
        if not file_selector_pred.value:
            uploaded_file_label.value = ""
            return
        try:
            uploaded_filename, _ = first_upload()
            uploaded_file_label.value = f"✅ Uploaded: {uploaded_filename}"
        except Exception:
            # Best effort only: a label failure must not break the observer.
            uploaded_file_label.value = "⚠️ Error reading file name"

    # Run button logic
    def on_run_button_clicked(b):
        with output:
            # Basic validations
            model_name = model_name_widget.value.strip()
            dataset_type = dataset_type_widget.value
            threshold = threshold_slider.value

            if not model_name:
                print("❌ Please enter a model name.")
                return
            if dataset_type not in ['train', 'test']:
                print("❌ Please select a valid dataset type.")
                return
            if not file_selector_pred.value:
                print("❌ Please upload a prediction file.")
                return
            if not y_files[dataset_type]:
                print(f"❌ Could not find the required y_{dataset_type} file in the selected directory.")
                return

            try:
                uploaded_filename, raw_bytes = first_upload()
                # First column of each CSV is taken as the series of interest.
                scores = pd.read_csv(io.BytesIO(raw_bytes)).iloc[:, 0]
                y_actual = pd.read_csv(y_files[dataset_type]).iloc[:, 0]
                y_pred = (scores >= threshold).astype(int)

                results, confusion = calculate_classification_metrics(y_actual, y_pred)

                display(widgets.HTML(f'<h3>Model: <b>{model_name}</b> | Dataset: <b>{dataset_type}</b> | Threshold: <b>{threshold:.2f}</b></h3>'))
                display(pd.DataFrame(results))
                display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))

                save_classification_results_to_excel(selected_directory['path'], model_name, uploaded_filename, results, confusion)
            except Exception as e:
                # UI boundary: report the problem instead of raising inside the callback.
                print(f"⚠️ Error during processing: {e}")

    # Bind interactions
    dir_confirm_button.on_click(on_dir_confirm_clicked)
    dataset_type_widget.observe(on_dataset_type_change, names='value')
    file_selector_pred.observe(on_file_upload_change, names='value')
    run_button.on_click(on_run_button_clicked)

    # Initial display
    display(directory_selector, dir_confirm_button)
