In [1]:
import pandas as pd

In [2]:
# Placeholder binary (0/1) ground-truth labels used by the metrics form below.
# Both Series get pandas' default integer index (0..7).
sampled_y_train = pd.Series(data=[1, 0, 0, 1, 1, 1, 0, 0])  # swap in the real training labels
y_test = pd.Series(data=[0, 0, 0, 1, 1, 1, 0, 0])  # swap in the real test labels

In [3]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix
)
from openpyxl import Workbook, load_workbook
import os

# Function to calculate classification metrics
def calculate_classification_metrics(y_true, y_pred):
    """Compute binary-classification metrics and a 2x2 confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, expected to be 0/1.
    y_pred : array-like
        Predicted labels, expected to be 0/1 (the caller in this notebook
        thresholds the uploaded scores before calling this function).

    Returns
    -------
    tuple
        ``(metrics, confusion)``. ``metrics`` is a dict with two parallel
        lists under the keys ``'Metric'`` and ``'Value'`` (Precision, Recall,
        F1-Score, AUC ROC Score, GINI). ``confusion`` is the array returned by
        ``confusion_matrix`` for labels 0 and 1 (rows = actual, columns =
        predicted).

    Notes
    -----
    The AUC is computed from ``y_pred`` exactly as passed in. When the caller
    passes hard 0/1 labels, this is the AUC of a single operating point, not
    of the underlying score distribution. GINI is derived as ``2 * AUC - 1``.
    """
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    gini = 2 * auc - 1
    # Pin the label set so the matrix is always 2x2. Without `labels`, sklearn
    # sizes the matrix from the labels actually present, and the callers build
    # a fixed 'Actual 0/1' x 'Predicted 0/1' table from it.
    confusion = confusion_matrix(y_true, y_pred, labels=[0, 1])

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'AUC ROC Score', 'GINI'],
        'Value': [precision, recall, f1, auc, gini]
    }
    return metrics, confusion

# Function to save classification results to Excel
def save_classification_results_to_excel(model_name, sheet_name, results, confusion):
    """Append metrics and a confusion matrix to ``<model_name>_classification_results.xlsx``.

    The workbook is opened if it already exists, otherwise it is created
    (without openpyxl's default empty 'Sheet'). Rows are *appended* to the
    target sheet, so repeated calls with the same sheet accumulate one result
    section after another.

    Parameters
    ----------
    model_name : str
        Prefix of the output file name, written relative to the current
        working directory.
    sheet_name : str
        Requested worksheet title. Characters that are not allowed in sheet
        titles are replaced with ``'_'`` and the title is cut to 31
        characters; an empty title falls back to ``'Results'``. Two long names
        sharing the same first 31 characters therefore share a sheet.
    results : dict
        Mapping with parallel sequences under ``'Metric'`` and ``'Value'``.
    confusion : array-like
        2x2 confusion matrix (rows = actual 0/1, columns = predicted 0/1).
    """
    file_name = f'{model_name}_classification_results.xlsx'
    try:
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    # The title usually comes from an uploaded file name. openpyxl raises on
    # titles containing \ / * ? : [ ] and Excel caps titles at 31 characters,
    # so normalise the name before it reaches the workbook.
    forbidden = str.maketrans({ch: '_' for ch in '\\/*?:[]'})
    safe_title = str(sheet_name).translate(forbidden)[:31] or 'Results'

    # Keep the worksheet object returned by create_sheet instead of looking the
    # sheet up by name afterwards: openpyxl may alter the title it was given.
    if safe_title in workbook.sheetnames:
        sheet = workbook[safe_title]
    else:
        sheet = workbook.create_sheet(title=safe_title)

    sheet.append(['Metric', 'Value'])

    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    # Blank spacer row, then the labelled confusion matrix.
    sheet.append([])
    sheet.append(['Confusion Matrix'])
    confusion_df = pd.DataFrame(confusion, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
    header = [''] + confusion_df.columns.tolist()
    sheet.append(header)
    for idx, row in confusion_df.iterrows():
        sheet.append([idx] + row.tolist())

    workbook.save(file_name)
    print(f'Classification results saved to {file_name}, sheet: {sheet.title}')

# Main function to run classification metrics
def run_classification_metrics(sampled_y_train, y_test):
    """Display an interactive form that scores an uploaded prediction file.

    The form asks for a model name and a dataset type. Once 'Train' or 'Test'
    is selected it also shows a file upload, a threshold slider and a run
    button. Clicking the button reads the first column of the uploaded CSV,
    converts it to 0/1 labels with ``value >= threshold``, computes the
    metrics against the matching ground truth, displays them, and appends them
    to the model's Excel workbook (sheet named after the uploaded file).

    Parameters
    ----------
    sampled_y_train : sequence of 0/1 labels
        Ground truth used when the dataset type is 'train'.
    y_test : sequence of 0/1 labels
        Ground truth used when the dataset type is 'test'.

    Returns
    -------
    None
        All results are shown through widgets and written to disk.
    """
    # Local import: avoids relying on pandas' private ``pd.io.common`` re-export
    # of BytesIO without touching the notebook's import cell.
    import io

    model_name_widget = widgets.Text(description='Model Name:')
    dataset_type_widget = widgets.Dropdown(
        options=[('Select One', 'select_one'), ('Train', 'train'), ('Test', 'test')],
        description='Dataset Type:',
    )
    file_selector = widgets.FileUpload(description='Select y_pred File')
    threshold_slider = widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01, description='Threshold:')
    run_button = widgets.Button(description='Run Metrics')
    output = widgets.Output()

    def first_upload():
        """Return ``(base file name, raw bytes)`` of the first uploaded file."""
        uploads = file_selector.value
        if isinstance(uploads, dict):
            # ipywidgets 7.x: {file name: {'metadata': ..., 'content': bytes}}
            name, entry = next(iter(uploads.items()))
        else:
            # ipywidgets 8.x: tuple of records carrying 'name' and 'content'
            entry = uploads[0]
            name = entry['name']
        # bytes() also accepts the memoryview that newer versions hand back.
        return os.path.basename(name), bytes(entry['content'])

    def on_dataset_type_change(change):
        # Redraw the form; the upload controls only appear for a real dataset.
        clear_output(wait=True)
        display(model_name_widget, dataset_type_widget)
        if dataset_type_widget.value in ['train', 'test']:
            display(file_selector, threshold_slider, run_button, output)

    def on_run_button_clicked(b):
        with output:
            clear_output(wait=True)
            model_name = model_name_widget.value.strip()
            dataset_type = dataset_type_widget.value
            threshold = threshold_slider.value

            if not model_name or dataset_type == 'select_one' or not file_selector.value:
                print("Please enter a model name, select a dataset, and upload a file.")
                return

            upload_name, raw_bytes = first_upload()
            # First CSV column holds the scores; read_csv treats the first row
            # as a header by default.
            y_pred = pd.read_csv(io.BytesIO(raw_bytes)).iloc[:, 0]
            y_actual = sampled_y_train if dataset_type == 'train' else y_test

            # Fail with a readable message instead of a library traceback when
            # the upload does not line up with the chosen labels.
            if len(y_pred) != len(y_actual):
                print(
                    f"Uploaded file has {len(y_pred)} predictions but the "
                    f"{dataset_type} labels have {len(y_actual)} rows."
                )
                return

            y_pred = (y_pred >= threshold).astype(int)

            results, confusion = calculate_classification_metrics(y_actual, y_pred)
            display(pd.DataFrame(results))
            display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))

            save_classification_results_to_excel(model_name, upload_name, results, confusion)

    dataset_type_widget.observe(on_dataset_type_change, names='value')
    run_button.on_click(on_run_button_clicked)

    display(model_name_widget, dataset_type_widget)


In [4]:
# Launch the interactive metrics form with the label Series defined above.
# NOTE(review): the displayed results depend on the widget selections and the
# uploaded file, so they are not reproducible from code alone.
run_classification_metrics(sampled_y_train, y_test)

Text(value='dtree', description='Model Name:')

Dropdown(description='Dataset Type:', index=2, options=(('Select One', 'select_one'), ('Train', 'train'), ('Te…

FileUpload(value={'train_file.csv': {'metadata': {'name': 'train_file.csv', 'type': 'text/csv', 'size': 50, 'l…

FloatSlider(value=0.5, description='Threshold:', max=1.0, step=0.01)

Button(description='Run Metrics', style=ButtonStyle())

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '          Metric     Value\n0      Pre…