In [49]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix
)
from openpyxl import Workbook, load_workbook

# Function to calculate regression metrics
def calculate_regression_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    n = len(y_true)
    adj_r2 = 1 - (1 - r2) * (n - 1) / (n - 1)
    return {
        'Metric': ['MAE', 'MSE', 'RMSE', 'R2', 'Adjusted R2'],
        'Value': [mae, mse, rmse, r2, adj_r2]
    }

# Function to calculate classification metrics
def calculate_classification_metrics(y_true, y_pred, amount):
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    gini = 2 * auc - 1
    tdr = np.sum(y_pred * y_true) / np.sum(y_true)
    vdr = np.sum(y_pred * amount) / np.sum(amount)
    confusion = confusion_matrix(y_true, y_pred)

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'GINI', 'TDR', 'VDR'],
        'Value': [precision, recall, f1, gini, tdr, vdr]
    }

    return metrics, confusion

# Function to display messages
def display_message(message):
    print(message)

# Helper function to check if a column is suitable for regression (continuous data)
def is_suitable_for_regression(column_data):
    unique_values = column_data.nunique()
    return unique_values > 2  # Simple check to ensure it's continuous data (more than 2 unique values)

# Helper function to check if a column is suitable for classification (binary or categorical data)
def is_suitable_for_classification(column_data):
    unique_values = column_data.nunique()
    return unique_values == 2  # For binary classification, only 2 unique values are expected


# Function to save regression results to Excel
def save_regression_results_to_excel(results, dataset_type):
    file_name = 'regression_results.xlsx'
    try:
        # Try to load the existing workbook
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        # If not found, create a new workbook and remove the default sheet
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    # Check if the sheet for the dataset type already exists, and create if not
    if dataset_type not in workbook.sheetnames:
        workbook.create_sheet(title=dataset_type)
    
    # Select the appropriate sheet
    sheet = workbook[dataset_type]

    # Append the header only if it's a new sheet
    if sheet.max_row == 1:
        sheet.append(['Metric', 'Value'])

    # Append the new results
    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    # Save the workbook
    workbook.save(file_name)
    print(f'Regression results saved to {file_name}, sheet: {dataset_type}')

# Function to save classification results to Excel
def save_classification_results_to_excel(results, confusion, dataset_type):
    file_name = 'classification_results.xlsx'
    try:
        # Try to load the existing workbook
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        # If not found, create a new workbook and remove the default sheet
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    # Check if the sheet for the dataset type already exists, and create if not
    if dataset_type not in workbook.sheetnames:
        workbook.create_sheet(title=dataset_type)
    
    # Select the appropriate sheet
    sheet = workbook[dataset_type]

    # Append the header only if it's a new sheet
    if sheet.max_row == 1:
        sheet.append(['Metric', 'Value'])

    # Append the metrics
    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    # Add a section for the confusion matrix
    sheet.append([])
    sheet.append(['Confusion Matrix'])

    # Convert the confusion matrix DataFrame to a format that includes column and index labels
    confusion_df = pd.DataFrame(confusion, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
    
    # Write the header row for the confusion matrix
    header = [''] + confusion_df.columns.tolist()
    sheet.append(header)

    # Write each row of the confusion matrix with the index label
    for idx, row in confusion_df.iterrows():
        sheet.append([idx] + row.tolist())

    # Save the workbook
    workbook.save(file_name)
    print(f'Classification results saved to {file_name}, sheet: {dataset_type}')


# Main function for widget-based interface
def create_widgets_for_metrics(dataset):
    global current_dataset

    dataset_type = widgets.Dropdown(
        options=[('Select One', 'select_one'), ('Train', 'train'), ('Test', 'test')],
        description='Dataset Type:',
    )

    def on_dataset_type_change(change):
        global current_dataset
        clear_output(wait=True)
        display(dataset_type)
        if change['new'] == 'train':
            current_dataset = dataset.copy()
            display(train_buttons)
        elif change['new'] == 'test':
            current_dataset = dataset.copy()
            display(test_buttons)

    dataset_type.observe(on_dataset_type_change, names='value')

    regression_button = widgets.Button(description='Regression Metrics', tooltip="Generate metrics for regression tasks.")
    classification_button = widgets.Button(description='Classification Metrics', tooltip="Generate metrics for classification tasks.")

    def on_regression_button_clicked(b):
        regression_button.style.button_color = 'lightblue'  # Highlight the selected button
        classification_button.style.button_color = None  # Reset the other button color
        clear_output(wait=True)
        display(dataset_type)
        display(regression_button, classification_button)  # Re-display the buttons
        display(regression_selection)

    def on_classification_button_clicked(b):
        classification_button.style.button_color = 'lightblue'  # Highlight the selected button
        regression_button.style.button_color = None  # Reset the other button color
        clear_output(wait=True)
        display(dataset_type)
        display(regression_button, classification_button)  # Re-display the buttons
        display(classification_selection)

    regression_button.on_click(on_regression_button_clicked)
    classification_button.on_click(on_classification_button_clicked)

    regression_target = widgets.Dropdown(
        options=[col for col in dataset.columns if dataset[col].dtype in ['int64', 'float64']],
        description='Actual:',
    )
    regression_prediction = widgets.Dropdown(
        options=[col for col in dataset.columns if dataset[col].dtype in ['int64', 'float64']],
        description='Predicted:',
    )

    def generate_regression_report(b):
        if not (is_suitable_for_regression(current_dataset[regression_target.value]) and 
                is_suitable_for_regression(current_dataset[regression_prediction.value])):
            display_message("These metrics are not applicable to regression problems.")
            return
        y_true = current_dataset[regression_target.value]
        y_pred = current_dataset[regression_prediction.value]
        results = calculate_regression_metrics(y_true, y_pred)
        display(pd.DataFrame(results))
        save_regression_results_to_excel(results, 'train' if dataset_type.value == 'train' else 'test')

    regression_generate_button = widgets.Button(description='Generate Regression Report', tooltip="Generate the regression report.")
    regression_generate_button.on_click(generate_regression_report)

    regression_selection = widgets.VBox([regression_target, regression_prediction, regression_generate_button])

    classification_actual = widgets.Dropdown(
        options=[col for col in dataset.columns if dataset[col].dtype in ['int64', 'float64']],
        description='Actual:',
    )
    classification_prediction = widgets.Dropdown(
        options=[col for col in dataset.columns if dataset[col].dtype in ['int64', 'float64']],
        description='Predicted:',
    )
    classification_amount = widgets.Dropdown(
        options=[col for col in dataset.columns if dataset[col].dtype in ['int64', 'float64']],
        description='Amount:',
    )

    def generate_classification_report(b):
        if not (is_suitable_for_classification(current_dataset[classification_actual.value]) and 
                is_suitable_for_classification(current_dataset[classification_prediction.value])):
            display_message("These metrics are not applicable to classification problems.")
            return
        y_true = current_dataset[classification_actual.value]
        y_pred = current_dataset[classification_prediction.value]
        amount = current_dataset[classification_amount.value]
        results, confusion = calculate_classification_metrics(y_true, y_pred, amount)
        display(pd.DataFrame(results))
        display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))
        save_classification_results_to_excel(results, confusion, 'train' if dataset_type.value == 'train' else 'test')

    classification_generate_button = widgets.Button(description='Generate Classification Report', tooltip="Generate the classification report.")
    classification_generate_button.on_click(generate_classification_report)

    classification_selection = widgets.VBox([classification_actual, classification_prediction, classification_amount, classification_generate_button])

    train_buttons = widgets.VBox([regression_button, classification_button])
    test_buttons = widgets.VBox([regression_button, classification_button])

    display(dataset_type)


In [50]:
# Example Usage
df = pd.DataFrame({
    'r_actual': [3, 5, 2, 7, 8],
    'r_predicted': [2.5, 5.1, 2.0, 7.2, 7.8],
    'c_actual': [1, 0, 0, 1, 1],
    'c_predicted': [0, 0, 1, 1, 1],
    'trans_amt': [200, 300, 400, 100, 700]
})

In [51]:
create_widgets_for_metrics(df)

Dropdown(description='Dataset Type:', index=2, options=(('Select One', 'select_one'), ('Train', 'train'), ('Te…

Button(description='Regression Metrics', style=ButtonStyle(), tooltip='Generate metrics for regression tasks.'…

Button(description='Classification Metrics', style=ButtonStyle(button_color='lightblue'), tooltip='Generate me…

VBox(children=(Dropdown(description='Actual:', index=2, options=('r_actual', 'r_predicted', 'c_actual', 'c_pre…

Unnamed: 0,Metric,Value
0,Precision,0.666667
1,Recall,0.666667
2,F1-Score,0.666667
3,GINI,0.166667
4,TDR,0.666667
5,VDR,0.705882


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1,1
Actual 1,1,2
