In [4]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix
)
from openpyxl import Workbook, load_workbook
import os

# Function to calculate classification metrics
def calculate_classification_metrics(y_true, y_pred):
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    gini = 2 * auc - 1
    confusion = confusion_matrix(y_true, y_pred)

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'AUC ROC Score', 'GINI'],
        'Value': [precision, recall, f1, auc, gini]
    }

    return metrics, confusion

# Function to save classification results to Excel
def save_classification_results_to_excel(model_name, file_name, results, confusion):
    excel_file = f"{model_name}_classification_results.xlsx"
    try:
        workbook = load_workbook(excel_file)
    except FileNotFoundError:
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    if file_name not in workbook.sheetnames:
        workbook.create_sheet(title=file_name)
    
    sheet = workbook[file_name]
    
    if sheet.max_row == 1:
        sheet.append(['Metric', 'Value'])
    
    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    if len(confusion) > 0:
        sheet.append([])
        sheet.append(['Confusion Matrix'])
        
        confusion_df = pd.DataFrame(confusion, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
        header = [''] + confusion_df.columns.tolist()
        sheet.append(header)
        
        for idx, row in confusion_df.iterrows():
            sheet.append([idx] + row.tolist())
    
    workbook.save(excel_file)
    print(f'Classification results saved to {excel_file}, sheet: {file_name}')

# Main function

def run_classification_metrics(sampled_y_train, y_test):
    clear_output(wait=True)
    
    # Model selection
    model_name_dropdown = widgets.Text(description="Model Name:")
    confirm_model_button = widgets.Button(description="Confirm Model")
    
    def on_model_confirmed(b):
        model_name = model_name_dropdown.value.strip()
        if model_name:
            clear_output(wait=True)
            display(dataset_type)
            dataset_type.observe(lambda change: on_dataset_selected(change, model_name), names='value')
    
    confirm_model_button.on_click(on_model_confirmed)
    display(model_name_dropdown, confirm_model_button)
    
    dataset_type = widgets.Dropdown(
        options=[('Select One', 'select_one'), ('Train', 'train'), ('Test', 'test')],
        description='Dataset Type:',
    )
    
    def on_dataset_selected(change, model_name):
        clear_output(wait=True)
        display(dataset_type)
        
        if change['new'] == 'train':
            y_actual = sampled_y_train
            file_selector = widgets.FileUpload(accept=".csv", multiple=False)
            confirm_file_button = widgets.Button(description="Confirm File")
            
            def on_file_selected(b):
                for uploaded_file in file_selector.value:
                    file_name = uploaded_file
                    content = file_selector.value[file_name]['content']
                    y_pred = pd.read_csv(pd.io.common.BytesIO(content)).iloc[:, 0]
                    compute_metrics(y_actual, y_pred, model_name, file_name)
                    return
            
            confirm_file_button.on_click(on_file_selected)
            display(file_selector, confirm_file_button)
        elif change['new'] == 'test':
            y_actual = y_test
            threshold_slider.value = 0.5
            compute_metrics(y_actual, y_test, model_name, "Test Data")

    threshold_slider = widgets.FloatSlider(
        value=0.5, min=0.0, max=1.0, step=0.01, description='Threshold:',
    )
    
    def compute_metrics(y_actual, y_pred, model_name, file_name):
        threshold = threshold_slider.value
        y_pred = (y_pred >= threshold).astype(int)
        results, confusion = calculate_classification_metrics(y_actual, y_pred)
        display(widgets.HTML(f'<h4>Model Metrics (Threshold = {threshold:.2f})</h4>'))
        display(pd.DataFrame(results))
        display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))
        save_classification_results_to_excel(model_name, file_name, results, confusion)
    
    display(threshold_slider)


In [3]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from tkinter import Tk, filedialog
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)
from openpyxl import Workbook, load_workbook

def calculate_classification_metrics(y_true, y_pred):
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    gini = 2 * auc - 1
    confusion = confusion_matrix(y_true, y_pred)

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'AUC ROC Score', 'GINI'],
        'Value': [precision, recall, f1, auc, gini]
    }
    return metrics, confusion

def save_classification_results_to_excel(model_name, results, confusion, dataset_type, sheet_name):
    file_name = f'{model_name}_classification_results.xlsx'
    try:
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    if sheet_name not in workbook.sheetnames:
        workbook.create_sheet(title=sheet_name)
    
    sheet = workbook[sheet_name]
    if sheet.max_row == 1:
        sheet.append(['Metric', 'Value'])

    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])

    if len(confusion) > 0:
        sheet.append([])
        sheet.append(['Confusion Matrix'])
        confusion_df = pd.DataFrame(confusion, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
        header = [''] + confusion_df.columns.tolist()
        sheet.append(header)
        for idx, row in confusion_df.iterrows():
            sheet.append([idx] + row.tolist())

    workbook.save(file_name)
    print(f'Classification results saved to {file_name}, sheet: {sheet_name}')

def select_file():
    root = Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename(title="Select the file for y_pred")
    return file_path

def run_classification_metrics(sampled_y_train, y_test):
    model_name = widgets.Text(description="Model Name:")
    dataset_type = widgets.Dropdown(
        options=[('Select One', 'select_one'), ('Train', 'train'), ('Test', 'test')],
        description='Dataset:'
    )
    threshold_slider = widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01, description='Threshold:')
    run_button = widgets.Button(description='Run Metrics')

    def on_run_button_clicked(b):
        clear_output(wait=True)
        display(model_name, dataset_type, threshold_slider, run_button)
        
        if dataset_type.value == 'select_one':
            print("Please select a valid dataset type.")
            return

        file_path = select_file()
        if not file_path:
            print("No file selected.")
            return
        
        y_pred = pd.read_csv(file_path).iloc[:, 0]
        y_actual = sampled_y_train if dataset_type.value == 'train' else y_test
        threshold = threshold_slider.value
        y_pred = (y_pred >= threshold).astype(int)
        
        results, confusion = calculate_classification_metrics(y_actual, y_pred)
        sheet_name = file_path.split('/')[-1].split('.')[0]
        save_classification_results_to_excel(model_name.value, results, confusion, dataset_type.value, sheet_name)
        
        display(pd.DataFrame(results))
        display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))
    
    run_button.on_click(on_run_button_clicked)
    display(model_name, dataset_type, threshold_slider, run_button)


In [4]:
run_classification_metrics(sampled_y_train, y_test)

Text(value='dtree', description='Model Name:')

Dropdown(description='Dataset:', index=2, options=(('Select One', 'select_one'), ('Train', 'train'), ('Test', â€¦

FloatSlider(value=0.5, description='Threshold:', max=1.0, step=0.01)

Button(description='Run Metrics', style=ButtonStyle())

Classification results saved to dtree_classification_results.xlsx, sheet: test_file


Unnamed: 0,Metric,Value
0,Precision,0.0
1,Recall,0.0
2,F1-Score,0.0
3,AUC ROC Score,0.1
4,GINI,-0.8


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1,4
Actual 1,3,0


In [1]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)
from openpyxl import Workbook, load_workbook
import os

def calculate_classification_metrics(y_true, y_pred):
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    gini = 2 * auc - 1
    confusion = confusion_matrix(y_true, y_pred)

    metrics = {
        'Metric': ['Precision', 'Recall', 'F1-Score', 'AUC ROC Score', 'GINI'],
        'Value': [precision, recall, f1, auc, gini]
    }
    return metrics, confusion

def save_classification_results_to_excel(results, confusion, model_name, dataset_type, sheet_name):
    file_name = f'{model_name}.xlsx'
    try:
        workbook = load_workbook(file_name)
    except FileNotFoundError:
        workbook = Workbook()
        if 'Sheet' in workbook.sheetnames:
            del workbook['Sheet']

    if sheet_name not in workbook.sheetnames:
        workbook.create_sheet(title=sheet_name)
    
    sheet = workbook[sheet_name]
    
    sheet.append(['Metric', 'Value'])
    for metric, value in zip(results['Metric'], results['Value']):
        sheet.append([metric, value])
    
    sheet.append([])
    sheet.append(['Confusion Matrix'])
    confusion_df = pd.DataFrame(confusion, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
    for idx, row in confusion_df.iterrows():
        sheet.append([idx] + row.tolist())
    
    workbook.save(file_name)
    print(f'Results saved to {file_name} under sheet: {sheet_name}')

def create_widgets_for_metrics(prediction_folder, sampled_y_train, y_test):
    model_names = ["Decision Tree", "Random Forest", "LightGBM", "XGBoost"]  # Modify as needed
    
    model_dropdown = widgets.Dropdown(
        options=model_names,
        description='Model:',
    )
    
    def on_model_selected(change):
        clear_output(wait=True)
        display(model_dropdown)
        selected_model = model_dropdown.value
        print(f'Selected Model: {selected_model}')
        
        dataset_type = widgets.Dropdown(
            options=[('Train', 'train'), ('Test', 'test')],
            description='Dataset:',
        )
        
        file_selector = widgets.Dropdown(
            options=[f for f in os.listdir(prediction_folder) if f.endswith('.csv')],
            description='Pred File:',
        )
        
        def generate_report(b):
            selected_dataset = dataset_type.value
            selected_file = file_selector.value
            
            if selected_dataset == 'train':
                y_true = sampled_y_train
            elif selected_dataset == 'test':
                y_true = y_test
            else:
                print("Please select a valid dataset.")
                return
            
            y_pred_path = os.path.join(prediction_folder, selected_file)
            y_pred = pd.read_csv(y_pred_path).iloc[:, 0]  # Assuming first column has predictions
            
            results, confusion = calculate_classification_metrics(y_true, y_pred)
            
            display(pd.DataFrame(results))
            display(pd.DataFrame(confusion, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1']))
            
            save_classification_results_to_excel(results, confusion, selected_model, selected_dataset, selected_file.replace('.csv', ''))
        
        generate_button = widgets.Button(description='Generate Report')
        generate_button.on_click(generate_report)
        
        display(dataset_type, file_selector, generate_button)
    
    model_dropdown.observe(on_model_selected, names='value')
    display(model_dropdown)

# Example usage (update the paths and data accordingly)
prediction_folder = "C:/Users/adity/Downloads/aditya_automl_code/testing/"
sampled_y_train = pd.Series([1,0,0,1,1,1,0,0])  # Replace with actual data
y_test = pd.Series([0,0,0,1,1,1,0,0])  # Replace with actual data
create_widgets_for_metrics(prediction_folder, sampled_y_train, y_test)


Dropdown(description='Model:', options=('test_file',), value='test_file')

Dropdown(description='Dataset:', options=(('Train', 'train'), ('Test', 'test')), value='train')

Dropdown(description='Pred File:', options=('test_file.csv',), value='test_file.csv')

Button(description='Generate Report', style=ButtonStyle())

ValueError: Found input variables with inconsistent numbers of samples: [8, 5]

In [1]:
import pandas as pd

In [2]:
sampled_y_train = pd.Series([1,0,0,1,1,1,0,0])  # Replace with actual data
y_test = pd.Series([0,0,0,1,1,1,0,0])  # Replace with actual data

In [4]:
pd.DataFrame({'a':[0.99,0.89,0.67,0.33,0.21,0.99,0.88,0.5]}).to_csv('train_file.csv', index = False)

In [5]:
pd.DataFrame({'a':[0.91,0.84,0.66,0.22,0.11,0.19,0.44,0.53]}).to_csv('test_file.csv', index = False)

In [4]:
pd.read_csv('C:/Users/adity/Downloads/aditya_automl_code/testing/test_file.csv')

Unnamed: 0,a
0,1
1,2
2,3
3,4
4,5
