In [1]:
""" 

Extractor.ipynb

This Jupyter notebook contains a Python class called HeaderSelector. The HeaderSelector class is used to read an Excel spreadsheet, display the headers in a user-friendly, natural language format, 
and allow the user to select multiple headers from a list. The selected headers are then used to create a new DataFrame, which is saved to a new Excel file. 

The class also includes methods to apply custom CSS styles to the widgets and the container of the widgets, providing an overall dark aesthetic.

The notebook uses ipywidgets for creating interactive widgets, pandas for handling data, and the openpyxl library for reading and writing Excel files.

This script is written to pass Pylint checks and adheres to standard Python conventions for readability and maintainability.

Author: [Ara Alexandrian] Date: [1/24/2024] 

"""

import pandas as pd
import configparser
import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display

# Read the spreadsheet location from the [SPREADSHEET] section of config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini (resolved
# relative to the current working directory) was not loaded. Fail here with a
# clear message instead of a bare KeyError on the section lookup below.
if not config.read(r'config.ini'):
    raise FileNotFoundError("config.ini could not be read from the current working directory")
spreadsheet_path = config['SPREADSHEET']['FILE_PATH']

def convert_to_natural_language(self, headers):
    """Shorten dotted header names and drop the ``Local_ID`` entry.

    Each header is reduced to the text after its last ``.``. A header whose
    shortened form is exactly ``'Local_ID'`` is left out of the result.

    Args:
        self: Not used by the body; kept as the first positional argument.
        headers: Iterable of header strings.

    Returns:
        list: Shortened names in input order, without ``'Local_ID'``.
    """
    short_names = (name.split('.')[-1] for name in headers)
    return [short for short in short_names if short != 'Local_ID']


def apply_styles(self):
    """Inject a ``<style>`` tag that restyles the selector widgets.

    The CSS gives the multi-select list, its options and the button a
    transparent background with light (#f8f8f2) text. The tag is wrapped in an
    ``ipywidgets.HTML`` widget and displayed immediately.

    Args:
        self: Not used by the body; kept as the first positional argument.
    """
    css_rules = """
    .widget-select-multiple, .widget-button {
        background-color: transparent !important;
        color: #f8f8f2 !important;
    }
    .widget-select-multiple .selected, .widget-select-multiple .option {
        background-color: transparent !important;
    }
    .widget-button .label {
        color: #f8f8f2 !important;
    }
    """
    # Displaying the HTML widget is what makes the rules take effect.
    display(widgets.HTML("<style>{}</style>".format(css_rules)))



Author: [Ara Alexandrian] Date: [1/24/2024] 

"""

import pandas as pd
import configparser
import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display

# Read the spreadsheet location from the [SPREADSHEET] section of config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini (resolved
# relative to the current working directory) was not loaded. Fail here with a
# clear message instead of a bare KeyError on the section lookup below.
if not config.read(r'config.ini'):
    raise FileNotFoundError("config.ini could not be read from the current working directory")
spreadsheet_path = config['SPREADSHEET']['FILE_PATH']

def convert_to_natural_language(self, headers):
    """Return the last dot-separated segment of each header, minus ``Local_ID``.

    NOTE(review): a function with this name and body is also defined earlier
    in this notebook export; this later definition rebinds the name.

    Args:
        self: Not used by the body; kept as the first positional argument.
        headers: Iterable of header strings.

    Returns:
        list: Trailing segments in input order, skipping ``'Local_ID'``.
    """
    result = []
    for full_name in headers:
        leaf = full_name.rsplit('.', 1)[-1]
        if leaf == 'Local_ID':
            continue
        result.append(leaf)
    return result


def apply_styles(self):
    """Display an HTML widget carrying the CSS overrides for the widgets.

    NOTE(review): a function with this name and body is also defined earlier
    in this notebook export; this later definition rebinds the name.

    Args:
        self: Not used by the body; kept as the first positional argument.
    """
    stylesheet = """
    .widget-select-multiple, .widget-button {
        background-color: transparent !important;
        color: #f8f8f2 !important;
    }
    .widget-select-multiple .selected, .widget-select-multiple .option {
        background-color: transparent !important;
    }
    .widget-button .label {
        color: #f8f8f2 !important;
    }
    """
    style_tag = "<style>{}</style>".format(stylesheet)
    display(widgets.HTML(style_tag))



In [4]:
from ipywidgets import Layout

class HeaderSelector:
    """Interactive picker that exports chosen spreadsheet columns to Excel.

    The spreadsheet is read into ``self.df``. Every column label is offered in
    a ``SelectMultiple`` list under a shortened display name (the text after
    the last ``.``). Clicking *Save* writes the chosen columns to
    ``self.output_path``.

    Attributes:
        df: DataFrame read from the spreadsheet, with stripped column labels.
        headers: Display names, in the same order as ``df.columns``.
        selected_headers: Display names chosen at the most recent *Save* click.
        output_path: Path of the Excel file written on *Save*.
        listbox: The ``SelectMultiple`` widget listing ``headers``.
        save_button: The *Save* button.
        container: ``VBox`` holding both widgets; pass it to ``display``.
    """

    def __init__(self, spreadsheet_path, output_path='extracted.xlsx'):
        """Load the spreadsheet and build the selection widgets.

        Args:
            spreadsheet_path: Path handed to ``pandas.read_excel``.
            output_path: Where *Save* writes the extracted columns. Defaults
                to ``'extracted.xlsx'``.
        """
        self.df = pd.read_excel(spreadsheet_path)
        self.df.columns = self.df.columns.str.strip()
        self.headers = self.convert_to_natural_language(self.df.columns.tolist())
        self.selected_headers = []
        self.output_path = output_path

        # Adjust the height and width of the SelectMultiple widget
        self.listbox = widgets.SelectMultiple(options=self.headers, layout=Layout(height='300px', width='500px'))
        self.save_button = widgets.Button(description='Save', button_style='success')
        self.save_button.on_click(self.on_save_clicked)

        # NOTE(review): `background_color` does not appear to be a Layout
        # trait, so this keyword may have no visible effect — confirm against
        # the ipywidgets Layout documentation.
        self.container = widgets.VBox([self.listbox, self.save_button], layout=Layout(padding='10px', background_color='#282a36'))

        # Apply custom CSS styles
        self.apply_styles()

    def apply_styles(self):
        """Display an HTML widget whose ``<style>`` tag applies the dark theme."""
        styles = """
        .widget-select-multiple, .widget-button {
            background-color: #282a36 !important;
            color: #f8f8f2 !important;
        }
        .widget-select-multiple .selected, .widget-select-multiple .option {
            background-color: #44475a !important;
        }
        .widget-button .label {
            color: #f8f8f2 !important;
        }
        """
        widget_style = widgets.HTML("<style>{}</style>".format(styles))
        display(widget_style)

    def _resolve_columns(self, selected_headers):
        """Translate display names back to the DataFrame's own column labels.

        The list box shows shortened names, but ``self.df`` is still indexed by
        the full labels, so the selection has to be mapped back before it can
        be used to slice the frame.

        Args:
            selected_headers: Display names, as produced by
                ``convert_to_natural_language``.

        Returns:
            list: Matching column labels in selection order, without repeats.

        Raises:
            KeyError: If a display name matches no column.
        """
        labels = self.df.columns.tolist()
        column_for_header = {}
        for label, shown in zip(labels, self.convert_to_natural_language(labels)):
            # When two labels shorten to the same name, the first column wins.
            column_for_header.setdefault(shown, label)
        resolved = [column_for_header[name] for name in selected_headers]
        # dict.fromkeys drops repeats while keeping the selection order.
        return list(dict.fromkeys(resolved))

    def on_save_clicked(self, button):
        """Write the columns selected in the list box to ``self.output_path``.

        Args:
            button: The clicked button, supplied by ipywidgets; not used.
        """
        self.selected_headers = list(self.listbox.value)
        print("Selected headers:", self.selected_headers)

        if not self.selected_headers:
            # Avoid writing an empty workbook and reporting it as a success.
            print("Error: No headers selected; nothing was saved")
            return

        try:
            columns = self._resolve_columns(self.selected_headers)
        except KeyError:
            print("Error: Some selected headers are not in the DataFrame")
            return

        new_df = self.df[columns]
        new_df.to_excel(self.output_path, index=False)
        print("Success! The selected headers have been saved to {}".format(self.output_path))

    def convert_to_natural_language(self, headers):
        """Return the text after the last ``.`` of each header.

        Args:
            headers: Iterable of header strings.

        Returns:
            list: One shortened name per input header, in input order.
        """
        return [header.split('.')[-1] for header in headers]

# Build the selector for the workbook, echo its display names, and render the
# widgets below the cell.
# NOTE(review): the workbook path is hard-coded here, while `spreadsheet_path`
# read from config.ini in the first cell is not used in the visible cells —
# confirm which source of the path is intended.
header_selector = HeaderSelector('Events_runninglist.xlsx')
print("DataFrame headers:", header_selector.headers)
# `container` is the VBox holding the list box and the Save button.
display(header_selector.container)


HTML(value='<style>\n        .widget-select-multiple, .widget-button {\n            background-color: #282a36 …

DataFrame headers: ['Event Number', 'Date_Time_Submitted', 'Classification', 'Location_Sub', 'Narrative', 'Tx_Technique', 'Local_ID', 'Tag/Topic', 'Reporter_Name', 'Discoverer_Role', 'Narrative_Supplemental', 'Event_Title']


VBox(children=(SelectMultiple(layout=Layout(height='300px', width='500px'), options=('Event Number', 'Date_Tim…