# In [62]:
import tkinter.font as tkFont
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import os
from pathlib import Path
from typing import List,Dict,Literal,Union,Any
import json
import numpy as np
import time
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn  as sns
import tkinter as tk
import tkinter
from tkinter import messagebox
import datetime

# In [63]:
# Reset matplotlib to its built-in 'default' style, then apply Seaborn's theme
# with the font scale reduced to 0.7. Both calls run at import time, so the
# settings apply to figures created after this point.
plt.style.use('default')
sns.set_theme(font_scale=0.7)

def make_autopct(values):
    """Build an ``autopct`` callback that labels pie wedges with absolute counts.

    Args:
        values: The sequence of wedge sizes that is handed to the pie chart.

    Returns:
        A function that takes a wedge's percentage and returns the matching
        count, rounded to the nearest integer and wrapped in parentheses.
    """
    def my_autopct(pct):
        # Turn the percentage back into the count it was derived from.
        count = int(round(pct * sum(values) / 100.0))
        return f'({count:d})'
    return my_autopct

"""
    Validate a date string in the format 'YYYY_MM_DD'.
    
    Args:
        date_text (str): The date string to be validated.
        
    Raises:
        ValueError: If the date format is incorrect.
    """
def validate(date_text):
    """Check that ``date_text`` is a date written as 'YYYY_MM_DD'.

    Args:
        date_text (str): The date string to be validated.

    Raises:
        ValueError: If ``date_text`` is not a string or does not match the
            'YYYY_MM_DD' format.
    """
    try:
        datetime.datetime.strptime(date_text, '%Y_%m_%d')
    except (ValueError, TypeError) as err:
        # strptime raises TypeError for non-string input; report both kinds of
        # bad input the same way and keep the original error as the cause.
        raise ValueError("Incorrect data format, should be YYYY_MM_DD") from err
        
# Conversion helpers between DataFrames and JSON-serializable dictionaries
def transform_df2json(df:pd.DataFrame) -> Dict:
    """
    Convert a DataFrame into a plain dictionary that can be dumped as JSON.

    Args:
        df (pd.DataFrame): The DataFrame to convert.

    Returns:
        Dict: Maps every column name to the list of that column's values.
    """
    return {column: df[column].values.tolist() for column in df.keys()}

def transform_json2df(json_tmp:Dict) -> pd.DataFrame:
    """
    Transforms a JSON-like dictionary back into a DataFrame.

    Each key becomes a column, in the dictionary's order. The frame is built
    column by column so every column keeps its own dtype; stacking all columns
    into one 2-D NumPy array first would force a single common dtype and turn
    numeric columns into strings as soon as one text column is present.

    Args:
        json_tmp (Dict): Maps column names to equally long lists of values.

    Returns:
        pd.DataFrame: The reconstructed DataFrame. Cells holding the literal
        string 'nan' are replaced with ``np.nan``.
    """
    df = pd.DataFrame({key: list(values) for key, values in json_tmp.items()})
    # Older dumps may contain missing values spelled as the text 'nan'.
    return df.replace('nan', np.nan)

# Reading data
def _load_dataset(folder_path:str) -> List[pd.DataFrame]:
    """Read the two CSV files stored directly inside ``folder_path``.

    Args:
        folder_path (str): Directory expected to hold exactly two ``*.csv`` files.

    Returns:
        List[pd.DataFrame]: One DataFrame per file, in file-name order, with
        surrounding whitespace stripped from every column name.

    Raises:
        NotADirectoryError: If ``folder_path`` is not an existing directory.
        ValueError: If the directory does not contain exactly two CSV files.
    """
    # Explicit exceptions instead of ``assert``: asserts vanish under ``-O`` and
    # carry no message for the caller to display.
    if not os.path.isdir(folder_path):
        raise NotADirectoryError(f"Dataset folder does not exist: {folder_path!r}")
    # Sort so the result does not depend on the file system's listing order.
    csv_paths = sorted(Path(folder_path).glob('*.csv'))
    if len(csv_paths) != 2:
        raise ValueError(
            f"Expected exactly 2 CSV files in {folder_path!r}, found {len(csv_paths)}"
        )
    frames = [pd.read_csv(f, encoding='ISO-8859-1') for f in csv_paths]
    return [df.rename(columns={c: c.strip() for c in df.columns}) for df in frames]

def identify_csv(dfs:List[pd.DataFrame],feature_dict:Dict[str,str] = {'Antenna':'NGR','Param':'EID'}) -> Dict[str,pd.DataFrame]:
    """Work out which loaded CSV is the Antenna file and which is the Param file.

    Args:
        dfs (List[pd.DataFrame]): The loaded CSV frames, in any order.
        feature_dict (Dict[str,str]): For each role, a column name that only that
            file contains, e.g. 'NGR' only in Antenna and 'EID' only in Param
            -> {'Antenna':'NGR','Param':'EID'}. The dictionary is only read,
            never modified.

    Returns:
        Dict[str,pd.DataFrame]: {'Antenna': DataFrame, 'Param': DataFrame}

    Raises:
        ValueError: If no frame contains the identifying column of a role.
    """
    result = {}
    for role in ('Antenna', 'Param'):
        column = feature_dict[role]
        # ``column in df`` tests the frame's column labels.
        match = next((df for df in dfs if column in df), None)
        if match is None:
            # Fail here with a clear message rather than returning the wrong
            # frame and failing later on a missing column.
            raise ValueError(
                f"None of the CSV files contains the '{column}' column "
                f"that identifies the {role} file"
            )
        result[role] = match
    return result

def load_dataset(folder_path:str,feature_dict:Dict[str,str] = {'Antenna':'NGR','Param':'EID'}) -> Dict[str,pd.DataFrame]:
    """Read the CSV files in a folder and label them by role.

    Args:
        folder_path (str): Path of the folder that holds the dataset CSV files.
        feature_dict (Dict[str,str]): For each role, a column name found only in
            that file, e.g. {'Antenna':'NGR','Param':'EID'}.

    Returns:
        Dict[str,pd.DataFrame]: {'Antenna':DataFrame, 'Param':DataFrame}
    """
    # Read first, then let identify_csv decide which frame plays which role.
    return identify_csv(_load_dataset(folder_path), feature_dict)

# Data preprocessing
# Lookup table of NaN-handling strategies. Each entry takes a DataFrame and
# returns the processed DataFrame: 'any' drops every row that has a NaN, the
# other entries hand the frame to fill_df with the strategy of the same name.
processing_nan_funcs = {'any': lambda df: df.dropna(axis=0, how='any')}
processing_nan_funcs.update({
    # ``_strategy=strategy`` freezes the current name for each lambda.
    strategy: (lambda df, _strategy=strategy: fill_df(df, _strategy))
    for strategy in ('mean', 'std', 'median', 'mode')
})

def fill_df(df:pd.DataFrame, method:Literal['mean','std','median','mode']='mean'):
    """Fill NaN values in a DataFrame using the specified method.

    Text columns (object or string dtype) are forward-filled regardless of
    ``method``. Every other column has its NaNs replaced with that column's
    mean, standard deviation, median or most frequent value.

    Args:
        df (pd.DataFrame): Frame to fill. It is modified in place.
        method: Statistic used for non-text columns. Any other value leaves
            those columns untouched.

    Returns:
        pd.DataFrame: The same ``df`` object, after filling.
    """
    for c in df.columns:
        column = df[c]
        if column.dtype.name == 'object' or pd.api.types.is_string_dtype(column):
            # ffill() replaces the deprecated fillna(method='pad').
            df[c] = column.ffill()
            continue
        if method == 'mean':
            fill_value = column.mean()
        elif method == 'std':
            fill_value = column.std()
        elif method == 'median':
            fill_value = column.median()
        elif method == 'mode':
            modes = column.mode()
            if modes.empty:
                # Column holds nothing but NaN: there is no mode to fill with.
                continue
            fill_value = modes.iloc[0]
        else:
            continue
        df[c] = column.fillna(fill_value)
    return df

def remove_data(df:pd.DataFrame,key:str = 'NGR',values:List[str]=['NZ02553847', 'SE213515', 'NT05399374', 'NT252675908'])->pd.DataFrame:
    """Drop the rows whose ``key`` column holds one of ``values``.

    Args:
        df (pd.DataFrame): Antenna DataFrame.
        key (str): Column to inspect.
        values (List[str]): Entries of ``key`` that mark a row for removal.

    Returns:
        pd.DataFrame: A new frame without the matching rows; ``df`` itself is
        left unchanged.
    """
    unwanted = df[key].isin(values)
    # Rows are dropped by index label, using the labels of the matching rows.
    labels_to_drop = df.index[unwanted]
    return df.drop(labels_to_drop)

def extract_infomation(df_dict:Dict[str,pd.DataFrame],EIDs:List[str]=['C18A', 'C18F', 'C188']) -> pd.DataFrame:
    """Build the working table for the requested EIDs.

    The Param rows with a matching EID are kept and, via the shared 'id'
    column, left-joined with the site height, aerial height and ERP columns of
    the Antenna file.

    Args:
        df_dict (Dict[str,pd.DataFrame]): {'Antenna':DataFrame, 'Param':DataFrame}
        EIDs (List[str]): EIDs to extract.

    Returns:
        pd.DataFrame: The selected Param rows plus 'Site Height',
        'Aerial height(m)' and 'Power(kW)'.
    """
    param_df = df_dict['Param']
    selected = param_df.loc[param_df['EID'].isin(EIDs)]
    wanted_ids = selected['id'].values.tolist()

    antenna_df = df_dict['Antenna']
    antenna_columns = ['id','Site Height','In-Use Ae Ht','In-Use ERP Total']
    antenna_part = antenna_df.loc[antenna_df['id'].isin(wanted_ids)][antenna_columns]

    new_names = {'In-Use Ae Ht':'Aerial height(m)', 'In-Use ERP Total':'Power(kW)'}
    merged = selected.merge(antenna_part,on=['id'],how='left').rename(columns=new_names)

    if merged['Power(kW)'].dtype.name == 'object':
        # Power was read as text: strip the commas, then convert to float.
        without_commas = merged['Power(kW)'].apply(lambda item:str(item).replace(',',''))
        merged['Power(kW)'] = without_commas.astype(float)
    return merged

# In [64]:
class Dataset:
    """The antenna/parameter dataset together with its filter settings.

    Attributes:
        status (int): ``unload_status`` until a dataset has been loaded, then
            ``load_status``.
        dataframe (pd.DataFrame): The merged working table built by
            ``proprecessing``.
        antnna_dataframe (pd.DataFrame): The Antenna source table. The
            misspelt name is kept because it is also the key used in back-up
            files; ``antenna_dataframe`` is an alias for it.
        param_dataframe (pd.DataFrame): The Param source table.
        Antenna_colunms (None or List[str]): Columns of the Antenna table.
        Param_colunms (None or List[str]): Columns of the Param table.
        remove_key (str): Antenna column used to select rows for removal.
        remove_values (List[str]): Values of ``remove_key`` to remove.
        target_EID (List[str]): EIDs kept in the working table.
        Site_Height_bigger_than (int): Site-height threshold for statistics.
        Date_onwards (str): Earliest date included in the statistics.
        statistics (Dict[str, Dict[str, float]]): Calculated statistics.
        name (None or str): Dataset name.
    """
    unload_status = 0
    load_status = 1

    def __init__(self):
        """Create an empty, unloaded dataset with the default filter settings."""
        self._set_default()

    @property
    def antenna_dataframe(self):
        """Correctly spelt alias for ``antnna_dataframe``."""
        return self.antnna_dataframe

    @antenna_dataframe.setter
    def antenna_dataframe(self, value):
        self.antnna_dataframe = value

    def _date_threshold(self):
        """Return ``Date_onwards`` as a pandas Timestamp.

        ``setattr`` only accepts 'YYYY_MM_DD', so that format is tried first;
        anything else (such as the default '2001') is left to pandas' general
        date parser.
        """
        try:
            return pd.to_datetime(self.Date_onwards, format='%Y_%m_%d')
        except (ValueError, TypeError):
            return pd.to_datetime(self.Date_onwards)

    def statistic_in_use_erp_total(self):
        """
        Calculate statistics for In-Use ERP Total.

        Computes the mean, mode and median of 'Power(kW)' over the rows dated
        on or after ``Date_onwards`` whose site height exceeds
        ``Site_Height_bigger_than``, and stores them under
        ``statistics['In-Use ERP Total']``. Missing values are filled with the
        'mode' strategy before filtering. If no row passes the filters the
        mode is NaN.
        """
        temp_df = self.dataframe[['Date','Site Height','Power(kW)']].copy()
        temp_df['Site Height'] = temp_df['Site Height'].astype(float)
        temp_df['Power(kW)'] = temp_df['Power(kW)'].astype(float)
        temp_df = processing_nan_funcs['mode'](temp_df)
        temp_df = temp_df[pd.to_datetime(temp_df['Date']) >= self._date_threshold()]
        # Use the configured threshold, not a fixed 75.
        temp_df = temp_df[temp_df['Site Height'] > float(self.Site_Height_bigger_than)]
        power = temp_df['Power(kW)']
        modes = power.mode()
        self.statistics['In-Use ERP Total'] = {
            'mean': float(power.mean()),
            'mode': float(modes.iloc[0]) if not modes.empty else float('nan'),
            'median': float(power.median()),
        }

    def visual_feature(self):
        """
        Generate pie charts visualizing categorical feature distributions.

        One pie chart is drawn per feature on a 2x4 grid; each wedge is
        labelled with its category and its count.

        Returns:
            matplotlib.figure.Figure: The generated figure containing pie charts.

        Raises:
            Exception: If no dataset is loaded.
        """
        if self.status != Dataset.load_status:
            raise Exception('Dataset must be loaded before attempting any visual operations!')
        plt.clf()
        keys = ['Site', 'Freq.', 'Block', 'Serv Label1', 'Serv Label2', 'Serv Label3', 'Serv Label4','Serv Label10']
        fig = plt.figure(figsize=(20,5),dpi=100)
        fig.subplots_adjust(wspace=0,hspace=0.3)
        for position, item in enumerate(keys, start=1):
            ax1 = fig.add_subplot(2,4,position)
            ax1.set_title(item,fontdict={'size':7})
            temp_df = processing_nan_funcs['mode'](pd.DataFrame(self.dataframe[item].copy()))
            counts = temp_df[item].value_counts()
            # plt.get_cmap replaces cm.get_cmap, which newer Matplotlib removed.
            colors = plt.get_cmap('Wistia')(np.arange(len(counts))/len(counts))
            patches,l_text,p_text = ax1.pie(counts.values,labels=counts.index,
                                            autopct=make_autopct(counts.values),colors=colors)
            for t in l_text:
                t.set_size(6)
            for t in p_text:
                t.set_size(7)
        return fig

    def visual_feature_cor(self):
        """
        Generate count plots for pairs of categorical features.

        Each plot counts the categories of one feature (y axis) split by a
        second feature (hue).

        Returns:
            matplotlib.figure.Figure: The generated figure containing the count plots.

        Raises:
            Exception: If no dataset is loaded.
        """
        if self.status != Dataset.load_status:
            raise Exception('Dataset must be loaded before attempting any visual operations!')
        plt.clf()
        fig = plt.figure(figsize=(12,4))
        fig.subplots_adjust(wspace=0.6,hspace=0.3)
        # (grid slot, y feature, hue feature); slot 5 of the 2x3 grid stays
        # empty, as in the original layout.
        panels = [
            (1, 'Freq.', 'Block'),
            (2, 'Block', 'Serv Label1'),
            (3, 'Serv Label1', 'Serv Label2'),
            (4, 'Serv Label2', 'Serv Label3'),
            (6, 'Serv Label3', 'Serv Label4'),
        ]
        for slot, y_key, hue_key in panels:
            plt.subplot(2,3,slot)
            pair_df = processing_nan_funcs['mode'](self.dataframe[[y_key, hue_key]].copy())
            # Pass the frame by keyword so the call does not depend on
            # seaborn's positional parameter order.
            sns.countplot(data=pair_df,y=y_key,hue=hue_key)
        return fig

    @property
    def key_types(self):
        """
        Return a dictionary specifying attribute names and their corresponding data types.

        These are the attributes written to and read from back-up files, with
        the type each has in the JSON form (DataFrames are stored as dicts).

        Returns:
            dict: Attribute names and their corresponding data types.
        """
        return {'status':int,'dataframe':dict,'remove_key':str,'remove_values':list,'statistics':dict,
                'target_EID':list,'Antenna_colunms':list,'Param_colunms':list,'name':str,
                'Site_Height_bigger_than':int, 'Date_onwards':str,'antnna_dataframe':dict,
                'param_dataframe':dict}

    def load(self, from_:Literal['origin','prepared'], path:str):
        """
        Load data from specified sources and prepare the dataset.

        All attributes are reset to their defaults first, so a failed load
        leaves the dataset unloaded.

        Args:
            from_ (Literal['origin', 'prepared']): 'origin' reads the two CSV
                files in the folder ``path``; 'prepared' reads a JSON back-up
                file at ``path``.
            path (str): Folder ('origin') or JSON file ('prepared') to load.

        Raises:
            ValueError: If ``from_`` is unknown, the prepared dataset is not a
                JSON file, or its content has the wrong format.
        """
        self._set_default() # setting default before load other dataset

        if from_ == 'origin':
            dfs_dict = load_dataset(path)
            self.Antenna_colunms = list(dfs_dict['Antenna'].columns)
            self.Param_colunms = list(dfs_dict['Param'].columns)
            self.antnna_dataframe = dfs_dict['Antenna']
            self.param_dataframe = dfs_dict['Param']
            self.proprecessing()
            self.name = Path(path).stem
            self.status = Dataset.load_status

        elif from_ == 'prepared':
            if path.split('.')[-1].lower() != 'json':
                raise ValueError('The prepared dataset must be in JSON')
            with open(path,'r') as f:
                temp_dataset = json.load(f)
            self._load_checker(temp_dataset)
            for k in self.key_types:
                value = temp_dataset[k]
                # The checker accepts null entries; only real payloads are
                # turned back into DataFrames.
                if 'dataframe' in k and value is not None:
                    value = transform_json2df(value)
                self.__setattr__(k,value)
            self.status = Dataset.load_status

        else:
            raise ValueError(f"Unknown dataset source: {from_!r}")

    def back_up(self,name=None):
        """
        Create a backup of the dataset in JSON format.

        Does nothing when no dataset is loaded.

        Args:
            name (str, optional): Name of the backup file. If None, the backup
                is written to './back_up/bk.json'; otherwise to
                './<name>.json' in the current working directory.

        Notes:
            The 'back_up' directory is created if missing in either case.
        """
        if self.status != Dataset.load_status:
            return
        os.makedirs('./back_up', exist_ok=True)
        back_up_json = {}
        for k in self.key_types:
            value = self.__getattribute__(k)
            if 'dataframe' in k and value is not None:
                value = transform_df2json(value)
            back_up_json[k] = value
        target = './back_up/bk.json' if name is None else f'./{name}.json'
        with open(target,'w') as f:
            json.dump(back_up_json,f)

    def _load_checker(self,temp_dataset):
        """
        Check the validity of the loaded JSON dataset.

        Every key of ``key_types`` must be present and hold either null or a
        value of the listed type.

        Args:
            temp_dataset (dict): Loaded JSON dataset.

        Raises:
            ValueError: If the format of the JSON dataset is not suitable for the program.
        """
        message = 'Format of JSON file is not suitable for this program!'
        if not isinstance(temp_dataset, dict):
            raise ValueError(message)
        for k,t in self.key_types.items():
            if k not in temp_dataset:
                raise ValueError(f"{message} (missing key '{k}')")
            value = temp_dataset[k]
            if value is None or isinstance(value,t):
                continue
            raise ValueError(
                f"{message} (key '{k}' is {type(value).__name__}, expected {t.__name__})"
            )

    def proprecessing(self):
        """
        Perform data preprocessing on the loaded dataframes and create the main dataset.

        Removes the rows selected by ``remove_key``/``remove_values`` from the
        Antenna table, then builds ``dataframe`` from the ``target_EID`` rows.
        """
        cleaned_antenna = remove_data(self.antnna_dataframe, self.remove_key, self.remove_values)
        dis_dict = {'Antenna':cleaned_antenna,'Param':self.param_dataframe}
        self.dataframe = extract_infomation(dis_dict, self.target_EID)

    def setattr(self, __name: str, __value: Any) -> None:
        """
        Set one of the filter settings from its text form.

        'remove_values' and 'target_EID' take a ';'-separated string,
        'Site_Height_bigger_than' is converted with ``int`` and 'Date_onwards'
        must be in 'YYYY_MM_DD' form. Any other name is ignored.

        Args:
            __name (str): Name of the attribute to be set.
            __value (Any): Value to be set for the attribute.

        Raises:
            Exception: If the dataset is not loaded, or the remove key is not
                a column of the Antenna table.
            ValueError: If the height is not an integer or the date is malformed.
        """
        # NOTE: inside a class body the double-underscore parameter names are
        # name-mangled, so this method is meant to be called positionally.
        if self.status != Dataset.load_status:
            raise Exception('Dataset must be loaded before setting attributes!')
        if __name == 'remove_key':
            if __value in self.Antenna_colunms:
                self.remove_key = __value
            else:
                raise Exception('Invalid Remove Key!')
        elif __name == 'remove_values':
            self.remove_values = [item.strip() for item in __value.split(';')]
        elif __name == 'target_EID':
            self.target_EID = [item.strip() for item in __value.split(';')]
        elif __name == 'Site_Height_bigger_than':
            self.Site_Height_bigger_than = int(__value)
        elif __name == 'Date_onwards':
            validate(__value)
            self.Date_onwards = __value

    def _set_default(self):
        """
        Set default values for the dataset attributes.

        Also clears the source tables so nothing from a previously loaded
        dataset survives.
        """
        self.status = Dataset.unload_status
        self.dataframe = None
        self.antnna_dataframe = None
        self.param_dataframe = None
        self.Antenna_colunms = None
        self.Param_colunms = None
        self.remove_key ='NGR'
        self.remove_values = ['NZ02553847', 'SE213515', 'NT05399374', 'NT252675908']
        self.target_EID = ['C18A', 'C18F', 'C188']
        self.Site_Height_bigger_than = 75
        self.Date_onwards = '2001'
        self.statistics = {}
        self.name = None



class GUI_application:
    """Tkinter window for loading, filtering and inspecting a ``Dataset``.

    The right-hand side holds the load/save buttons and the filter entry
    fields; the 936x484 area on the left shows whichever chart or statistics
    label was requested last.
    """

    # Placement of the shared display area on the left of the window.
    _DISPLAY_AREA = {'x': 0, 'y': 10, 'width': 936, 'height': 484}

    def __init__(self):
        """Create the dataset, build the window and register the close handler."""
        self.dataset = Dataset()
        self.window = tkinter.Tk()
        # Widget currently shown in the display area (None until first use).
        self._display_widget = None
        self.set_ui()
        self.window.resizable(width=False, height=False)
        self.window.protocol("WM_DELETE_WINDOW",self.close) # back up

    def close(self):
        """
        Handle the close event of the window, including creating a backup if needed.

        A backup is written only when the user confirms and a dataset is
        loaded. The main loop is then stopped and the window destroyed, so
        ``run`` returns normally.
        """
        try:
            if messagebox.askokcancel("Quit", "Do you want to back up?"):
                if self.dataset.status == Dataset.load_status:
                    self.dataset.back_up()
        except Exception as e:
            self._warn(e)
        finally:
            # quit() ends mainloop(); destroy() then removes the window.
            self.window.quit()
            self.window.destroy()

    def _warn(self, error):
        """Show ``error`` to the user in a warning dialog."""
        messagebox.showwarning("Warning", str(error))

    def _require_loaded(self):
        """Raise if no dataset has been loaded yet."""
        if self.dataset.status == Dataset.unload_status:
            raise Exception("The dataset is unloaded!")

    def _show_in_display(self, widget):
        """Place ``widget`` in the display area, replacing the previous content."""
        previous = getattr(self, '_display_widget', None)
        if previous is not None:
            # Destroy the old widget so repeated clicks do not pile up widgets.
            previous.destroy()
        widget.place(**self._DISPLAY_AREA)
        self._display_widget = widget

    def _add_button(self, text, command, x, y, width, height):
        """Create a button in the shared button style, place it and return it."""
        button = tk.Button(self.window, text=text, command=command, bg="#f0f0f0",
                           fg="#000000", font=self._body_font, justify="center")
        button.place(x=x,y=y,width=width,height=height)
        return button

    def _add_field_label(self, text, y):
        """Create the caption label of a filter field at vertical position ``y``."""
        label = tk.Label(self.window, text=text, fg="#333333",
                         font=self._body_font, justify="center")
        label.place(x=1030,y=y,width=260,height=44)
        return label

    def _add_field_entry(self, variable, y):
        """Create a filter entry bound to ``variable`` at vertical position ``y``."""
        entry = tk.Entry(self.window, textvariable=variable, fg="#333333",
                         font=self._body_font, justify="center")
        entry.place(x=1030,y=y,width=250,height=30)
        return entry

    def set_ui(self):
        """
        Set up the user interface components of the GUI.

        Widgets are created in a fixed order: captions and entries overlap
        slightly, and widgets created later are drawn on top.
        """
        #setting title
        self.window.title("Data Processing")
        #setting window size, centred on the screen
        width=1300
        height=500
        screenwidth = self.window.winfo_screenwidth()
        screenheight = self.window.winfo_screenheight()
        alignstr = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 2)
        self.window.geometry(alignstr)
        self.window.resizable(width=False, height=False)

        # Fonts are stored on the instance so the Font objects outlive this method.
        self._title_font = tkFont.Font(family='Times',size=14)
        self._body_font = tkFont.Font(family='Times',size=10)

        # Dataset Name Label
        self.dataset_name_textvar = tk.StringVar(value="Dataset Name: ")
        self.dataset_name_label = tk.Label(self.window,textvariable=self.dataset_name_textvar,
                                           font=self._title_font, fg="#333333", justify="center")
        self.dataset_name_label.place(x=1030,y=70,width=252,height=37)

        # Load / save buttons along the top
        self.load_init_data_button = self._add_button(
            "Load Initial", self.load_init_dataset, 950, 10, 100, 30)
        self.load_prepare_data_button = self._add_button(
            "Load Prepared", self.load_prepare_dataset, 1060, 10, 100, 30)
        self.save_data_button = self._add_button(
            "Save Dataset", self.save_dataset, 1170, 10, 100, 30)

        # Remove key
        self._add_field_label("Remove Key:", 100)
        self.remove_key = tk.StringVar(value="NGR")
        self.remove_key_entry = self._add_field_entry(self.remove_key, 130)

        # Remove values (';'-separated)
        self._add_field_label("Remove Values:", 160)
        self.remove_values = tk.StringVar(value="NZ02553847; SE213515;NT05399374;NT252675908")
        self.remove_values_entry = self._add_field_entry(self.remove_values, 190)

        # Canvas for displaying plots
        self.canvas=tk.Canvas(self.window)
        self.canvas.place(**self._DISPLAY_AREA)

        # Select EID (';'-separated)
        self._add_field_label("Select EID:", 220)
        self.eid = tk.StringVar(value="C18A;C18F;C188")
        self.eid_entry = self._add_field_entry(self.eid, 250)

        # Site Height threshold
        self._add_field_label("Site Height >:", 280)
        self.bt = tk.StringVar(value="75")
        self.bt_entry = self._add_field_entry(self.bt, 310)

        # From Date
        self._add_field_label("From Date:", 340)
        self.date = tk.StringVar(value="2001_01_01")
        self.date_entry = self._add_field_entry(self.date, 370)

        # Action buttons in the column left of the filter fields
        self.apply_button = self._add_button(
            "Apply\nFilters", self.apply, 950, 130, 70, 35)
        self.show_feature_button = self._add_button(
            "Show\nFeatures", self.show_feature, 950, 170, 70, 35)
        self.show_feature_cor_button = self._add_button(
            "Show\nFeature\nCorrelation", self.show_feature_cor, 950, 210, 70, 55)
        self.show_statistics_button = self._add_button(
            "Show\nStatistics", self.show_statictis, 950, 270, 70, 35)

    def run(self):
        """
        Run the main GUI loop.

        If the loop is interrupted by an exception (for example a keyboard
        interrupt), a loaded dataset is backed up before the exception is
        re-raised. A normal close through ``close`` does not pass through
        this path.
        """
        try:
            self.window.mainloop()
        except BaseException:
            if self.dataset.status == Dataset.load_status:
                self.dataset.back_up()
            raise

    def load_init_dataset(self):
        """
        Load the initial dataset from a directory and update UI elements accordingly.

        Cancelling the directory dialog does nothing.
        """
        from tkinter import filedialog
        path_ = filedialog.askdirectory()
        if not path_:
            # Dialog cancelled: nothing was chosen, so there is nothing to report.
            return
        if not os.path.exists(path_):
            messagebox.showwarning("Warning","Path does not exist!")
            return
        try:
            self.dataset.load('origin',path_)
            self.dataset_name_textvar.set(f"dataset name:{self.dataset.name}")
        except Exception as e:
            self._warn(e)

    def load_prepare_dataset(self):
        """
        Load a prepared (JSON) dataset and copy its filter settings into the entry fields.

        Cancelling the file dialog does nothing.
        """
        from tkinter import filedialog
        path_ = filedialog.askopenfilename()
        if not path_:
            return
        if not os.path.exists(path_):
            messagebox.showwarning("Warning","Path does not exist!")
            return
        try:
            self.dataset.load('prepared',path_)
            self.dataset_name_textvar.set(f"dataset name:{self.dataset.name}")
            self.bt.set(str(self.dataset.Site_Height_bigger_than))
            self.date.set(self.dataset.Date_onwards)
            self.remove_key.set(self.dataset.remove_key)
            self.remove_values.set(';'.join(self.dataset.remove_values))
            self.eid.set(';'.join(self.dataset.target_EID))
        except Exception as e:
            self._warn(e)

    def save_dataset(self):
        """
        Save the dataset under its own name, reporting any error in a dialog.

        Warns when there is no loaded dataset to save.
        """
        try:
            self._require_loaded()
            self.dataset.back_up(self.dataset.name)
        except Exception as e:
            self._warn(e)

    def apply(self):
        """
        Apply the selected filters to the dataset and handle exceptions.

        The settings are pushed to the dataset in field order and the working
        table is rebuilt; the first invalid field stops the process and is
        reported in a dialog.
        """
        try:
            self.dataset.setattr('remove_key',self.remove_key.get())
            self.dataset.setattr('remove_values',self.remove_values.get())
            self.dataset.setattr('target_EID',self.eid.get())
            self.dataset.setattr('Site_Height_bigger_than',self.bt.get())
            self.dataset.setattr('Date_onwards',self.date.get())
            self.dataset.proprecessing()
        except Exception as e:
            self._warn(e)

    def _show_figure(self, make_figure):
        """Build a figure with ``make_figure`` and draw it in the display area."""
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
        try:
            self._require_loaded()
            fig = make_figure()
            canvas_spice = FigureCanvasTkAgg(fig,self.window)
            self._show_in_display(canvas_spice.get_tk_widget())
            canvas_spice.draw()
        except Exception as e:
            self._warn(e)

    def show_feature(self):
        """
        Display the visual features of the dataset using matplotlib and update canvas.
        """
        self._show_figure(self.dataset.visual_feature)

    def show_feature_cor(self):
        """
        Display the visual feature correlations of the dataset using matplotlib and update canvas.
        """
        self._show_figure(self.dataset.visual_feature_cor)

    def show_statictis(self):
        """
        Display the statistics of the dataset and update the UI element.
        """
        try:
            self._require_loaded()
            self.dataset.statistic_in_use_erp_total()
            stats = self.dataset.statistics['In-Use ERP Total']
            text = 'In-Use ERP Total:\n'+'\n'.join([f'{k} = {v}' for k,v in stats.items()])
            # The text is set directly on the label, so no StringVar has to be
            # kept alive for it.
            label = tk.Label(self.window,text=text,font=self._body_font,
                             fg="#333333",justify="center")
            self._show_in_display(label)
        except Exception as e:
            self._warn(e)

# In [65]:
# Script entry point: create the GUI application. Its constructor builds the
# Dataset object, the Tk window and all widgets.
application = GUI_application()
# Run the GUI application's main loop (run() calls the window's mainloop()).
application.run()
