# deleted code

In [None]:
def get_undersampled_df(melt_df, sampling_step):
    """Keep every ``sampling_step``-th wavelength point of each measurement.

    The rows are ordered by ``wavelength``; inside each ``measurement_index``
    group the rows at positions 0, ``sampling_step``, 2 * ``sampling_step``,
    ... are kept and the rest are dropped.

    Args:
        melt_df: Long-format DataFrame with at least the columns
            ``measurement_index`` and ``wavelength``.
        sampling_step: Positive integer stride between kept rows.

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index that still contains
        every input column (including ``measurement_index``). Rows are
        grouped by ``measurement_index`` and ordered by ``wavelength`` inside
        each group; when no row is dropped (e.g. ``sampling_step == 1``) the
        rows are returned in plain ``wavelength`` order.

    Raises:
        ValueError: If ``sampling_step`` is smaller than 1.
    """
    if sampling_step < 1:
        raise ValueError(
            f"sampling_step must be a positive integer, got {sampling_step!r}"
        )

    ordered = melt_df.sort_values('wavelength')
    # groupby ignores rows whose key is missing; drop them explicitly so the
    # selection below cannot keep them by accident.
    ordered = ordered[ordered['measurement_index'].notna()]

    # Select by position inside each measurement instead of groupby.apply:
    # applying a function to groups that still contain the grouping column is
    # deprecated in pandas and would lose 'measurement_index' from the result.
    position_in_group = ordered.groupby('measurement_index').cumcount()
    keep = (position_in_group % sampling_step == 0).to_numpy()

    if keep.all():
        # Nothing was dropped: keep the wavelength-ordered frame as is, which
        # is the ordering callers have always received in this case.
        undersampled = ordered
    else:
        # A stable sort groups the measurements together while preserving the
        # wavelength order inside each of them.
        undersampled = ordered[keep].sort_values('measurement_index', kind='stable')
    return undersampled.reset_index(drop=True)

In [None]:
def get_accuracy_of_undersampled_model(sampling_step):
    """Train on an undersampled copy of the spectra and report test accuracy.

    Reads the notebook-level ``melt_df``, keeps every ``sampling_step``-th
    point per measurement, builds the vector frame from the ``'intensity'``
    column and runs ``train_model_and_get_results`` on it.

    Args:
        sampling_step: Stride passed on to ``get_undersampled_df``.

    Returns:
        Tuple ``(accuracy, num_datapoints)``: the accuracy on the test split
        and the number of feature columns (``X.shape[1]``) of the matrix
        built from the undersampled vector frame.
    """
    # Undersample the long-format data and turn it into the vector frame.
    undersampled_melt_df = get_undersampled_df(melt_df, sampling_step)
    vector_df = mf.get_vector_df(undersampled_melt_df, 'intensity')

    # Train the model; only the test split is scored here.
    class_train_df, class_test_df = train_model_and_get_results(vector_df)
    accuracy, _f1 = get_accuracy_and_f1(
        class_test_df.loc[:, 'true_gmm_class'],
        class_test_df.loc[:, 'gmm_predicted_class'],
    )

    # Take the feature count from THIS undersampled frame. There is no local
    # X in this function, so the previous X.shape[1] silently picked up
    # whatever X happened to exist in the notebook namespace.
    X, _y = mf.get_X_y_from_vector_df(vector_df)
    num_datapoints = X.shape[1]
    return accuracy, num_datapoints

In [None]:
def train_model_and_get_results(vector_df):
    """Fit an LDA projection followed by a GMM and label both data splits.

    The feature matrix and labels come from ``mf.get_X_y_from_vector_df``.
    The data is split 80/20, projected with an impute -> scale -> LDA
    pipeline fitted on the training split only, and clustered with a
    4-component Gaussian mixture. Cluster ids are then mapped to class labels
    using ``mf.majority_vote`` on the training split.

    Args:
        vector_df: Vector frame accepted by ``mf.get_X_y_from_vector_df``.

    Returns:
        Tuple ``(class_train_df, class_test_df)`` as produced by
        ``mf.convert_GMM_to_class_labels`` for the train and test splits.
    """
    X, y = mf.get_X_y_from_vector_df(vector_df)
    # 80/20 train/test split; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=True
    )

    # Supervised projection: impute missing values, standardise, then LDA.
    lda_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('lda', LinearDiscriminantAnalysis())
    ])

    # Clustering on the LDA-projected data.
    # NOTE(review): no random_state is passed to GaussianMixture, so the
    # cluster assignment can differ between runs -- confirm this is intended.
    gmm_pipeline = Pipeline([
        ('gmm', GaussianMixture(n_components=4))  # Adjust n_components for GMM
    ])

    # Fit the projection once on the training split and reuse it for the test
    # split, so no information from the test data leaks into the fit.
    train_lda_df = pd.DataFrame(lda_pipeline.fit_transform(X_train, y_train))
    test_lda_df = pd.DataFrame(lda_pipeline.transform(X_test))

    # Fit the mixture on the training projection, then assign test samples.
    train_predictions = gmm_pipeline.fit_predict(train_lda_df)
    test_predictions = gmm_pipeline.predict(test_lda_df)

    # Pair the original labels with the cluster ids of each split.
    train_df = pd.DataFrame({'true_class': y_train, 'gmm_predicted_class': train_predictions})
    test_df = pd.DataFrame({'true_class': y_test, 'gmm_predicted_class': test_predictions})

    # Map derived from the training split via mf.majority_vote; the unnamed
    # result column (0) holds the value returned for each true class.
    majority_map = train_df.groupby('true_class').apply(mf.majority_vote).reset_index().rename({
        'true_class': 'gmm_class',
        0: 'gmm_class_numerical'}, axis=1)

    # Apply the same map to both splits.
    class_train_df = mf.convert_GMM_to_class_labels(train_df, majority_map)
    class_test_df = mf.convert_GMM_to_class_labels(test_df, majority_map)

    return class_train_df, class_test_df

In [None]:
# Accuracy of the noise-free model against the noise standard deviation:
# a line with the individual points drawn on top of it.
noise_free_df = concat_df[concat_df['train_or_test'] == 'noise-free model']

lineplot = sns.lineplot(
    noise_free_df,
    x='standard_deviation',
    y='score',
    hue='train_or_test',
)
lineplot.legend_.set_title('')
plt.legend(loc='upper right', bbox_to_anchor=(1.9, 1), fontsize=variables.fontsize)

sns.scatterplot(
    noise_free_df,
    x='standard_deviation',
    y='score',
    hue='train_or_test',
    legend=False,
    s=variables.scatterplot_size,
)

# Hide the right and upper spines, then label and limit the current axes.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.set_ylim(0.2, 1)
ax.set_xlabel('Standard deviation', fontsize=variables.fontsize)
ax.set_ylabel('Accuracy', fontsize=variables.fontsize)

plt.savefig(
    fp.figure_filepath + '/Standard_deviation_vs_accuracy_noise_free_model.png',
    bbox_inches='tight',
)
plt.show()

In [None]:
# NOTE(review): reload_libraries is defined outside this section; presumably it
# re-imports the project modules before the GUI code below runs -- confirm.
reload_libraries()
# tkinter provides the widgets used by the GUI functions defined below.
import tkinter as tk


def filename_on_key_release(event, filename_var, filename_suggestion_listbox, filename_suggestion_items):
    """Refresh the suggestion listbox from the text currently in the entry.

    The listbox is emptied and refilled: with every item when the entry is
    empty, otherwise with the items that contain the entered text
    (case-insensitive substring match).

    Args:
        event: The key-release event; not used.
        filename_var: Variable holding the entry text (read via ``get()``).
        filename_suggestion_listbox: Listbox that displays the suggestions.
        filename_suggestion_items: Iterable of candidate filenames.
    """
    query = filename_var.get().lower()

    # Drop whatever was suggested for the previous keystroke.
    filename_suggestion_listbox.delete(0, tk.END)

    if query:
        shown = [name for name in filename_suggestion_items if query in name.lower()]
    else:
        # Nothing typed yet: offer every filename.
        shown = filename_suggestion_items

    for name in shown:
        filename_suggestion_listbox.insert(tk.END, name)

def on_filename_suggestion_selected(event,filename_var, filename_suggestion_listbox):
    """Copy the clicked suggestion into the entry and hide the suggestions.

    Does nothing when the listbox reports no selection.

    Args:
        event: The mouse event; not used.
        filename_var: Variable backing the filename entry (written via ``set()``).
        filename_suggestion_listbox: Listbox holding the suggestions.
    """
    picked = filename_suggestion_listbox.curselection()
    if not picked:
        return

    filename_var.set(filename_suggestion_listbox.get(picked))
    # The choice is made, so the suggestions are no longer needed.
    filename_suggestion_listbox.delete(0, tk.END)

def plot_spectrum(fig, canvas, filename_var):
    """Draw the spectrum of the selected file on the embedded figure.

    The CSV named by ``filename_var`` is read from the notebook-level
    ``testing_data_path`` and its ``intensity`` is plotted against
    ``wavelength``.

    Args:
        fig: Matplotlib figure embedded in the GUI; it is cleared first.
        canvas: Canvas that owns ``fig``; redrawn at the end.
        filename_var: Variable holding the selected filename.
    """
    # Start from an empty figure so consecutive plots do not pile up.
    fig.clear()
    df = pd.read_csv(testing_data_path + '/' + filename_var.get())

    ax1 = fig.add_subplot(111)
    ax1.set_xlabel(fln.spectrum_x)
    ax1.set_ylabel(fln.spectrum_y)
    # Drop the last four characters (e.g. ".csv") from the name in the title.
    ax1.set_title('Spectrum of ' + "\"" + filename_var.get()[:-4] + "\"")
    sns.lineplot(data=df, x='wavelength', y='intensity', color='black', ax=ax1)

    fig.set_size_inches(4, 4)
    # Lay out THIS figure: plt.tight_layout() targets pyplot's current figure,
    # which is not the figure embedded in the canvas.
    fig.tight_layout()
    canvas.draw()



def plot_prediction(prediction_fig, prediction_canvas, filename_var):
    """Plot the predicted class probabilities for the selected file.

    The CSV named by ``filename_var`` is read from the notebook-level
    ``testing_data_path``, passed to ``mf.predict_class_probability`` and the
    result is shown as a horizontal bar plot. The figure is also written to
    ``prediction.png`` in the current working directory.

    Args:
        prediction_fig: Matplotlib figure embedded in the GUI; cleared first.
        prediction_canvas: Canvas that owns ``prediction_fig``; redrawn here.
        filename_var: Variable holding the selected filename.

    Returns:
        The name of the saved image file (``"prediction.png"``).
    """
    # Start from an empty figure so repeated predictions do not stack axes.
    prediction_fig.clear()

    # Load the selected spectrum here: no ``df`` exists in this function's
    # scope, so relying on one would use unrelated notebook state.
    df = pd.read_csv(testing_data_path + '/' + filename_var.get())

    probability_df = mf.predict_class_probability(df)
    probability_df = mf.replace_bacterial_names_for_plotting(probability_df)
    ax1 = prediction_fig.add_subplot(111)
    sns.barplot(data=probability_df, y='gmm_class', x='probability', ax=ax1)
    ax1.set_xticklabels(ax1.get_xticklabels(), rotation=45, ha='right', fontsize=8)
    ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=8)
    ax1.set_title("Prediction")

    prediction_fig.set_size_inches(4, 3)
    # Lay out THIS figure: plt.tight_layout() targets pyplot's current figure,
    # which is not the figure embedded in the canvas.
    prediction_fig.tight_layout()
    prediction_canvas.draw()

    # Save the prediction figure with a tight bounding box.
    filename = "prediction.png"
    prediction_fig.savefig(filename, bbox_inches='tight')
    return filename
    
def create_gui(data_directory):
    """Build the "Raman prediction" window and run its event loop.

    The window has three label frames side by side: a filename picker (entry
    plus suggestion listbox), a figure frame for the spectrum plot and a
    prediction frame for the prediction plot. The call does not return until
    the Tk main loop ends.

    Args:
        data_directory: Passed to ``get_most_recent_filenames_list`` to obtain
            the filenames offered as suggestions.
    """
    # Create main window
    root = tk.Tk()
    root.title("Raman prediction")

    # Shared widget width and grid padding.
    width = 25
    padx = 5
    pady = 5
    
    # Filename frame: entry backed by filename_var plus a suggestion listbox.
    filename_frame = tk.LabelFrame(root, relief = "groove", text = 'Select Filename: ', bg = 'white')
    filename_frame.config(font=6)
    filename_var = tk.StringVar(root)
    filename_var.set('')
    filename_entry= tk.Entry(filename_frame, textvariable = filename_var, width=width)
    # The filename list is fetched again on every key release. The lambda uses
    # filename_suggestion_listbox, which is created just below; that is fine
    # because the name is only looked up when the event fires.
    filename_entry.bind("<KeyRelease>", lambda event: filename_on_key_release(event, filename_var, filename_suggestion_listbox, get_most_recent_filenames_list(data_directory)))

    filename_suggestion_listbox = tk.Listbox(filename_frame, width = width)
    # Releasing the left mouse button on a suggestion copies it into the entry.
    filename_suggestion_listbox.bind("<ButtonRelease-1>", lambda event: on_filename_suggestion_selected(event, filename_var, filename_suggestion_listbox))
    # Get most recent filenames
    filenames = get_most_recent_filenames_list(data_directory)
    # Populate suggestion listbox with all filenames
    for filename in filenames:
        filename_suggestion_listbox.insert(tk.END, filename)

    # Figure frame: matplotlib figure embedded through a Tk canvas.
    figure_frame = tk.LabelFrame(root,relief = 'groove', bg = 'white', text = 'Plot and prediction window ')
    fig = Figure(figsize=(4, 4), dpi=100) 
    canvas = FigureCanvasTkAgg(fig, master=figure_frame)
    canvas.draw() 
    canvas.get_tk_widget().grid(row=0, column=0, columnspan=2, rowspan=1)
    # NOTE(review): despite its label this button only calls plot_spectrum;
    # the prediction is triggered by the separate 'predict!' button below.
    plot_button = tk.Button(filename_frame, text='Plot and predict!', command= lambda: plot_spectrum(fig, canvas, filename_var))

    # Prediction frame: second embedded figure for the prediction plot.
    # NOTE(review): the frame label text contains a typo ("wiondow").
    prediction_frame = tk.LabelFrame(root,relief = 'groove', bg = 'white', text = 'prediction wiondow ')
    prediction_fig = Figure(figsize=(4, 4), dpi=100) 
    prediction_canvas = FigureCanvasTkAgg(prediction_fig, master=prediction_frame)
    prediction_canvas.draw() 
    prediction_canvas.get_tk_widget().grid(row=0, column=0, columnspan=13, rowspan=15)
    # The predict button lives in the figure frame, below the spectrum canvas.
    predict_button = tk.Button(figure_frame, text='predict!', command= lambda: plot_prediction(prediction_fig, prediction_canvas, filename_var))

    
    # Place all widgets on the grid.
    # The filename frame and its children
    filename_frame.grid(row = 0, column = 0, rowspan=2, padx = 10, pady= 10)    
    filename_entry.grid(row = 1, column=0, sticky = 'e', padx= padx)
    plot_button.grid(row = 1, column = 1, sticky = 'e', pady=pady)
    filename_suggestion_listbox.grid(row=2, column=0)
    # The figure frame
    figure_frame.grid(row = 0, column = 1, rowspan=10, padx = 10, pady= 10)   
    # The prediction frame and the predict button
    prediction_frame.grid(row = 0, column = 2, rowspan=10, padx = 20, pady= 10) 
    predict_button.grid(row = 1, column = 0, sticky = 'e', pady=pady)
    # Run the main event loop
    root.mainloop()
# Build the GUI for the files found via testing_data_path. create_gui runs the
# Tk main loop, so this call does not return until that loop ends.
create_gui(testing_data_path)