In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from tkinter import Tk, filedialog
import numpy as np
import ezodf

In [None]:
""" Open a windows that create a grid to entry fields and convert it to a pandas dataframe """

import tkinter as tk
from tkinter import messagebox
import pandas as pd

class ExcelLikeGridApp:
    """Tkinter window holding a spreadsheet-like grid of text entries.

    The user types a row and a column count, clicks "Create Table" to build
    the grid, may paste tab/newline-separated clipboard text into it, and
    clicks "Convert to DataFrame" to turn the grid contents into a pandas
    DataFrame (printed, previewed in a new window and kept on ``self.df``).
    """

    def __init__(self, master):
        """Build the row/column input bar inside ``master``.

        Args:
            master: Tk window that hosts the app; its title is set here.
        """
        self.master = master
        self.master.title("Excel-Like Table")
        # 2-D list (rows, then columns) of the tk.Entry widgets currently shown.
        self.entries = []
        # Last DataFrame produced by convert_to_dataframe(); None until then.
        self.df = None

        # Input for number of rows and columns
        input_frame = tk.Frame(master)
        input_frame.pack(pady=10)

        tk.Label(input_frame, text="Rows:").grid(row=0, column=0)
        self.row_entry = tk.Entry(input_frame, width=5)
        self.row_entry.grid(row=0, column=1)

        tk.Label(input_frame, text="Columns:").grid(row=0, column=2)
        self.col_entry = tk.Entry(input_frame, width=5)
        self.col_entry.grid(row=0, column=3)

        create_btn = tk.Button(input_frame, text="Create Table", command=self.create_table)
        create_btn.grid(row=0, column=4, padx=10)

        # The grid frame is only packed once a table has been created.
        self.table_frame = tk.Frame(master)
        # Created lazily by create_table() so it appears below the grid.
        self.df_button = None

    def create_table(self):
        """(Re)build the entry grid with the requested number of rows/columns.

        When the inputs are not positive integers an error dialog is shown
        and any grid that already exists is left untouched.
        """
        # Validate before clearing, so a typo does not wipe a filled-in grid.
        try:
            rows = int(self.row_entry.get())
            cols = int(self.col_entry.get())
            if rows <= 0 or cols <= 0:
                raise ValueError
        except ValueError:
            messagebox.showerror("Invalid input", "Please enter positive integers for rows and columns.")
            return

        # Clear previous table
        for widget in self.table_frame.winfo_children():
            widget.destroy()
        self.entries = []

        self.table_frame.pack(pady=10)

        # Create grid
        for r in range(rows):
            row_entries = []
            for c in range(cols):
                entry = tk.Entry(self.table_frame, width=15)
                entry.grid(row=r, column=c, padx=1, pady=1)
                # Intercept paste so multi-cell clipboard text spreads over the grid.
                entry.bind("<Control-v>", self.paste_from_clipboard)
                entry.bind("<Command-v>", self.paste_from_clipboard)
                row_entries.append(entry)
            self.entries.append(row_entries)

        # Add Convert to DataFrame button (only once)
        if not self.df_button:
            self.df_button = tk.Button(self.master, text="Convert to DataFrame", command=self.convert_to_dataframe)
            self.df_button.pack(pady=10)

    def paste_from_clipboard(self, event):
        """Paste tab/newline-separated clipboard text starting at the focused cell.

        Lines map to grid rows and tab-separated fields to columns, anchored
        at ``event.widget``. Text that would fall outside the grid is dropped.
        Empty fields (including a leading one) are pasted as empty cells so
        columns stay aligned, and CRLF / CR line endings are accepted.

        Args:
            event: Tk event whose ``widget`` is the entry receiving the paste.

        Returns:
            The string "break", which stops Tk from also running the default
            paste binding of the entry.
        """
        try:
            clipboard = self.master.clipboard_get()
        except tk.TclError:
            # Clipboard empty or not available as text: nothing to paste.
            return "break"

        start_widget = event.widget

        for r, row in enumerate(self.entries):
            if start_widget in row:
                start_row = r
                start_col = row.index(start_widget)
                break
        else:
            # The event came from a widget that is not part of the grid.
            return "break"

        # Normalise line endings, then drop only the trailing line break(s).
        # Tabs are kept on purpose: a leading tab means "first cell is empty".
        text = clipboard.replace('\r\n', '\n').replace('\r', '\n').rstrip('\n')

        for i, line in enumerate(text.split('\n')):
            r = start_row + i
            if r >= len(self.entries):
                break  # ran past the last grid row
            for j, cell in enumerate(line.split('\t')):
                c = start_col + j
                if c >= len(self.entries[r]):
                    break  # ran past the last grid column
                entry = self.entries[r][c]
                entry.delete(0, tk.END)
                entry.insert(0, cell)

        return "break"

    def convert_to_dataframe(self):
        """Collect the grid into a DataFrame, print it and open a preview window.

        Every value is the raw text of its cell; no header row is inferred,
        so columns get pandas' default integer labels.

        Returns:
            The generated DataFrame. It is also stored on ``self.df`` so the
            result stays reachable from the notebook after the window closes.
        """
        data = [[entry.get() for entry in row_entries] for row_entries in self.entries]

        df = pd.DataFrame(data)
        self.df = df
        print("\nGenerated DataFrame:\n", df)

        # Show preview in a new Text widget
        top = tk.Toplevel(self.master)
        top.title("DataFrame Preview")
        text = tk.Text(top, wrap="none", width=100, height=20)
        text.insert("1.0", df.to_string(index=False))
        text.pack(padx=10, pady=10)

        return df

# Run the app: create the Tk root window, attach the grid UI to it, and hand
# control to the Tk event loop via root.mainloop().
# `root` and `app` are left as notebook-level names.
if __name__ == "__main__":
    root = tk.Tk()
    app = ExcelLikeGridApp(root)
    root.mainloop()


In [None]:
""" Load dataset as Pandas dataframe """

# Function to open a file dialog and load an Excel, ODS, or CSV file into a pandas DataFrame
def load_file():
    """Ask the user to pick a data file and load it into a pandas DataFrame.

    Supported formats are chosen from the file extension (case-insensitive):
    ``.csv`` via ``pd.read_csv``, ``.xlsx``/``.xls`` via ``pd.read_excel``,
    and ``.ods`` via ``ezodf`` (first sheet, first row used as the header).

    Returns:
        The loaded DataFrame, or None when no file was selected, the file
        type is unsupported, or loading raised an error (a message is
        printed in each of those cases).
    """
    # Initialize Tkinter window (hidden) so only the file dialog is shown.
    root = Tk()
    root.withdraw()
    try:
        # Prompt the user to select a file
        file_path = filedialog.askopenfilename(
            title="Select a file",
            filetypes=[("Excel files", "*.xlsx *.xls"), ("ODS files", "*.ods"), ("CSV files", "*.csv"), ("All files", "*.*")]
        )
    finally:
        # Always release the hidden root; otherwise every call leaks a Tk window.
        root.destroy()

    # Check if a file was selected (the dialog returns an empty value on cancel)
    if not file_path:
        print("No file selected")
        return None

    # Compare extensions case-insensitively so e.g. "DATA.CSV" is accepted.
    lowered_path = file_path.lower()

    try:
        if lowered_path.endswith('.csv'):
            # Load CSV file
            df = pd.read_csv(file_path)
        elif lowered_path.endswith(('.xlsx', '.xls')):
            # Load Excel file
            df = pd.read_excel(file_path)
        elif lowered_path.endswith('.ods'):
            # Load the first sheet of an OpenDocument spreadsheet
            spreadsheet = ezodf.opendoc(file_path)
            sheet = spreadsheet.sheets[0]
            data = [[cell.value for cell in row] for row in sheet.rows()]
            if data:
                # First row holds the column names, the rest is the data.
                df = pd.DataFrame(data[1:], columns=data[0])
            else:
                # Sheet without any row: return an empty frame instead of failing.
                df = pd.DataFrame()
        else:
            print("Unsupported file type")
            return None
    except Exception as e:
        # Best effort: report the problem and let the notebook carry on.
        print(f"Error loading file: {e}")
        return None

    print(f"File loaded successfully: {file_path}")
    return df


In [None]:
# Load the data: load_file() opens a file dialog and returns None when no file
# was selected or loading failed, so the preview is shown only when a frame
# is actually available.
df = load_file()
if df is not None:
    print(df.head())

In [None]:
""" Calculate bin size and create histogram """

# Calculate the IQR
IQR = np.percentile(data, 75) - np.percentile(data, 25)

# Calculate the bin width using the Freedman-Diaconis rule
bin_width_fd = 2 * IQR / np.power(len(data), 1/3)

# Calculate the bin width using Sturges' rule
bin_width_sturgess = (np.max(data) - np.min(data)) / (1 + 3.3 * np.log10(len(data)))

# Calculate the bin width using Scott's rule
bin_width_scott = 3.49 * np.std(data) / np.power(len(data), 1/3)

print("bin width: {:.4f} (Freedman-Diaconis); {:.4f} (Sturges); {:.4f} (Scott)".format(bin_width_fd, bin_width_sturgess, bin_width_scott))

# Create histograms using the different bin widths
plt.hist(data, bins=np.arange(min(data), max(data), bin_width_fd))
# plt.hist(data, bins=np.arange(min(data), max(data), bin_width_sturgess))
# plt.hist(data, bins=np.arange(min(data), max(data), bin_width_scott))
plt.show()