In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from tkinter import Tk, filedialog
import numpy as np
import ezodf

In [None]:
""" Load dataset as Pandas dataframe """

def _read_ods_first_sheet(file_path):
    """Read the first sheet of an ODS document into a DataFrame.

    The first row of the sheet is used as the column header; the remaining
    rows become the data.

    Raises:
        ValueError: if the first sheet contains no rows at all.
    """
    spreadsheet = ezodf.opendoc(file_path)
    sheet = spreadsheet.sheets[0]
    rows = [[cell.value for cell in row] for row in sheet.rows()]
    if not rows:
        # Without this check the header lookup fails with an opaque IndexError.
        raise ValueError("ODS sheet is empty")
    header, *body = rows
    return pd.DataFrame(body, columns=header)


# Function to open a file dialog and load an Excel, ODS or CSV file into a pandas DataFrame
def load_file():
    """Prompt for a file with a Tk dialog and load it into a pandas DataFrame.

    The loader is chosen from the file extension, compared case-insensitively:
    ``.csv`` uses ``pd.read_csv``, ``.xlsx``/``.xls`` use ``pd.read_excel``,
    and ``.ods`` is read through ``ezodf`` (first sheet, first row as header).

    Returns:
        The loaded DataFrame, or ``None`` when no file was selected, the
        extension is not supported, or loading raised an exception. A short
        message describing the outcome is printed in every case.
    """
    # Create the Tk root only to host the dialog; it is hidden immediately.
    root = Tk()
    try:
        root.withdraw()  # Hide the root window

        # Prompt the user to select a file
        file_path = filedialog.askopenfilename(
            title="Select a file",
            filetypes=[
                ("Excel files", "*.xlsx *.xls"),
                ("ODS files", "*.ods"),
                ("CSV files", "*.csv"),
                ("All files", "*.*"),
            ],
        )
    finally:
        # Destroy the hidden root even if the dialog raises, so repeated
        # calls do not accumulate live Tk instances.
        root.destroy()

    # An empty result means the dialog was cancelled.
    if not file_path:
        print("No file selected")
        return None

    # Compare extensions in lower case so names like "DATA.CSV" are accepted.
    lowered = file_path.lower()
    try:
        if lowered.endswith('.csv'):
            df = pd.read_csv(file_path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(file_path)
        elif lowered.endswith('.ods'):
            df = _read_ods_first_sheet(file_path)
        else:
            print("Unsupported file type")
            return None
    except Exception as e:
        # Best-effort loader: report the problem and let the caller handle None.
        print(f"Error loading file: {e}")
        return None

    print(f"File loaded successfully: {file_path}")
    return df


In [None]:
# Load the data
df = load_file()
if df is None:
    # load_file() has already printed why nothing was loaded; stop here with
    # a clear message instead of failing later on `None["data"]`.
    raise RuntimeError(
        "No dataset loaded: re-run this cell and select a CSV, Excel or ODS file."
    )

print(df.head())

# The analysis below works on a single column named "data".
if "data" not in df.columns:
    raise KeyError(
        f"Column 'data' not found; available columns: {list(df.columns)}"
    )

# Drop missing entries: a single NaN would turn every percentile / std
# computed from this list into NaN.
data = df["data"].dropna().tolist()

In [None]:
# Compare three histogram bin-width rules on `data` and plot one of them.

def bin_edges(values, width):
    """Return equally spaced bin edges of the given width covering all values.

    The first edge is ``min(values)`` and the last edge is at least
    ``max(values)``, so no observation falls outside the histogram range.

    Raises:
        ValueError: if ``width`` is not a positive finite number.
    """
    if not np.isfinite(width) or width <= 0:
        raise ValueError(f"Bin width must be positive and finite, got {width}")
    lo, hi = float(np.min(values)), float(np.max(values))
    # Round the bin count up (at least one bin) so the edges reach the maximum;
    # np.arange(lo, hi, width) would stop short of it.
    n_bins = max(int(np.ceil((hi - lo) / width)), 1)
    edges = lo + width * np.arange(n_bins + 1)
    # Guard against floating-point rounding leaving the last edge just below hi.
    edges[-1] = max(edges[-1], hi)
    return edges


values = np.asarray(data, dtype=float)
n_obs = values.size
if n_obs < 2:
    raise ValueError("Need at least two observations to compute a bin width")

# Calculate the IQR
IQR = np.percentile(values, 75) - np.percentile(values, 25)

# Both Freedman-Diaconis and Scott scale with n^(-1/3).
cube_root_n = np.power(n_obs, 1/3)

# Calculate the bin width using the Freedman-Diaconis rule
bin_width_fd = 2 * IQR / cube_root_n

# Calculate the bin width using Sturges' rule: the rule gives a bin count
# (1 + 3.3 * log10(n)), so the width is the data range divided by that count.
bin_width_sturgess = (np.max(values) - np.min(values)) / (1 + 3.3 * np.log10(n_obs))

# Calculate the bin width using Scott's rule
bin_width_scott = 3.49 * np.std(values) / cube_root_n

print("bin width: {:.4f} (Freedman-Diaconis); {:.4f} (Sturges); {:.4f} (Scott)".format(bin_width_fd, bin_width_sturgess, bin_width_scott))

# Choose which rule to plot: bin_width_fd, bin_width_sturgess or bin_width_scott.
selected_bin_width = bin_width_fd

# Create the histogram with edges that span the full data range
fig, ax = plt.subplots()
ax.hist(values, bins=bin_edges(values, selected_bin_width))
ax.set(
    xlabel="data",
    ylabel="count",
    title="Histogram (bin width = {:.4f})".format(selected_bin_width),
)
plt.show()