# In [6]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox

# --- Dataset Initialization (Global) ---
# Observed battery life in hours, one reading per battery.
battery_life_data = [
    18.5, 20.1, 22.3, 19.8, 21.6, 23.2, 17.9, 25.1,
    19.2, 20.9, 18.8, 24.5, 21.2, 22.8, 20.5, 23.7,
    19.6, 21.9, 26.4, 18.3
]
# Battery IDs (B1, B2, ...) are generated from the number of readings so the
# two columns always have matching lengths when readings are added or removed.
df = pd.DataFrame({
    'Battery_ID': [f'B{i}' for i in range(1, len(battery_life_data) + 1)],
    'Life (Hours)': battery_life_data,
})
# Coerce to a numeric dtype so later statistics never operate on object data.
df['Life (Hours)'] = pd.to_numeric(df['Life (Hours)'])

# --- Statistical Calculations (Functions) ---

def calculate_zscores(df):
    """Compute the mean, sample standard deviation, and z-scores of battery life.

    Side effect: a 'Z-Score' column is added to (or overwritten on) the
    DataFrame that is passed in, so the caller's frame is modified in place.

    Args:
        df: DataFrame with 'Battery_ID' and 'Life (Hours)' columns.

    Returns:
        dict with keys:
            'mean': mean of 'Life (Hours)'.
            'std_dev': sample standard deviation (ddof=1) of 'Life (Hours)'.
            'df_with_zscore': text table of every battery, sorted by z-score
                in descending order.
            'outliers': text table of batteries with |z| > 2, or an empty
                string when there are none.
    """
    life = df['Life (Hours)']
    mean_life = life.mean()
    # ddof=1 for Sample Standard Deviation (STDEV.S)
    std_dev_life = life.std(ddof=1)

    # Same ddof as above so the z-scores agree with the reported std dev.
    df['Z-Score'] = zscore(life, ddof=1)

    report_columns = ['Battery_ID', 'Life (Hours)', 'Z-Score']
    outliers_zscore = df.loc[np.abs(df['Z-Score']) > 2, report_columns]

    # An empty frame renders as "Empty DataFrame\nColumns: ...", which is not
    # blank; return '' instead so callers can test for "no outliers" directly.
    if outliers_zscore.empty:
        outliers_text = ''
    else:
        outliers_text = outliers_zscore.to_string(index=False)

    zscore_results = {
        'mean': mean_life,
        'std_dev': std_dev_life,
        'df_with_zscore': df[report_columns].sort_values(by='Z-Score', ascending=False).to_string(index=False),
        'outliers': outliers_text,
    }
    return zscore_results

def calculate_iqr_outliers(df):
    """Compute the quartiles, IQR, and 1.5*IQR fences, and list the outliers.

    Args:
        df: DataFrame with 'Battery_ID' and 'Life (Hours)' columns. A
            'Z-Score' column is included in the outlier table when present,
            but it is not required.

    Returns:
        dict with keys 'Q1', 'Q2', 'Q3', 'IQR', 'lower_bound', 'upper_bound',
        and 'outliers' (text table of rows outside the fences, or an empty
        string when there are none).
    """
    life = df['Life (Hours)']
    # pandas defaults to interpolation='linear' which is standard
    Q1 = life.quantile(0.25)
    Q2 = life.quantile(0.50)
    Q3 = life.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # 'Z-Score' is optional so this function does not depend on
    # calculate_zscores() having been run on the same frame first.
    report_columns = ['Battery_ID', 'Life (Hours)']
    if 'Z-Score' in df.columns:
        report_columns.append('Z-Score')

    outside_fences = (life < lower_bound) | (life > upper_bound)
    outliers_iqr = df.loc[outside_fences, report_columns]

    # An empty frame renders as "Empty DataFrame\nColumns: ...", which is not
    # blank; return '' instead so callers can test for "no outliers" directly.
    if outliers_iqr.empty:
        outliers_text = ''
    else:
        outliers_text = outliers_iqr.to_string(index=False)

    iqr_results = {
        'Q1': Q1,
        'Q2': Q2,
        'Q3': Q3,
        'IQR': IQR,
        'lower_bound': lower_bound,
        'upper_bound': upper_bound,
        'outliers': outliers_text,
    }
    return iqr_results

def calculate_percentiles_deciles(df):
    """Return the 90th percentile (P90) and 4th decile (D4) of battery life.

    Args:
        df: DataFrame with a 'Life (Hours)' column.

    Returns:
        dict with keys 'P90' (0.90 quantile) and 'D4' (0.40 quantile).
    """
    life_hours = df['Life (Hours)']
    # The 4th decile is the same cut point as the 40th percentile.
    return {
        'P90': life_hours.quantile(0.90),
        'D4': life_hours.quantile(0.40),
    }

def analyze_new_battery(new_life, mean_life, std_dev_life, lower_bound, upper_bound):
    """Check a new battery-life reading against the z-score and IQR outlier rules.

    Args:
        new_life: The new reading; any value that float() can convert.
        mean_life: Mean of the reference sample.
        std_dev_life: Standard deviation of the reference sample.
        lower_bound: Lower IQR fence.
        upper_bound: Upper IQR fence.

    Returns:
        Tuple (message, is_outlier_z, is_outlier_iqr) where both flags are
        plain bools. If new_life cannot be converted to a finite number, the
        message is an error string and both flags are False.
    """
    invalid_input = (
        "Error: Invalid input for new battery life. Please enter a number.",
        False,
        False,
    )
    try:
        new_life = float(new_life)
    except (TypeError, ValueError):
        # TypeError covers None and other objects float() cannot convert.
        return invalid_input
    if not np.isfinite(new_life):
        # NaN compares False against every threshold and would be silently
        # reported as "not an outlier"; infinities are not real readings.
        return invalid_input

    new_z_score = (new_life - mean_life) / std_dev_life
    # bool() normalizes NumPy booleans so both flags have the same type.
    is_outlier_z = bool(np.abs(new_z_score) > 2)
    is_outlier_iqr = bool((new_life < lower_bound) or (new_life > upper_bound))

    conclusion = f"New Battery Z-score: {new_z_score:.2f}\n"
    conclusion += f"- Z-score Outlier Check (|z| > 2): {'Yes' if is_outlier_z else 'No'}\n"
    conclusion += f"- IQR Outlier Check (Outside [{lower_bound:.2f}, {upper_bound:.2f}]): {'Yes' if is_outlier_iqr else 'No'}\n"

    if is_outlier_z and is_outlier_iqr:
        conclusion += "Conclusion: The new battery's life IS considered an outlier by BOTH methods."
    elif is_outlier_z:
        conclusion += "Conclusion: The new battery's life IS considered an outlier by the Z-score method."
    elif is_outlier_iqr:
        conclusion += "Conclusion: The new battery's life IS considered an outlier by the IQR method."
    else:
        conclusion += "Conclusion: The new battery's life IS NOT considered an outlier by either method."

    return conclusion, is_outlier_z, is_outlier_iqr

# --- Tkinter GUI Application ---

class BatteryAnalysisApp:
    """Tkinter front end for the battery-life statistics report.

    On construction the app runs the z-score, IQR, and percentile analyses,
    writes the combined report into a scrollable text area, and provides an
    entry box for checking whether a new battery-life reading is an outlier.
    """

    def __init__(self, master, df):
        """Run the analyses on ``df`` and build the widgets inside ``master``.

        Args:
            master: Tk root window that hosts the UI.
            df: DataFrame with 'Battery_ID' and 'Life (Hours)' columns. It is
                stored without copying, and calculate_zscores() adds a
                'Z-Score' column to it in place.
        """
        self.master = master
        master.title("Battery Life Statistical Analysis")
        master.geometry("800x700")

        self.df = df

        # calculate_zscores() runs first and on the shared frame (no copy) so
        # the 'Z-Score' column exists when calculate_iqr_outliers() builds
        # its outlier table from the copy taken below.
        self.zscore_results = calculate_zscores(self.df)
        self.iqr_results = calculate_iqr_outliers(self.df.copy())
        self.percentile_results = calculate_percentiles_deciles(self.df.copy())

        # Extract necessary values for the new battery analysis
        self.mean_life = self.zscore_results['mean']
        self.std_dev_life = self.zscore_results['std_dev']
        self.lower_bound = self.iqr_results['lower_bound']
        self.upper_bound = self.iqr_results['upper_bound']

        # Configure styles
        style = ttk.Style()
        style.configure('TFrame', background='#f0f0f0')
        style.configure('TLabel', background='#f0f0f0', font=('Arial', 10))
        style.configure('TButton', font=('Arial', 10, 'bold'))
        style.configure('Title.TLabel', font=('Arial', 12, 'bold'), foreground='navy')

        # Main frame
        main_frame = ttk.Frame(master, padding="10")
        main_frame.pack(fill='both', expand=True)

        # Output text area for initial dataset and main results
        self.results_text = scrolledtext.ScrolledText(main_frame, wrap=tk.WORD, width=90, height=25, font=('Consolas', 9))
        self.results_text.pack(pady=10, padx=10, fill='x')

        # New Battery Analysis (Problem 3.3) frame
        analysis_frame = ttk.LabelFrame(main_frame, text="Problem 3.3: New Battery Outlier Analysis", padding="10")
        analysis_frame.pack(fill='x', padx=10, pady=5)

        # Input for new battery life
        ttk.Label(analysis_frame, text="New Battery Life (Hours):").grid(row=0, column=0, padx=5, pady=5, sticky='w')
        self.new_life_entry = ttk.Entry(analysis_frame, width=15)
        self.new_life_entry.insert(0, "27.0")  # Pre-filled default reading
        self.new_life_entry.grid(row=0, column=1, padx=5, pady=5, sticky='w')

        # Button to run analysis
        ttk.Button(analysis_frame, text="Analyze Outlier", command=self.run_new_battery_analysis).grid(row=0, column=2, padx=10, pady=5)

        # Output label for new battery analysis
        self.analysis_output_label = ttk.Label(analysis_frame, text="Enter a value and click 'Analyze Outlier'.", justify=tk.LEFT, background='white', borderwidth=1, relief="solid", padding=5)
        self.analysis_output_label.grid(row=1, column=0, columnspan=3, padx=5, pady=5, sticky='ew')

        # Initial population of the results
        self.display_initial_results()

    @staticmethod
    def _has_outlier_rows(table_text):
        """Return True when a rendered outlier table contains at least one row."""
        stripped = table_text.strip()
        # pandas renders an empty frame as "Empty DataFrame\nColumns: ...",
        # which is not blank, so a plain truthiness check would never reach
        # the "no outliers" message.
        return bool(stripped) and not stripped.startswith("Empty DataFrame")

    def display_initial_results(self):
        """Compile the full statistical report and show it in the text area."""
        output = []

        # Initial Dataset. self.df already carries the computed 'Z-Score'
        # column by now, so drop it to show only the raw input data.
        output.append("--- Initial Dataset ---")
        output.append(self.df.drop(columns=['Z-Score'], errors='ignore').to_string(index=False))
        output.append("\n" + "="*50 + "\n")

        # Problem 1: Z-Scores
        output.append("--- Problem 1: Z-Scores ---")
        output.append(f"Mean Battery Life: {self.mean_life:.2f} hours")
        output.append(f"Sample Standard Deviation: {self.std_dev_life:.2f} hours")
        output.append("\nTable of Z-Scores:")
        output.append(self.zscore_results['df_with_zscore'])
        output.append("\nBatteries that are potential outliers ( |z| > 2 ):")
        if self._has_outlier_rows(self.zscore_results['outliers']):
            output.append(self.zscore_results['outliers'])
        else:
            output.append("None of the batteries have a Z-score with an absolute value greater than 2.")
        output.append("\n" + "="*50 + "\n")

        # Problem 2: Quartiles and Outliers (IQR Method)
        iqr = self.iqr_results
        output.append("--- Problem 2: Quartiles and Outliers (IQR Method) ---")
        output.append(f"1. Q1 (25th Percentile): {iqr['Q1']:.2f} hours")
        output.append(f"   Q2 (Median/50th Percentile): {iqr['Q2']:.2f} hours")
        output.append(f"   Q3 (75th Percentile): {iqr['Q3']:.2f} hours")
        output.append(f"2. Interquartile Range (IQR = Q3 - Q1): {iqr['IQR']:.2f} hours")
        output.append("\n3. Outlier Boundaries (IQR Method):")
        output.append(f"   Lower Bound (Q1 - 1.5*IQR): {iqr['lower_bound']:.2f} hours")
        output.append(f"   Upper Bound (Q3 + 1.5*IQR): {iqr['upper_bound']:.2f} hours")
        output.append("\nBatteries identified as outliers by the IQR Method:")
        if self._has_outlier_rows(iqr['outliers']):
            output.append(iqr['outliers'])
        else:
            output.append("No outliers were identified by the IQR method.")
        output.append("\n" + "="*50 + "\n")

        # Problem 3: Percentiles and Deciles (Parts 1 & 2)
        perc = self.percentile_results
        output.append("--- Problem 3: Percentiles and Deciles ---")
        output.append(f"1. The 90th percentile (P90) is: {perc['P90']:.2f} hours")
        output.append(f"   Interpretation: 90% of the batteries lasted less than {perc['P90']:.2f} hours, and 10% lasted longer.")
        output.append(f"\n2. The D4 (40th percentile) value is: {perc['D4']:.2f} hours")
        output.append("\n\n3. Analysis for a new battery life (Use the input box below):")

        # Replace whatever the text area currently holds with the new report.
        self.results_text.delete("1.0", tk.END)
        self.results_text.insert(tk.END, "\n".join(output))

    def run_new_battery_analysis(self):
        """Handle the 'Analyze Outlier' button: validate input and show the verdict."""
        new_life_str = self.new_life_entry.get()

        try:
            new_life = float(new_life_str)
            # float() accepts "nan" and "inf"; neither is a usable reading.
            if not np.isfinite(new_life):
                raise ValueError("battery life must be a finite number")
        except ValueError:
            messagebox.showerror("Input Error", "Please enter a valid number for the new battery life.")
            return

        result_text, is_outlier_z, is_outlier_iqr = analyze_new_battery(
            new_life,
            self.mean_life,
            self.std_dev_life,
            self.lower_bound,
            self.upper_bound
        )

        # Update the analysis output label
        self.analysis_output_label.config(text=result_text)

        # Red/bold when either method flags the reading, green otherwise.
        if is_outlier_z or is_outlier_iqr:
            self.analysis_output_label.config(foreground='red', font=('Arial', 10, 'bold'))
        else:
            self.analysis_output_label.config(foreground='green', font=('Arial', 10))


# Build the top-level Tk window that hosts the application.
root = tk.Tk()

# The module-level DataFrame is handed over as-is (no copy): the constructor
# stores this same object and runs the z-score calculation on it, which adds
# a 'Z-Score' column to `df` in place.
app = BatteryAnalysisApp(master=root, df=df)

# Enter the Tk event loop; this call blocks until the window is closed.
root.mainloop()