In [12]:
import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import ttk, messagebox
import os
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import seaborn as sns

sns.set_style("darkgrid")
plt.rcParams['figure.dpi'] = 100

# --- Configuration ---
RAW_DATA_FILE = 'Weather Data - Sheet1 (1).csv'

# --- Core Data Analysis Functions ---


def aggregate_data(df_raw):
    try:
        df_raw['Date Only'] = df_raw['DATE'].ffill()
        df_raw['Date Only'] = pd.to_datetime(df_raw['Date Only'], format='%m/%d/%Y')

        daily_summary = df_raw.groupby('Date Only').agg(
            **{
                'Daily Average Temperature (°C)': ('TEMPERATURE (°C)', 'mean'),
                'Daily Total Rainfall (mm)': ('RAINFALL (mm)', 'sum'),
                'Dominant Condition': ('ACTUAL CONDITION', lambda x: x.mode().iloc[0] if not x.mode().empty else np.nan),
                'OCCURENCE OF FLOOD': ('OCCURENCE OF FLOOD', 'first')
            }
        ).reset_index()

        return daily_summary

    except Exception as e:
        raise Exception(f"Error during data aggregation: {e}")


def compute_statistics(df, columns):
    stats = {}
    for col in columns:
        data = df[col].dropna()
        mean = data.mean()
        std_dev = data.std()
        cv = (std_dev / mean) * 100 if mean != 0 else np.nan

        col_name = col.split('(')[0].strip()
        stats[col_name] = {
            'Mean': mean,
            'Median': data.median(),
            'Standard Deviation': std_dev,
            'Coefficient of Variation (CV)': cv
        }
    return stats


class WeatherAnalysisApp:
    def __init__(self, master):
        self.master = master
        master.title("Annual Weather Data Analysis & Visualization")

        current_dir = os.getcwd()
        data_path = os.path.join(current_dir, RAW_DATA_FILE)

        try:
            self.df_raw = pd.read_csv(data_path)

            required_cols = ['DATE', 'TEMPERATURE (°C)', 'RAINFALL (mm)', 'ACTUAL CONDITION', 'OCCURENCE OF FLOOD']
            missing_cols = [col for col in required_cols if col not in self.df_raw.columns]
            if missing_cols:
                raise Exception(f"Missing required columns in CSV: {missing_cols}")

            self.df_daily = aggregate_data(self.df_raw.copy())
            if self.df_daily is None or self.df_daily.empty:
                raise Exception("Aggregation returned no data.")

            self.stats_columns = ['Daily Average Temperature (°C)', 'Daily Total Rainfall (mm)']
            self.stats_results = compute_statistics(self.df_daily, self.stats_columns)
            self.report_strings = self.generate_report_strings(self.stats_results)

        except FileNotFoundError:
            messagebox.showerror(
                "File Error",
                f"Error: Data file '{RAW_DATA_FILE}' not found.\n\nExpected Path: {data_path}\n\nPlease ensure the CSV file is in your current working directory."
            )
            master.withdraw()
            return
        except Exception as e:
            messagebox.showerror("Application Error", f"An unexpected error occurred during processing:\n{e}")
            master.withdraw()
            return

        self.setup_gui()

    def generate_report_strings(self, stats):
        temp_stats = stats['Daily Average Temperature']
        rain_stats = stats['Daily Total Rainfall']

        cv_temp = temp_stats['Coefficient of Variation (CV)']
        cv_rain = rain_stats['Coefficient of Variation (CV)']
        cv_comparison = cv_rain / cv_temp if cv_temp != 0 else np.inf

        mean_temp = temp_stats['Mean']
        exec_summary = (
            f"The average annual temperature was {mean_temp:.1f}°C, "
            f"and rainfall exhibited significant variability compared to temperature."
        )

        if cv_rain > cv_temp:
            key_analysis = (
                f"RESULT: Daily Total Rainfall ({cv_rain:.1f}%) exhibits *greater relative variability* "
                f"than Daily Average Temperature ({cv_temp:.1f}%)."
            )
        else:
            key_analysis = (
                f"RESULT: Daily Average Temperature ({cv_temp:.1f}%) exhibits *greater relative variability* "
                f"than Daily Total Rainfall ({cv_rain:.1f}%)."
            )

        cv_discussion = (
            f"Rainfall, with a CV of {cv_rain:.1f}%, is {cv_comparison:.1f} times "
            f"more unpredictable (variable) than the Daily Average Temperature (CV of {cv_temp:.1f}%)."
        )

        return {
            'exec_summary': exec_summary,
            'key_analysis': key_analysis,
            'cv_discussion': cv_discussion,
            'temp_mean': f"{temp_stats['Mean']:.1f}",
            'temp_stdev': f"{temp_stats['Standard Deviation']:.1f}",
            'rain_mean': f"{rain_stats['Mean']:.1f}",
            'rain_stdev': f"{rain_stats['Standard Deviation']:.1f}",
        }

    def setup_gui(self):
        notebook = ttk.Notebook(self.master)
        self.master.columnconfigure(0, weight=1)
        self.master.rowconfigure(0, weight=1)
        notebook.grid(row=0, column=0, pady=10, padx=10, sticky="nsew")

        report_frame = ttk.Frame(notebook, padding="15")
        notebook.add(report_frame, text=' Statistical Report ')
        self._setup_report_frame(report_frame)

        plot_frame = ttk.Frame(notebook, padding="15")
        notebook.add(plot_frame, text=' Data Visualizations ')
        self._setup_plot_frame(plot_frame)

    def _setup_report_frame(self, frame):
        frame.grid_columnconfigure(0, weight=1)

        # Use LabelFrame for grouping
        report_section = ttk.LabelFrame(frame, text="Phase 4: Final Report Findings", padding=15)
        report_section.grid(row=0, column=0, sticky="nsew", padx=5, pady=5)
        report_section.grid_columnconfigure(0, weight=1)

        # Executive Summary
        ttk.Label(report_section, text="1. Executive Summary:", font=("Helvetica", 13, "bold")).grid(
            row=0, column=0, sticky=tk.W, pady=(0, 5))
        ttk.Label(report_section, text=self.report_strings['exec_summary'], wraplength=700, justify=tk.LEFT).grid(
            row=1, column=0, sticky=tk.W, pady=(0, 10))

        # Central Tendency and Spread
        ttk.Label(report_section, text="2. Central Tendency and Spread:", font=("Helvetica", 13, "bold")).grid(
            row=2, column=0, sticky=tk.W, pady=(10, 5))

        # Frame for Treeview with scrollbar
        tree_frame = ttk.Frame(report_section)
        tree_frame.grid(row=3, column=0, sticky="ew", pady=5)
        tree_frame.columnconfigure(0, weight=1)

        stat_cols = ("Statistic", "Temperature Mean", "Temperature Std Dev", "Rainfall Mean", "Rainfall Std Dev")
        stat_tree = ttk.Treeview(tree_frame, columns=stat_cols, show='headings', height=3)

        for col in stat_cols:
            stat_tree.heading(col, text=col, anchor=tk.CENTER)
            if col == "Statistic":
                stat_tree.column(col, anchor=tk.W, width=180)
            else:
                stat_tree.column(col, anchor=tk.CENTER, width=120)

        stat_tree.insert("", tk.END, values=(
            "Mean",
            self.report_strings['temp_mean'],
            "—",
            self.report_strings['rain_mean'],
            "—"
        ))
        stat_tree.insert("", tk.END, values=(
            "Std Dev (\u03C3)",
            "—",
            self.report_strings['temp_stdev'],
            "—",
            self.report_strings['rain_stdev']
        ))

        stat_tree.grid(row=0, column=0, sticky="ew")

        # Scrollbar for Treeview
        scrollbar = ttk.Scrollbar(tree_frame, orient="vertical", command=stat_tree.yview)
        stat_tree.configure(yscrollcommand=scrollbar.set)
        scrollbar.grid(row=0, column=1, sticky='ns')

        # CV Discussion
        ttk.Label(report_section, text="Coefficient of Variation (Relative Spread):", font=("Helvetica", 11, "bold")).grid(
            row=4, column=0, sticky=tk.W, pady=(10, 0))
        ttk.Label(report_section, text=self.report_strings['cv_discussion'], wraplength=700, justify=tk.LEFT).grid(
            row=5, column=0, sticky=tk.W, pady=(0, 10))

        # Key Analytical Task
        ttk.Label(report_section, text="Key Analytical Task (CV Comparison):", font=("Helvetica", 13, "bold")).grid(
            row=6, column=0, sticky=tk.W, pady=(10, 5))
        ttk.Label(report_section, text=self.report_strings['key_analysis'], wraplength=700, justify=tk.LEFT,
                  foreground='blue', font=("Helvetica", 11, "italic")).grid(row=7, column=0, sticky=tk.W)

    def embed_plot(self, parent_frame, plot_type, data_col, title, x_label, plot_row, plot_col):
        fig = Figure(figsize=(4.5, 3.5), dpi=100)
        ax = fig.add_subplot(111)

        if plot_type == 'bar':
            condition_counts = self.df_raw[data_col].value_counts().nlargest(5)
            sns.barplot(x=condition_counts.index, y=condition_counts.values, ax=ax, palette="coolwarm")
            ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha='right', fontsize=8)
            ax.set_ylabel('Frequency (Occurrences)', fontsize=9)
            ax.grid(axis='y', linestyle='--', alpha=0.7)

            max_count = condition_counts.max()
            y_ticks = np.arange(0, max_count + 20, 20)
            ax.set_yticks(y_ticks)

        else:
            data = self.df_daily[data_col].dropna()
            if plot_type == 'hist':
                sns.histplot(data, bins=20, color='darkorange', kde=True, ax=ax, alpha=0.7)
                ax.set_ylabel('Frequency', fontsize=9)
                ax.grid(axis='y', linestyle=':', alpha=0.6)
            elif plot_type == 'box':
                sns.boxplot(x=data, ax=ax, color='mediumseagreen', fliersize=4, linewidth=1.5)
                ax.set_ylabel('')
                ax.grid(axis='x', linestyle=':', alpha=0.6)

        ax.set_title(title, fontsize=11, fontweight='semibold', family='Calibri')
        ax.set_xlabel(x_label, fontsize=9)
        ax.tick_params(axis='both', which='major', labelsize=8)
        fig.tight_layout()

        canvas = FigureCanvasTkAgg(fig, master=parent_frame)
        canvas_widget = canvas.get_tk_widget()
        canvas_widget.grid(row=plot_row, column=plot_col, padx=6, pady=6, sticky="nsew")
        canvas.draw()

    def embed_timeseries_plot(self, parent_frame, plot_row, plot_col):
        fig = Figure(figsize=(9.5, 3.5), dpi=100)
        ax = fig.add_subplot(111)

        ax.plot(self.df_daily['Date Only'], self.df_daily['Daily Total Rainfall (mm)'],
                color='navy', linewidth=1.5, marker='o', markersize=3, markerfacecolor='orange', markeredgewidth=0.8)

        ax.set_title('4. Time Series Plot of Daily Total Rainfall Over the Year', fontsize=11, fontweight='semibold', family='Calibri')
        ax.set_xlabel('Date', fontsize=9)
        ax.set_ylabel('Daily Total Rainfall (mm)', fontsize=9)
        ax.grid(True, linestyle='--', alpha=0.5)
        ax.tick_params(axis='both', which='major', labelsize=8)
        fig.tight_layout()

        canvas = FigureCanvasTkAgg(fig, master=parent_frame)
        canvas_widget = canvas.get_tk_widget()
        canvas_widget.grid(row=plot_row, column=plot_col, padx=6, pady=6, columnspan=2, sticky="nsew")
        canvas.draw()

    def embed_comparative_boxplot(self, parent_frame, plot_row, plot_col):
        fig = Figure(figsize=(4.5, 3.5), dpi=100)
        ax = fig.add_subplot(111)

        sns.boxplot(x='OCCURENCE OF FLOOD', y='Daily Average Temperature (°C)',
                    data=self.df_daily, ax=ax, palette="Set2", fliersize=5, linewidth=1.5)

        ax.set_title('5. Daily Average Temperature by Flood Occurrence', fontsize=11, fontweight='semibold', family='Calibri')
        ax.set_xlabel('Occurrence of Flood', fontsize=9)
        ax.set_ylabel('Daily Average Temperature (°C)', fontsize=9)
        ax.grid(axis='y', linestyle=':', alpha=0.6)
        ax.tick_params(axis='both', which='major', labelsize=8)
        fig.tight_layout()

        canvas = FigureCanvasTkAgg(fig, master=parent_frame)
        canvas_widget = canvas.get_tk_widget()
        canvas_widget.grid(row=plot_row, column=plot_col, padx=6, pady=6, sticky="nsew")
        canvas.draw()

    def _setup_plot_frame(self, frame):
        frame.grid_columnconfigure(0, weight=1)
        frame.grid_columnconfigure(1, weight=1)
        frame.grid_rowconfigure(1, weight=1)
        frame.grid_rowconfigure(2, weight=1)
        frame.grid_rowconfigure(3, weight=1)

        # Use LabelFrame for grouping
        plot_section = ttk.LabelFrame(frame, text="Phase 3: Data Visualizations", padding=15)
        plot_section.grid(row=0, column=0, columnspan=2, sticky="nsew", padx=5, pady=5)
        plot_section.grid_columnconfigure(0, weight=1)
        plot_section.grid_columnconfigure(1, weight=1)

        self.embed_plot(plot_section, 'hist', 'Daily Average Temperature (°C)',
                        '1. Distribution of Daily Average Temperature',
                        'Daily Average Temperature (°C)',
                        1, 0)

        self.embed_plot(plot_section, 'box', 'Daily Average Temperature (°C)',
                        '2. Box Plot of Daily Average Temperature',
                        'Daily Average Temperature (°C)',
                        1, 1)

        self.embed_plot(plot_section, 'bar', 'ACTUAL CONDITION',
                        '3. Frequency of Top 5 Actual Conditions',
                        'Actual Condition',
                        2, 0)

        self.embed_comparative_boxplot(plot_section, 2, 1)

        self.embed_timeseries_plot(plot_section, 3, 0)


if __name__ == '__main__':
    root = tk.Tk()
    root.withdraw()

    app = WeatherAnalysisApp(root)

    if hasattr(app, 'df_daily'):
        root.geometry("1000x850")
        root.deiconify()

    root.mainloop()



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=condition_counts.index, y=condition_counts.values, ax=ax, palette="coolwarm")
  ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha='right', fontsize=8)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='OCCURENCE OF FLOOD', y='Daily Average Temperature (°C)',
