<a href="https://colab.research.google.com/github/Adarsh3589/Normal_distrribution-/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy.stats import norm, anderson
from google.colab import files
from IPython.display import display
import ipywidgets as widgets


# Step 1: Download the Excel file from GitHub
# `requests` and `BytesIO` are used below but are not in this cell's import
# list, so they are imported here to keep the cell runnable on its own.
from io import BytesIO

import requests

# NOTE: raw.githubusercontent.com serves files at /<user>/<repo>/<ref>/<path>.
# The "/blob/" segment belongs to github.com page URLs only and makes the raw
# host answer 404, so it is omitted (same form as the other workbook URL used
# later in this notebook).
url = "https://raw.githubusercontent.com/Adarsh3589/Collins_15-5PH-/3bad9a444c2aca96c92f87693207df0739c1fa5f/15-5%20PH%2020%20Heat%20Tracker%20Test%20Results.xlsx"

# A timeout keeps the cell from hanging forever if the host never answers.
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop on HTTP errors instead of parsing an error page as Excel
excel_data = BytesIO(response.content)  # in-memory file object for pandas

# Step 2: Load the second sheet
df = pd.read_excel(excel_data, sheet_name=1, engine='openpyxl')  # sheet_name=1 -> second sheet (0-based)



# Step 4: Create a dictionary of DataFrames for each unique property
# Missing property names are dropped first; the remaining names are sorted so
# the dropdown order is stable.
unique_properties = sorted(df["Property"].dropna().unique().tolist())
property_dfs = {
    name: df.loc[df["Property"] == name]
    for name in unique_properties
}

# Step 5: Default property and corresponding DataFrame
# The first name in sorted order is shown until the user picks another one.
default_property = unique_properties[0]
selected_property_df = property_dfs[default_property]

# Step 6: Dropdown widget with default value pre-selected
property_selector = widgets.Dropdown(
    description='Select Property:',
    options=unique_properties,
    value=default_property,
    disabled=False,
)

# Step 7: Output widget to show selected property data
output = widgets.Output()

# Step 8: Function to update global variable on user selection
def on_property_change(change):
    """Switch the active property and refresh the preview in the output widget.

    ``change`` is the change dictionary handed to the observer; only its
    ``'new'`` entry (the newly selected property name) is read. The module-level
    ``selected_property_df`` is rebound so later cells see the new selection.
    """
    global selected_property_df
    chosen = change['new']
    selected_property_df = property_dfs[chosen]
    with output:
        # Replace the previous preview rather than appending below it.
        output.clear_output()
        print(f"Selected Property: {chosen}")
        display(selected_property_df.head())

# Step 9: Attach the function to the dropdown
# Only changes of the widget's 'value' trait are forwarded to the handler.
property_selector.observe(on_property_change, names='value')

# Step 10: Display UI and initial default data
display(property_selector, output)

# ✅ Trigger initial display
# The observer only fires on a change, so the handler is invoked once by hand
# with the default property to fill the output area on first render.
on_property_change({'new': default_property})


In [None]:
# Extract values (sorted ascending; the probability plot relies on this order)
prop = selected_property_df['Result'].dropna().sort_values().values
prop_name = str(selected_property_df['Property'].iloc[0])
n = len(prop)

# The fitted curve, the ±6σ axis range and the Anderson-Darling test all need a
# positive sample standard deviation. `prop` is sorted, so equal endpoints mean
# every value is identical.
if n < 2 or prop[0] == prop[-1]:
    raise ValueError(
        f"Need at least two distinct Result values to plot {prop_name}; got {n} value(s)."
    )

mean = np.mean(prop)
std = np.std(prop, ddof=1)  # sample standard deviation (n - 1 in the denominator)

# Spec limits: first non-missing entry of each column, or None when the column is blank
spec_min_values = selected_property_df['Spec_Min'].dropna()
spec_max_values = selected_property_df['Spec_Max'].dropna()
Lower_Spec = None if spec_min_values.empty else spec_min_values.iloc[0]
Upper_Spec = None if spec_max_values.empty else spec_max_values.iloc[0]

# X-axis range: ±6σ
x_min = mean - 6 * std
x_max = mean + 6 * std
x = np.linspace(x_min, x_max, 200)
p = norm.pdf(x, mean, std)
pdf_peak = p.max()  # used to scale label heights and the y-axis

# Anderson-Darling Test
result = anderson(prop, dist='norm')
ad_stat = result.statistic
critical_values = result.critical_values
sig_levels = result.significance_level

# Create subplots
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Histogram with Normal Distribution
sns.histplot(prop, kde=False, stat='density', bins=10, color='skyblue', ax=axes[0], edgecolor='black')
axes[0].plot(x, p, 'r--', label='Normal Dist')

# Sigma lines and labels: the mean (k = 0) and ±1σ, ±2σ, ±3σ in grey
for k in range(-3, 4):
    axes[0].axvline(mean + k * std, color='grey', linestyle='--', linewidth=1)

# ±3σ emphasised in solid red on top of the grey guides
for sign, label in ((1, '+3σ'), (-1, '-3σ')):
    axes[0].axvline(mean + sign * 3 * std, color='red', linestyle='-', linewidth=2)
    axes[0].text(mean + sign * 3 * std + std * 0.05, pdf_peak * 0.05, label,
                 rotation=90, color='red', fontsize=9)

# Conditional spec limit annotations: only when the limit lies inside the
# plotted ±6σ window. Text is not clipped to the axes, so a label for an
# off-screen limit would otherwise be drawn outside the plot area.
if Lower_Spec is not None and x_min <= Lower_Spec <= x_max:
    axes[0].axvline(Lower_Spec, color='blue', linestyle='-', linewidth=2)
    axes[0].text(Lower_Spec + std * 0.05, pdf_peak * 0.05, 'Lower Spec', rotation=90, color='blue', fontsize=9)

if Upper_Spec is not None and x_min <= Upper_Spec <= x_max:
    axes[0].axvline(Upper_Spec, color='blue', linestyle='-', linewidth=2)
    axes[0].text(Upper_Spec + std * 0.05, pdf_peak * 0.05, 'Upper Spec', rotation=90, color='blue', fontsize=9)

# Set fixed x-limits: ±6σ from mean
axes[0].set_xlim(x_min, x_max)
axes[0].set_ylim(0, pdf_peak * 1.2)

# Titles and labels
axes[0].set_title(f'{prop_name} Histogram with Normal Distribution')
axes[0].set_xlabel(f'{prop_name} Result Value')
axes[0].set_ylabel('Density')
axes[0].legend()
axes[0].grid(True)

# Normal Probability Plot
sorted_data = prop
# Plotting positions i / (n + 1), in percent, so 0% and 100% are never reached
percentiles = np.arange(1, n + 1) / (n + 1) * 100
theoretical_quantiles = norm.ppf(percentiles / 100, loc=mean, scale=std)
z_critical = 1.96
se = std / np.sqrt(n)
# Band of constant half-width z * se around the fitted line
ci_upper = theoretical_quantiles + z_critical * se
ci_lower = theoretical_quantiles - z_critical * se

axes[1].plot(sorted_data, percentiles, 'o', label='Sample Data')
axes[1].plot(theoretical_quantiles, percentiles, 'r--', label='Normal Fit')
axes[1].fill_betweenx(percentiles, ci_lower, ci_upper, color='lightgray', alpha=0.5, label='95% CI')

# X-axis range for probability plot
axes[1].set_xlim(x_min, x_max)

axes[1].set_title(f'{prop_name} Normal Probability Plot')
axes[1].set_xlabel(f'{prop_name} Result Value')
axes[1].set_ylabel('Cumulative Probability (%)')
axes[1].legend()
axes[1].grid(True)

# Anderson-Darling annotation
# ':g' keeps fractional significance levels intact (int() would turn 2.5 into 2).
textstr = f'Anderson-Darling Stat = {ad_stat:.4f}\n'
for level, critical_value in zip(sig_levels, critical_values):
    textstr += f'{level:g}% CV = {critical_value:.4f}\n'

axes[1].text(0.05, 0.95, textstr, transform=axes[1].transAxes,
             fontsize=9, verticalalignment='top',
             bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', edgecolor='black'))

plt.tight_layout()
plt.show()


In [None]:
# Install required if not already
!pip install -q matplotlib seaborn pandas

from io import BytesIO
import os
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy.stats import norm, anderson
from google.colab import files
from IPython.display import display
import ipywidgets as widgets


# Step 1: Download the Excel file from GitHub
# `requests` is used below but is not in this cell's import list, so it is
# imported here to keep the cell runnable on its own.
import requests

url = "https://raw.githubusercontent.com/Adarsh3589/Normal_distrribution-/7db43f902af6b9e77baa12495238cbaa2b0d3d87/AMS%206431%2020%20Heat%20Tracker%20Test%20Results.xlsx"
# A timeout keeps the cell from hanging forever if the host never answers.
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop on HTTP errors instead of parsing an error page as Excel
excel_data = BytesIO(response.content)  # in-memory file object for pandas

# Step 2: Load the second sheet
df = pd.read_excel(excel_data, sheet_name=1, engine='openpyxl')  # sheet_name=1 -> second sheet (0-based)

# Create output directory
os.makedirs("output", exist_ok=True)

# Function to plot and save image
def plot_property_distribution(selected_property_df, prop_name, save_path):
    """Save a histogram and a normal probability plot for one set of results.

    Parameters
    ----------
    selected_property_df : pandas.DataFrame
        Rows to plot. The 'Result', 'Spec_Min' and 'Spec_Max' columns are read.
    prop_name : str
        Label used in the titles, axis labels and console messages.
    save_path : str
        Destination handed to ``savefig``.

    Returns
    -------
    None
        No file is written when there are fewer than two results or when all
        results are identical; a message is printed instead.
    """
    prop = selected_property_df['Result'].dropna().sort_values().values
    n = len(prop)
    if n < 2:
        print(f"Not enough data to plot for {prop_name}")
        return
    # `prop` is sorted, so equal endpoints mean every value is identical. The
    # standard deviation is then zero, which makes the fitted pdf NaN and the
    # ±6σ axis range empty, so plotting cannot proceed.
    if prop[0] == prop[-1]:
        print(f"No variation in data to plot for {prop_name}")
        return

    mean = np.mean(prop)
    std = np.std(prop, ddof=1)  # sample standard deviation

    def _first_valid(column):
        """Return the first non-missing value of *column*, or None if there is none."""
        values = selected_property_df[column].dropna()
        return None if values.empty else values.iloc[0]

    Lower_Spec = _first_valid('Spec_Min')
    Upper_Spec = _first_valid('Spec_Max')

    # Both panels share a fixed ±6σ window around the mean.
    x_min = mean - 6 * std
    x_max = mean + 6 * std
    x = np.linspace(x_min, x_max, 200)
    p = norm.pdf(x, mean, std)
    pdf_peak = p.max()  # used to scale label heights and the y-axis

    result = anderson(prop, dist='norm')
    ad_stat = result.statistic
    critical_values = result.critical_values
    sig_levels = result.significance_level

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    hist_ax, prob_ax = axes

    # --- Histogram with the fitted normal curve ---
    sns.histplot(prop, kde=False, stat='density', bins=10, color='skyblue', ax=hist_ax, edgecolor='black')
    hist_ax.plot(x, p, 'r--', label='Normal Dist')

    # Grey guides at the mean (k = 0) and at ±1σ, ±2σ, ±3σ
    for k in range(-3, 4):
        hist_ax.axvline(mean + k * std, color='grey', linestyle='--', linewidth=1)

    # ±3σ emphasised in solid red on top of the grey guides
    for sign, label in ((1, '+3σ'), (-1, '-3σ')):
        hist_ax.axvline(mean + sign * 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.text(mean + sign * 3 * std + std * 0.05, pdf_peak * 0.05, label,
                     rotation=90, color='red', fontsize=9)

    # Spec limits are drawn only when they fall inside the visible window.
    for limit, label in ((Lower_Spec, 'Lower Spec'), (Upper_Spec, 'Upper Spec')):
        if limit is not None and x_min <= limit <= x_max:
            hist_ax.axvline(limit, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(limit + std * 0.05, pdf_peak * 0.05, label,
                         rotation=90, color='blue', fontsize=9)

    hist_ax.set_xlim(x_min, x_max)
    hist_ax.set_ylim(0, pdf_peak * 1.2)
    hist_ax.set_title(f'{prop_name} Histogram with Normal Distribution')
    hist_ax.set_xlabel(f'{prop_name} Result Value')
    hist_ax.set_ylabel('Density')
    hist_ax.legend()
    hist_ax.grid(True)

    # --- Normal probability plot ---
    sorted_data = prop
    # Plotting positions i / (n + 1), in percent, so 0% and 100% are never reached
    percentiles = np.arange(1, n + 1) / (n + 1) * 100
    theoretical_quantiles = norm.ppf(percentiles / 100, loc=mean, scale=std)
    z_critical = 1.96
    se = std / np.sqrt(n)
    # Band of constant half-width z * se around the fitted line
    ci_upper = theoretical_quantiles + z_critical * se
    ci_lower = theoretical_quantiles - z_critical * se

    prob_ax.plot(sorted_data, percentiles, 'o', label='Sample Data')
    prob_ax.plot(theoretical_quantiles, percentiles, 'r--', label='Normal Fit')
    prob_ax.fill_betweenx(percentiles, ci_lower, ci_upper, color='lightgray', alpha=0.5, label='95% CI')
    prob_ax.set_xlim(x_min, x_max)
    prob_ax.set_title(f'{prop_name} Normal Probability Plot')
    prob_ax.set_xlabel(f'{prop_name} Result Value')
    prob_ax.set_ylabel('Cumulative Probability (%)')
    prob_ax.legend()
    prob_ax.grid(True)

    # Anderson-Darling summary box.
    # ':g' keeps fractional significance levels intact (int() would turn 2.5 into 2).
    textstr = f'Anderson-Darling Stat = {ad_stat:.4f}\n'
    for level, critical_value in zip(sig_levels, critical_values):
        textstr += f'{level:g}% CV = {critical_value:.4f}\n'

    prob_ax.text(0.05, 0.95, textstr, transform=prob_ax.transAxes,
                 fontsize=9, verticalalignment='top',
                 bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', edgecolor='black'))

    plt.tight_layout()
    fig.savefig(save_path)
    # Close this figure explicitly so repeated calls do not accumulate open figures.
    plt.close(fig)


# Widgets
# Missing property names are dropped so the dropdown never offers a NaN entry:
# selecting it would match no rows (NaN never compares equal) and produce no plot.
property_options = df['Property'].dropna().unique().tolist()
# Every column except the measurement and its spec limits is offered for grouping.
group_by_columns = [col for col in df.columns if col not in ['Result', 'Spec_Min', 'Spec_Max']]

# Choose between exporting every graph or only the selected property.
selection_mode = widgets.RadioButtons(
    options=['All Graphs', 'Selected Property'],
    description='Download:',
)

property_dropdown = widgets.Dropdown(
    options=property_options,
    description='Property:',
    value=property_options[0]
)

# 'None' is a sentinel string meaning "do not split the data further".
group_by_dropdown = widgets.Dropdown(
    options=['None'] + group_by_columns,
    description='Group By:',
    value='None'
)

display(selection_mode, property_dropdown, group_by_dropdown)

# Button to trigger export
download_button = widgets.Button(description="Generate & Download Graph(s)")
display(download_button)


def on_button_clicked(b):
    """Generate the requested graph(s) and hand them to the browser for download.

    ``b`` is the clicked button passed by the ``on_click`` callback; it is not
    used. In 'All Graphs' mode one image per property (and per group, when a
    group-by column is chosen) is written and bundled into ``output.zip``.
    Otherwise the images for the selected property are downloaded one by one.
    Groups for which no image was produced are skipped.
    """
    import re  # local import: only needed here to build safe file names

    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    group_by_col = None if group_by_dropdown.value == 'None' else group_by_dropdown.value

    def _render(group, label):
        """Plot *group* under *label*; return the image path, or None if nothing was saved."""
        # Path separators and other reserved characters in a label would break the path.
        safe_label = re.sub(r'[\\/:*?"<>|]', '_', str(label))
        file_path = f"{output_dir}/{safe_label}.png"
        # Remove any image left by an earlier click so the existence check
        # below reflects this run only.
        if os.path.exists(file_path):
            os.remove(file_path)
        plot_property_distribution(group, label, file_path)
        # The plot function returns without saving when the data cannot be plotted.
        return file_path if os.path.exists(file_path) else None

    if selection_mode.value == 'All Graphs':
        grouped = df.groupby(['Property'] + ([group_by_col] if group_by_col else []))
        filenames = []
        for keys, group in grouped:
            # Keys may be a scalar or a tuple depending on how many columns are grouped.
            prop = keys[0] if isinstance(keys, tuple) else keys
            suffix = f"_{keys[1]}" if isinstance(keys, tuple) and group_by_col else ""
            file_path = _render(group, f"{prop}{suffix}")
            if file_path is not None:
                filenames.append(file_path)

        if not filenames:
            print("No graphs were generated; nothing to download.")
            return

        zip_path = "output.zip"
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file in filenames:
                zipf.write(file)
        files.download(zip_path)

    else:
        prop = property_dropdown.value
        filtered_df = df[df['Property'] == prop]
        if group_by_col:
            jobs = [(f"{prop}_{key}", group) for key, group in filtered_df.groupby(group_by_col)]
        else:
            jobs = [(prop, filtered_df)]
        for label, group in jobs:
            file_path = _render(group, label)
            if file_path is not None:
                files.download(file_path)


download_button.on_click(on_button_clicked)


In [None]:
# Install required if not already
!pip install -q matplotlib seaborn pandas

import os
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy.stats import norm, anderson
from google.colab import files
from IPython.display import display
import ipywidgets as widgets


# Step 1: Download the Excel file from GitHub
# `requests` and `BytesIO` are used below but are not in this cell's import
# list, so they are imported here to keep the cell runnable on its own.
from io import BytesIO

import requests

url = "https://raw.githubusercontent.com/Adarsh3589/Normal_distrribution-/7db43f902af6b9e77baa12495238cbaa2b0d3d87/AMS%206431%2020%20Heat%20Tracker%20Test%20Results.xlsx"
# A timeout keeps the cell from hanging forever if the host never answers.
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop on HTTP errors instead of parsing an error page as Excel
excel_data = BytesIO(response.content)  # in-memory file object for pandas

# Step 2: Load the second sheet
df = pd.read_excel(excel_data, sheet_name=1, engine='openpyxl')  # sheet_name=1 -> second sheet (0-based)

# Create output directory
os.makedirs("output", exist_ok=True)

def plot_property_distribution(selected_property_df, prop_name, save_path):
    """Save a histogram and a probability plot with a summary statistics box.

    Parameters
    ----------
    selected_property_df : pandas.DataFrame
        Rows to plot. The 'Result', 'Spec_Min' and 'Spec_Max' columns are read.
    prop_name : str
        Label used in the titles, axis labels and console messages.
    save_path : str
        Destination handed to ``savefig``.

    Returns
    -------
    None
        No file is written when there are fewer than two results or when all
        results are identical; a message is printed instead.
    """
    prop = selected_property_df['Result'].dropna().sort_values().values
    n = len(prop)
    if n < 2:
        print(f"Not enough data to plot for {prop_name}")
        return
    # `prop` is sorted, so equal endpoints mean every value is identical. The
    # standard deviation is then zero, which makes the fitted pdf NaN and the
    # ±6σ axis range empty, so plotting cannot proceed.
    if prop[0] == prop[-1]:
        print(f"No variation in data to plot for {prop_name}")
        return

    mean = np.mean(prop)
    std = np.std(prop, ddof=1)  # sample standard deviation

    def _first_valid(column):
        """Return the first non-missing value of *column*, or None if there is none."""
        values = selected_property_df[column].dropna()
        return None if values.empty else values.iloc[0]

    Lower_Spec = _first_valid('Spec_Min')
    Upper_Spec = _first_valid('Spec_Max')

    # Both panels share a fixed ±6σ window around the mean.
    x_min = mean - 6 * std
    x_max = mean + 6 * std
    x = np.linspace(x_min, x_max, 200)
    p = norm.pdf(x, mean, std)
    pdf_peak = p.max()  # used to scale label heights and the y-axis

    result = anderson(prop, dist='norm')
    ad_stat = result.statistic
    # For dist='norm' the significance levels are 15, 10, 5, 2.5 and 1 percent,
    # so index 2 is the 5% critical value. Exceeding it means p < 0.05.
    p_value = "<0.05" if ad_stat > result.critical_values[2] else ">=0.05"

    fig, axes = plt.subplots(1, 2, figsize=(15, 6), dpi=120)
    hist_ax, prob_ax = axes

    # === Histogram Plot ===
    sns.histplot(prop, kde=False, stat='density', bins=10, color='skyblue', ax=hist_ax, edgecolor='black')
    hist_ax.plot(x, p, 'r--', label='Normal Fit')

    # Standard deviation lines at ±1σ, ±2σ, ±3σ
    for k in range(1, 4):
        for sign in (1, -1):
            hist_ax.axvline(mean + sign * k * std, color='gray', linestyle='--', linewidth=1)

    # ±3σ lines in red, drawn over the grey guides
    for sign, label in ((1, '+3σ'), (-1, '-3σ')):
        hist_ax.axvline(mean + sign * 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.text(mean + sign * 3 * std + std * 0.1, pdf_peak * 0.05, label,
                     rotation=90, color='red', fontsize=9)

    # Spec limits: drawn only when inside the visible window. Text is not
    # clipped to the axes, so a label for an off-screen limit would otherwise
    # be rendered outside the plot area.
    for limit, label in ((Lower_Spec, 'Lower Spec'), (Upper_Spec, 'Upper Spec')):
        if limit is not None and x_min <= limit <= x_max:
            hist_ax.axvline(limit, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(limit + std * 0.1, pdf_peak * 0.05, label,
                         rotation=90, color='blue', fontsize=9)

    hist_ax.set_xlim(x_min, x_max)
    hist_ax.set_ylim(0, pdf_peak * 1.2)
    hist_ax.set_title(f'{prop_name} Histogram with Normal Distribution')
    hist_ax.set_xlabel(f'{prop_name} Result Value')
    hist_ax.set_ylabel('Density')
    hist_ax.legend()
    hist_ax.grid(False)  # Remove grid

    # === Normal Probability Plot ===
    sorted_data = prop
    # Plotting positions i / (n + 1), in percent, so 0% and 100% are never reached
    percentiles = np.arange(1, n + 1) / (n + 1) * 100
    theoretical_quantiles = norm.ppf(percentiles / 100, loc=mean, scale=std)
    z_critical = 1.96
    se = std / np.sqrt(n)
    # Band of constant half-width z * se around the fitted line
    ci_upper = theoretical_quantiles + z_critical * se
    ci_lower = theoretical_quantiles - z_critical * se

    prob_ax.plot(sorted_data, percentiles, 'o', label='Sample Data', color='blue')
    prob_ax.plot(theoretical_quantiles, percentiles, 'black', linestyle='-')
    prob_ax.fill_betweenx(percentiles, ci_lower, ci_upper, color='lightgray', alpha=0.5)

    prob_ax.set_xlim(x_min, x_max)
    prob_ax.set_title(f'Probability Plot of {prop_name}')
    prob_ax.set_xlabel(f'{prop_name} (Result)')
    prob_ax.set_ylabel('Cumulative Probability (%)')
    prob_ax.grid(True)

    # Mimicking the Minitab stats box
    textstr = (
        f"Mean    {mean:.1f}\n"
        f"StDev   {std:.3f}\n"
        f"N       {n}\n"
        f"AD      {ad_stat:.3f}\n"
        f"P-Value {p_value}"
    )

    # x = 1.05 in axes coordinates places the box just right of the plot area.
    prob_ax.text(1.05, 0.85, textstr,
                 transform=prob_ax.transAxes,
                 fontsize=9,
                 bbox=dict(boxstyle="round,pad=0.5", edgecolor='black', facecolor='lightblue'))

    plt.tight_layout()
    fig.savefig(save_path)
    # Close this figure explicitly so repeated calls do not accumulate open figures.
    plt.close(fig)


# Widgets
# Missing property names are dropped so the dropdown never offers a NaN entry:
# selecting it would match no rows (NaN never compares equal) and produce no plot.
property_options = df['Property'].dropna().unique().tolist()
# Every column except the measurement and its spec limits is offered for grouping.
group_by_columns = [col for col in df.columns if col not in ['Result', 'Spec_Min', 'Spec_Max']]

# Choose between exporting every graph or only the selected property.
selection_mode = widgets.RadioButtons(
    options=['All Graphs', 'Selected Property'],
    description='Download:',
)

property_dropdown = widgets.Dropdown(
    options=property_options,
    description='Property:',
    value=property_options[0]
)

# 'None' is a sentinel string meaning "do not split the data further".
group_by_dropdown = widgets.Dropdown(
    options=['None'] + group_by_columns,
    description='Group By:',
    value='None'
)

display(selection_mode, property_dropdown, group_by_dropdown)

# Button to trigger export
download_button = widgets.Button(description="Generate & Download Graph(s)")
display(download_button)


def on_button_clicked(b):
    """Generate the requested graph(s) and hand them to the browser for download.

    ``b`` is the clicked button passed by the ``on_click`` callback; it is not
    used. In 'All Graphs' mode one image per property (and per group, when a
    group-by column is chosen) is written and bundled into ``output.zip``.
    Otherwise the images for the selected property are downloaded one by one.
    Groups for which no image was produced are skipped.
    """
    import re  # local import: only needed here to build safe file names

    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    group_by_col = None if group_by_dropdown.value == 'None' else group_by_dropdown.value

    def _render(group, label):
        """Plot *group* under *label*; return the image path, or None if nothing was saved."""
        # Path separators and other reserved characters in a label would break the path.
        safe_label = re.sub(r'[\\/:*?"<>|]', '_', str(label))
        file_path = f"{output_dir}/{safe_label}.png"
        # Remove any image left by an earlier click so the existence check
        # below reflects this run only.
        if os.path.exists(file_path):
            os.remove(file_path)
        plot_property_distribution(group, label, file_path)
        # The plot function returns without saving when the data cannot be plotted.
        return file_path if os.path.exists(file_path) else None

    if selection_mode.value == 'All Graphs':
        grouped = df.groupby(['Property'] + ([group_by_col] if group_by_col else []))
        filenames = []
        for keys, group in grouped:
            # Keys may be a scalar or a tuple depending on how many columns are grouped.
            prop = keys[0] if isinstance(keys, tuple) else keys
            suffix = f"_{keys[1]}" if isinstance(keys, tuple) and group_by_col else ""
            file_path = _render(group, f"{prop}{suffix}")
            if file_path is not None:
                filenames.append(file_path)

        if not filenames:
            print("No graphs were generated; nothing to download.")
            return

        zip_path = "output.zip"
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file in filenames:
                zipf.write(file)
        files.download(zip_path)

    else:
        prop = property_dropdown.value
        filtered_df = df[df['Property'] == prop]
        if group_by_col:
            jobs = [(f"{prop}_{key}", group) for key, group in filtered_df.groupby(group_by_col)]
        else:
            jobs = [(prop, filtered_df)]
        for label, group in jobs:
            file_path = _render(group, label)
            if file_path is not None:
                files.download(file_path)


download_button.on_click(on_button_clicked)

