<a href="https://colab.research.google.com/github/Adarsh3589/AMS5659-15-5PH-BOEING/blob/main/AMS5659BOEING%20DATA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<div class="markdown-google-sans">

<a name="machine-learning-examples"></a>

### Featured examples

</div>

- [Retraining an Image Classifier](https://tensorflow.org/hub/tutorials/tf2_image_retraining): Build a Keras model on top of a pre-trained image classifier to distinguish flowers.
- [Text Classification](https://tensorflow.org/hub/tutorials/tf2_text_classification): Classify IMDB movie reviews as either *positive* or *negative*.
- [Style Transfer](https://tensorflow.org/hub/tutorials/tf2_arbitrary_image_stylization): Use deep learning to transfer style between images.
- [Multilingual Universal Sentence Encoder Q&A](https://tensorflow.org/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa): Use a machine learning model to answer questions from the SQuAD dataset.
- [Video Interpolation](https://tensorflow.org/hub/tutorials/tweening_conv3d): Predict what happened in a video between the first and the last frame.


In [None]:
# Install the required packages if they are not already installed
!pip install -q matplotlib seaborn pandas

import os
import zipfile
import requests
from io import BytesIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import norm, anderson

from google.colab import files
from IPython.display import display
import ipywidgets as widgets



# Step 1: Download the Excel file from GitHub
url = "https://raw.githubusercontent.com/Adarsh3589/300m-boieng-/cfb4fc1b2fea7b50d77886be7d32d2b58b0d209a/Boeing%20data%20300m.xlsx"

# A timeout keeps the cell from hanging indefinitely if the server never responds.
response = requests.get(url, timeout=30)
response.raise_for_status()
excel_data = BytesIO(response.content)

# Step 2: Load the first sheet (sheet_name=0 selects the first worksheet)
df = pd.read_excel(excel_data, sheet_name=0, engine='openpyxl')
print(df.columns)

# Create output directory
os.makedirs("output", exist_ok=True)

# Function to plot and save image
def plot_property_distribution(selected_property_df, prop_name, save_path):
    """Plot one property's distribution and save the figure as an image.

    The figure has two panels: a 10-bin histogram of the ``Result`` values
    overlaid with a fitted normal curve, sigma markers and any spec limits,
    and a normal probability plot annotated with summary statistics and the
    Anderson-Darling normality verdict.

    Args:
        selected_property_df: DataFrame with ``Result``, ``Spec_Min`` and
            ``Spec_Max`` columns. The first non-null value of each spec
            column is used as the limit.
        prop_name: Label used in plot titles, axis labels and messages.
        save_path: Destination path handed to ``Figure.savefig``.

    Returns:
        None. No file is written when there are fewer than two results or
        when the results have no spread.
    """
    prop = selected_property_df['Result'].dropna().sort_values().values
    n = len(prop)
    if n < 2:
        print(f"Not enough data to plot for {prop_name}")
        return

    mean = np.mean(prop)
    std = np.std(prop, ddof=1)
    # With zero spread the normal fit, the Anderson-Darling statistic and the
    # axis range (mean +/- 6 sigma) are all degenerate, so skip the plot.
    if not np.isfinite(std) or std <= 0:
        print(f"No variation in data to plot for {prop_name}")
        return

    spec_min_values = selected_property_df['Spec_Min'].dropna()
    spec_max_values = selected_property_df['Spec_Max'].dropna()
    lower_spec = spec_min_values.iloc[0] if not spec_min_values.empty else None
    upper_spec = spec_max_values.iloc[0] if not spec_max_values.empty else None

    x_min = mean - 6 * std
    x_max = mean + 6 * std
    x = np.linspace(x_min, x_max, 200)
    p = norm.pdf(x, mean, std)

    # Anderson-Darling Test
    result = anderson(prop, dist='norm')
    ad_stat = result.statistic
    critical_value_5 = result.critical_values[2]  # Corresponds to 5% significance level
    rejects_normality = ad_stat > critical_value_5
    # The comparison is made at the 5% level, so the p-value bound is 0.05.
    p_value = "<0.05" if rejects_normality else ">=0.05"
    normality = ("Distribution is NOT Normal at 95% CI" if rejects_normality
                 else "Distribution is Normal at 95% CI")

    # Same binning as the plotted histogram (10 equal-width bins over the data range).
    counts, bin_edges = np.histogram(prop, bins=10)
    y_max = max(counts)
    # Expected count per bin is pdf * n * bin width; scaling by the x-grid
    # step instead would draw the curve far below the bars.
    bin_width = bin_edges[1] - bin_edges[0]
    fit_counts = p * n * bin_width
    label_y = y_max * 0.05  # Height of the rotated line labels, just above the x-axis

    fig, axes = plt.subplots(1, 2, figsize=(15, 6), dpi=120)
    try:
        hist_ax, prob_ax = axes

        # === Histogram Plot ===
        sns.histplot(prop, kde=False, stat='count', bins=10, color='skyblue', ax=hist_ax, edgecolor='black')
        hist_ax.plot(x, fit_counts, 'r--', label='Normal Fit')

        # Annotate each bin edge at the top and draw vertical grid lines
        for edge in bin_edges:
            hist_ax.annotate(f'{edge:.2f}', xy=(edge, y_max), xytext=(0, 5), textcoords='offset points',
                             ha='center', va='bottom', fontsize=8, rotation=90, color='black')
            hist_ax.axvline(x=edge, color='gray', linestyle='--', linewidth=0.5)

        # Mean and +/-1 sigma, +/-2 sigma markers in gray; +/-3 sigma highlighted in red
        hist_ax.axvline(mean, color='gray', linestyle='--', linewidth=1)
        for i in (1, 2):
            hist_ax.axvline(mean + i * std, color='gray', linestyle='--', linewidth=1)
            hist_ax.axvline(mean - i * std, color='gray', linestyle='--', linewidth=1)
        hist_ax.axvline(mean + 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.axvline(mean - 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.text(mean + 3 * std + std * 0.1, label_y, '+3σ', rotation=90, color='red', fontsize=9)
        hist_ax.text(mean - 3 * std + std * 0.1, label_y, '-3σ', rotation=90, color='red', fontsize=9)

        # Spec limits are drawn only when they fall inside the plotted x-range
        if lower_spec is not None and x_min <= lower_spec <= x_max:
            hist_ax.axvline(lower_spec, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(lower_spec + std * 0.05, label_y, 'Lower Spec', rotation=90, color='blue', fontsize=9)

        if upper_spec is not None and x_min <= upper_spec <= x_max:
            hist_ax.axvline(upper_spec, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(upper_spec + std * 0.05, label_y, 'Upper Spec', rotation=90, color='blue', fontsize=9)

        hist_ax.set_xlim(x_min, x_max)
        hist_ax.set_title(f'{prop_name} Histogram with Normal Distribution')
        hist_ax.set_xlabel(f'{prop_name} Result Value')
        # Leave 20% headroom above whichever is taller: the bars or the fitted curve
        hist_ax.set_ylim(0, 1.2 * max(y_max, fit_counts.max()))
        hist_ax.set_ylabel('Frequency')
        hist_ax.legend()
        hist_ax.grid(False)

        # === Normal Probability Plot ===
        percentiles = np.arange(1, n + 1) / (n + 1) * 100
        theoretical_quantiles = norm.ppf(percentiles / 100, loc=mean, scale=std)
        # The shaded band is the fitted line shifted by +/-1.96 standard errors of the mean
        z_critical = 1.96
        se = std / np.sqrt(n)
        ci_upper = theoretical_quantiles + z_critical * se
        ci_lower = theoretical_quantiles - z_critical * se

        prob_ax.plot(prop, percentiles, 'o', label='Sample Data', color='blue', markersize=2)
        prob_ax.plot(theoretical_quantiles, percentiles, 'black', linestyle='-')
        prob_ax.fill_betweenx(percentiles, ci_lower, ci_upper, color='lightgray', alpha=0.5)

        prob_ax.set_xlim(x_min, x_max)
        prob_ax.set_title(f'Probability Plot of {prop_name}')
        prob_ax.set_xlabel(f'{prop_name} (Result)')
        prob_ax.set_ylabel('Cumulative Probability (%)')
        prob_ax.grid(True)

        # === Annotation Box at Top Left of Second Graph ===
        textstr = (
            f"Mean      : {mean:.2f}\n"
            f"Std Dev   : {std:.3f}\n"
            f"N         : {n}\n"
            f"AD Stat   : {ad_stat:.3f}\n"
            f"Crit@5%   : {critical_value_5:.3f}\n"
            f"P-Value   : {p_value}\n"
            f"{normality}"
        )

        prob_ax.text(0.02, 0.98, textstr,
                     transform=prob_ax.transAxes,
                     fontsize=9,
                     va='top', ha='left',
                     bbox=dict(boxstyle="round,pad=0.5", edgecolor='black', facecolor='lightblue'))

        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release this figure, even if drawing or saving fails
        plt.close(fig)

# Widgets
# Blank Property cells are skipped so the dropdown only offers real property names.
property_options = df['Property'].dropna().unique().tolist()
# 'Property' itself is not offered for grouping: every plot is already split per property.
group_by_columns = [col for col in df.columns if col not in ['Property', 'Result', 'Spec_Min', 'Spec_Max']]

selection_mode = widgets.RadioButtons(
    options=['All Graphs', 'Selected Property'],
    description='Download:',
)

property_dropdown = widgets.Dropdown(
    options=property_options,
    description='Property:',
    # An empty sheet leaves the dropdown unselected instead of raising IndexError.
    value=property_options[0] if property_options else None,
)

group_by_dropdown = widgets.Dropdown(
    options=['None'] + group_by_columns,
    description='Group By:',
    value='None',
)

display(selection_mode, property_dropdown, group_by_dropdown)

# Button to trigger export
download_button = widgets.Button(description="Generate & Download Graph(s)")
display(download_button)


def on_button_clicked(b):
    """Generate the requested plot(s) and start the browser download.

    In 'All Graphs' mode one image is produced per property (and per group
    value when a group-by column is chosen), and the images are downloaded
    together as ``output.zip``. Otherwise only the selected property is
    plotted and each image is downloaded individually. Groups for which the
    plot function writes no image are skipped.

    Args:
        b: The clicked button widget (unused).
    """
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    group_by_col = None if group_by_dropdown.value == 'None' else group_by_dropdown.value
    # Plots are always split per property, so grouping by it again adds nothing.
    if group_by_col == 'Property':
        group_by_col = None

    # Path separators and characters that are invalid in file names on common platforms
    unsafe_chars = str.maketrans({ch: "_" for ch in '/\\:*?"<>|'})

    def render(frame, label):
        """Plot *frame* under *label*; return the image path, or None if no image was written."""
        file_path = f"{output_dir}/{str(label).translate(unsafe_chars)}.png"
        # Drop any image left by an earlier run so a skipped plot is not
        # mistaken for a fresh one.
        if os.path.exists(file_path):
            os.remove(file_path)
        plot_property_distribution(frame, label, file_path)
        return file_path if os.path.exists(file_path) else None

    if selection_mode.value == 'All Graphs':
        filenames = []
        grouped = df.groupby(['Property'] + ([group_by_col] if group_by_col else []))
        for keys, group in grouped:
            # Keys may arrive as a scalar or a tuple depending on the grouping
            prop = keys[0] if isinstance(keys, tuple) else keys
            suffix = f"_{keys[1]}" if isinstance(keys, tuple) and group_by_col else ""
            file_path = render(group, f"{prop}{suffix}")
            if file_path:
                filenames.append(file_path)

        if not filenames:
            print("No graphs were generated; nothing to download.")
            return

        zip_path = "output.zip"
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file in filenames:
                zipf.write(file)
        files.download(zip_path)

    else:
        prop = property_dropdown.value
        filtered_df = df[df['Property'] == prop]
        if group_by_col:
            jobs = [(group, f"{prop}_{key}") for key, group in filtered_df.groupby(group_by_col)]
        else:
            jobs = [(filtered_df, prop)]

        for frame, label in jobs:
            file_path = render(frame, label)
            if file_path:
                files.download(file_path)


# Run on_button_clicked each time the download button is clicked.
download_button.on_click(on_button_clicked)

In [None]:
# Install the required packages if they are not already installed
!pip install -q matplotlib seaborn pandas

import os
import zipfile
import requests
from io import BytesIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import norm, anderson

from google.colab import files
from IPython.display import display
import ipywidgets as widgets



# Step 1: Download the Excel file from GitHub
url = "https://raw.githubusercontent.com/Adarsh3589/AMS5659-15-5PH-BOEING/f1a58f61276b71b81a17a8b68025c80be0ddfac1/AMS5659(15-5PH)%20BOEING.xlsx"

# A timeout keeps the cell from hanging indefinitely if the server never responds.
response = requests.get(url, timeout=30)
response.raise_for_status()
excel_data = BytesIO(response.content)

# Step 2: Load the first sheet (sheet_name=0 selects the first worksheet)
df = pd.read_excel(excel_data, sheet_name=0, engine='openpyxl')
print(df.columns)

# Create output directory
os.makedirs("output", exist_ok=True)

# Function to plot and save image
def plot_property_distribution(selected_property_df, prop_name, save_path):
    """Plot one property's distribution and save the figure as an image.

    The figure has two panels: a 10-bin histogram of the ``Result`` values
    with a fitted normal curve on a twin y-axis, sigma markers and any spec
    limits, and a normal probability plot annotated with summary statistics
    and the Anderson-Darling normality verdict.

    Args:
        selected_property_df: DataFrame with ``Result``, ``Spec_Min`` and
            ``Spec_Max`` columns. The first non-null value of each spec
            column is used as the limit.
        prop_name: Label used in plot titles, axis labels and messages.
        save_path: Destination path handed to ``Figure.savefig``.

    Returns:
        None. No file is written when there are fewer than two results or
        when the results have no spread.
    """
    prop = selected_property_df['Result'].dropna().sort_values().values
    n = len(prop)
    if n < 2:
        print(f"Not enough data to plot for {prop_name}")
        return

    mean = np.mean(prop)
    std = np.std(prop, ddof=1)
    # With zero spread the normal fit, the Anderson-Darling statistic and the
    # axis range (mean +/- 6 sigma) are all degenerate, so skip the plot.
    if not np.isfinite(std) or std <= 0:
        print(f"No variation in data to plot for {prop_name}")
        return

    spec_min_values = selected_property_df['Spec_Min'].dropna()
    spec_max_values = selected_property_df['Spec_Max'].dropna()
    lower_spec = spec_min_values.iloc[0] if not spec_min_values.empty else None
    upper_spec = spec_max_values.iloc[0] if not spec_max_values.empty else None

    x_min = mean - 6 * std
    x_max = mean + 6 * std
    x = np.linspace(x_min, x_max, 200)
    p = norm.pdf(x, mean, std)

    # Anderson-Darling Test
    result = anderson(prop, dist='norm')
    ad_stat = result.statistic
    critical_value_5 = result.critical_values[2]  # Corresponds to 5% significance level
    rejects_normality = ad_stat > critical_value_5
    # The comparison is made at the 5% level, so the p-value bound is 0.05.
    p_value = "<0.05" if rejects_normality else ">=0.05"
    normality = ("Distribution is NOT Normal at 95% CI" if rejects_normality
                 else "Distribution is Normal at 95% CI")

    # Same binning as the plotted histogram (10 equal-width bins over the data range).
    counts, bin_edges = np.histogram(prop, bins=10)
    y_max = max(counts)
    # Expected count per bin is pdf * n * bin width; scaling by the x-grid
    # step instead would not put the curve on the histogram's count scale.
    bin_width = bin_edges[1] - bin_edges[0]
    fit_counts = p * n * bin_width
    label_y = y_max * 0.05  # Height of the rotated line labels, just above the x-axis
    # Shared upper limit with 20% headroom so curve and bars are directly comparable
    y_top = 1.2 * max(y_max, fit_counts.max())

    fig, axes = plt.subplots(1, 2, figsize=(15, 6), dpi=120)
    try:
        hist_ax, prob_ax = axes

        # === Histogram Plot ===

        # Plot histogram on the primary Y-axis (left)
        sns.histplot(prop, kde=False, stat='count', bins=10, color='skyblue',
                     ax=hist_ax, edgecolor='black', label='Frequency')

        # Annotate each bin edge at the top and draw vertical grid lines
        for edge in bin_edges:
            hist_ax.annotate(f'{edge:.3f}', xy=(edge, y_max), xytext=(0, 5), textcoords='offset points',
                             ha='center', va='bottom', fontsize=8, rotation=90, color='black')
            hist_ax.axvline(x=edge, color='gray', linestyle='--', linewidth=0.5)

        # Plot the Normal Fit line on the secondary Y-axis (right)
        fit_ax = hist_ax.twinx()
        fit_ax.plot(x, fit_counts, 'r--', label='Normal Fit')
        fit_ax.set_ylabel('Normal Fit (scaled)', color='red')
        fit_ax.tick_params(axis='y', labelcolor='red')
        fit_ax.set_ylim(0, y_top)

        # Mean and +/-1 sigma, +/-2 sigma markers in gray; +/-3 sigma highlighted in red
        hist_ax.axvline(mean, color='gray', linestyle='--', linewidth=1)
        for i in (1, 2):
            hist_ax.axvline(mean + i * std, color='gray', linestyle='--', linewidth=1)
            hist_ax.axvline(mean - i * std, color='gray', linestyle='--', linewidth=1)
        hist_ax.axvline(mean + 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.axvline(mean - 3 * std, color='red', linestyle='-', linewidth=2)
        hist_ax.text(mean + 3 * std + std * 0.1, label_y, '+3σ', rotation=90, color='red', fontsize=9)
        hist_ax.text(mean - 3 * std + std * 0.1, label_y, '-3σ', rotation=90, color='red', fontsize=9)

        # Spec limits are drawn only when they fall inside the plotted x-range
        if lower_spec is not None and x_min <= lower_spec <= x_max:
            hist_ax.axvline(lower_spec, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(lower_spec + std * 0.05, label_y, 'Lower Spec', rotation=90, color='blue', fontsize=9)

        if upper_spec is not None and x_min <= upper_spec <= x_max:
            hist_ax.axvline(upper_spec, color='blue', linestyle='-', linewidth=2)
            hist_ax.text(upper_spec + std * 0.05, label_y, 'Upper Spec', rotation=90, color='blue', fontsize=9)

        # Axis settings
        hist_ax.set_xlim(x_min, x_max)
        hist_ax.set_ylim(0, y_top)
        hist_ax.set_title(f'{prop_name} Result_Saarloha Histogram with Normal Distribution')
        hist_ax.set_xlabel(f'{prop_name} Result Value')
        hist_ax.set_ylabel('Frequency')
        hist_ax.grid(False)

        # Show separate legends to avoid overlap
        hist_ax.legend(loc='upper left')   # Histogram legend on left
        fit_ax.legend(loc='upper right')   # Normal Fit legend on right

        # === Normal Probability Plot ===
        percentiles = np.arange(1, n + 1) / (n + 1) * 100
        theoretical_quantiles = norm.ppf(percentiles / 100, loc=mean, scale=std)
        # The shaded band is the fitted line shifted by +/-1.96 standard errors of the mean
        z_critical = 1.96
        se = std / np.sqrt(n)
        ci_upper = theoretical_quantiles + z_critical * se
        ci_lower = theoretical_quantiles - z_critical * se

        # Plot sample data
        prob_ax.plot(prop, percentiles, 'o', label='Sample Data', color='blue', markersize=2)

        # Plot centerline (theoretical normal line)
        prob_ax.plot(theoretical_quantiles, percentiles, color='black', linestyle='-', label='Normal Line')

        # Fill the band around the centerline
        prob_ax.fill_betweenx(percentiles, ci_lower, ci_upper, color='lightgray', alpha=0.5, label='95% CI')

        # Axis settings
        prob_ax.set_xlim(x_min, x_max)
        prob_ax.set_title(f'Probability Plot of {prop_name} Result_Saarloha')
        prob_ax.set_xlabel(f'{prop_name} Result')
        prob_ax.set_ylabel('Cumulative Probability (%)')
        prob_ax.grid(True)

        # Add legend in the upper right corner
        prob_ax.legend(loc='upper right')

        # === Annotation Box at Top Left of Second Graph ===
        textstr = (
            f"Mean      : {mean:.2f}\n"
            f"Std Dev   : {std:.3f}\n"
            f"N         : {n}\n"
            f"AD Stat   : {ad_stat:.3f}\n"
            f"Crit@5%   : {critical_value_5:.3f}\n"
            f"P-Value   : {p_value}\n"
            f"{normality}"
        )

        prob_ax.text(0.02, 0.98, textstr,
                     transform=prob_ax.transAxes,
                     fontsize=9,
                     va='top', ha='left',
                     bbox=dict(boxstyle="round,pad=0.5", edgecolor='black', facecolor='lightblue'))

        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release this figure, even if drawing or saving fails
        plt.close(fig)

# Widgets
# Blank Property cells are skipped so the dropdown only offers real property names.
property_options = df['Property'].dropna().unique().tolist()
# 'Property' itself is not offered for grouping: every plot is already split per property.
group_by_columns = [col for col in df.columns if col not in ['Property', 'Result', 'Spec_Min', 'Spec_Max']]

selection_mode = widgets.RadioButtons(
    options=['All Graphs', 'Selected Property'],
    description='Download:',
)

property_dropdown = widgets.Dropdown(
    options=property_options,
    description='Property:',
    # An empty sheet leaves the dropdown unselected instead of raising IndexError.
    value=property_options[0] if property_options else None,
)

group_by_dropdown = widgets.Dropdown(
    options=['None'] + group_by_columns,
    description='Group By:',
    value='None',
)

display(selection_mode, property_dropdown, group_by_dropdown)

# Button to trigger export
download_button = widgets.Button(description="Generate & Download Graph(s)")
display(download_button)


def on_button_clicked(b):
    """Generate the requested plot(s) and start the browser download.

    In 'All Graphs' mode one image is produced per property (and per group
    value when a group-by column is chosen), and the images are downloaded
    together as ``output.zip``. Otherwise only the selected property is
    plotted and each image is downloaded individually. Groups for which the
    plot function writes no image are skipped.

    Args:
        b: The clicked button widget (unused).
    """
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    group_by_col = None if group_by_dropdown.value == 'None' else group_by_dropdown.value
    # Plots are always split per property, so grouping by it again adds nothing.
    if group_by_col == 'Property':
        group_by_col = None

    # Path separators and characters that are invalid in file names on common platforms
    unsafe_chars = str.maketrans({ch: "_" for ch in '/\\:*?"<>|'})

    def render(frame, label):
        """Plot *frame* under *label*; return the image path, or None if no image was written."""
        file_path = f"{output_dir}/{str(label).translate(unsafe_chars)}.png"
        # Drop any image left by an earlier run so a skipped plot is not
        # mistaken for a fresh one.
        if os.path.exists(file_path):
            os.remove(file_path)
        plot_property_distribution(frame, label, file_path)
        return file_path if os.path.exists(file_path) else None

    if selection_mode.value == 'All Graphs':
        filenames = []
        grouped = df.groupby(['Property'] + ([group_by_col] if group_by_col else []))
        for keys, group in grouped:
            # Keys may arrive as a scalar or a tuple depending on the grouping
            prop = keys[0] if isinstance(keys, tuple) else keys
            suffix = f"_{keys[1]}" if isinstance(keys, tuple) and group_by_col else ""
            file_path = render(group, f"{prop}{suffix}")
            if file_path:
                filenames.append(file_path)

        if not filenames:
            print("No graphs were generated; nothing to download.")
            return

        zip_path = "output.zip"
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file in filenames:
                zipf.write(file)
        files.download(zip_path)

    else:
        prop = property_dropdown.value
        filtered_df = df[df['Property'] == prop]
        if group_by_col:
            jobs = [(group, f"{prop}_{key}") for key, group in filtered_df.groupby(group_by_col)]
        else:
            jobs = [(filtered_df, prop)]

        for frame, label in jobs:
            file_path = render(frame, label)
            if file_path:
                files.download(file_path)


# Run on_button_clicked each time the download button is clicked.
download_button.on_click(on_button_clicked)