## ***CE-672 Assignment - 04***
### **Amipriya Anand (220122)**

In [2]:
# import necessary libraries
import rasterio
import numpy as np
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog, ttk
import matplotlib.pyplot as plt
from PIL import Image, ImageTk
import itertools

In [3]:
# image_path = "winmail.dat"
# with rasterio.open(image_path) as img:
#         img_data = img.read()
#         print("Image loaded successfully !!")
        

### **Creating various functions to perform PCA**

In [4]:
# load the image file 
def load_image(image_path):
    """
    opens the raster at image_path with rasterio and reads it fully into memory
    output: the array returned by the dataset's read() call
            (the rest of this file unpacks its shape as bands, height, width)
    """
    with rasterio.open(image_path) as dataset:
        pixel_array = dataset.read()
        print("Image loaded successfully !!")
    # the dataset is closed by the context manager before the array is handed back
    return pixel_array


In [5]:
# Next, reshape the data so that pixels run row-wise and bands run column-wise
def reshape_image(img_data):
    """
    flattens a (bands, height, width) image cube into a 2-D table
    with one row per pixel and one column per band
    returns: pixel table, height, width, bands
    """
    n_bands, n_rows, n_cols = img_data.shape
    # collapse the two spatial axes into one, keeping one row per band ...
    band_major = img_data.reshape(n_bands, -1)
    # ... then transpose so that bands become the columns
    pixel_table = band_major.T
    return pixel_table, n_rows, n_cols, n_bands

In [6]:
# perform PCA
def do_PCA(data, standardise = True, number_of_components = 3):
    """
    performs PCA on pixel data laid out as (pixels, bands)

    standardise = True   : every band is z-scored and the correlation matrix is decomposed
    standardise = False  : the data is used as given and the covariance matrix is decomposed
    number_of_components : number of leading PCs used for the projection and the factor loadings

    Returns : transform_data , factor_loadings , matrix_used , eigen values , eigen vectors ,
              explained variance , cumulative variance explained
    (eigen values / vectors / variances cover ALL bands, sorted from highest to lowest eigen value)
    """

    # based on the choice of standardisation choose the data
    if standardise:
        band_mean = np.mean(data, axis=0)
        band_std = np.std(data, axis=0)
        # a constant band has std == 0; dividing by it would turn the whole analysis into NaN,
        # so such a band is left as an all-zero column instead
        safe_std = np.where(band_std == 0, 1.0, band_std)
        data_used = (data - band_mean) / safe_std
        # the population covariance (ddof=0) of z-scored data is the correlation matrix, and
        # unlike np.corrcoef it stays finite when a band has zero variance
        matrix_used = np.cov(data_used, rowvar=False, ddof=0)
    else:
        # NOTE: the projection below uses these un-centred values, while np.cov centres
        # internally, so the scores carry a constant offset per component
        data_used = data
        matrix_used = np.cov(data_used, rowvar=False)

    # np.cov collapses a single-band input to a 0-d array; the decomposition needs a matrix
    matrix_used = np.atleast_2d(matrix_used)

    # covariance / correlation matrices are symmetric, so use eigh:
    # it guarantees real eigen values and orthonormal eigen vectors (eig does not)
    eig_val, eig_vec = np.linalg.eigh(matrix_used)

    # sort the eigen values in descending order and correspondingly the eigen vectors
    descending = np.argsort(eig_val)[::-1]
    eig_val = eig_val[descending]
    eig_vec = eig_vec[:, descending]                # eigen vectors are stored column wise

    # next calculate the variance explained
    total_variance = np.sum(eig_val)
    explained_var = eig_val / total_variance
    cumulative_var_explained = np.cumsum(explained_var)

    # lastly project the data onto the leading principal components
    leading_vectors = eig_vec[:, :number_of_components]
    transform_data = np.dot(data_used, leading_vectors)

    # Factor loadings are calculated as eigenvectors multiplied by sqrt of eigenvalues.
    # round-off can make a zero eigen value slightly negative, so clip before the sqrt
    leading_values = np.clip(eig_val[:number_of_components], 0, None)
    factor_loadings = leading_vectors * np.sqrt(leading_values)

    return (transform_data, factor_loadings, matrix_used, eig_val, eig_vec, explained_var, cumulative_var_explained)


In [7]:
def reshape_transformed_data(transformed_data, height, width, n_components=3):
    """
    Turns the first n_components columns of the PCA scores back into 2-D images.
    Each column is reshaped to (height, width); the images are returned as a list
    in column order (one image per principal component).
    """
    return [
        transformed_data[:, component].reshape(height, width)
        for component in range(n_components)
    ]

In [8]:
def display_pc_images(pc_images, explained_variance):
    """
    Shows every principal component image side by side in grayscale,
    each titled with its PC number and its explained variance in percent.
    """
    image_count = len(pc_images)
    plt.figure(figsize=(15, 6))

    # the grid is laid out with image_count + 1 columns; only the first
    # image_count slots are filled, so the last one stays empty
    for slot, image in enumerate(pc_images, start=1):
        plt.subplot(1, image_count + 1, slot)
        plt.imshow(image, cmap="gray")
        plt.title(f"PC{slot}\nExplained: {explained_variance[slot - 1]*100:.2f}%")
        plt.axis("off")

    plt.show()

In [9]:
# next plot the graphical representation of PCA statistics
def plot_PCA_stats(eigen_val, explained_variance, cumulative_var):
    """
    draws three panels in one row:
    scree plot of the eigen values, bar chart of the % variance explained by each PC,
    and line plot of the cumulative % variance explained
    """
    # x axis: PC numbers 1..N in the order the eigen values are given
    component_ids = np.arange(1, len(eigen_val) + 1)

    def annotate(y_label, panel_title):
        # every panel shares the same x label; only y label and title differ
        plt.xlabel("Principal Components")
        plt.ylabel(y_label)
        plt.title(panel_title)

    plt.figure(figsize=(12, 4))

    # panel 1: scree plot
    plt.subplot(1, 3, 1)
    plt.plot(component_ids, eigen_val, marker='o', color='skyblue')
    annotate("Eigen Value", "Scree Plot")

    # panel 2: percentage contribution of every PC
    plt.subplot(1, 3, 2)
    plt.bar(component_ids, explained_variance*100, color='orange')
    annotate("variance explained (%) ", "Varinace Explained")

    # panel 3: cumulative percentage
    plt.subplot(1, 3, 3)
    plt.plot(component_ids, cumulative_var*100, marker='o', color="red")
    annotate("Cumulative varince (%)", "Cummulative Varinace Explained")

    plt.tight_layout()
    plt.show()


In [10]:
# next display complete statistics band wise of complete data
def display_complete_stats(data):
    """
    Displays the complete statistical summary of the image data

    data : 2-D array laid out as (pixels, bands)
    Prints the per-band mean, median, max, min and std, followed by the
    covariance matrix and the correlation matrix of the bands. Returns nothing.
    """
    _, bands = data.shape
    band_mean = np.mean(data, axis=0)
    band_median = np.median(data, axis=0)
    band_max = np.max(data, axis=0)        # named band_* so the builtins max/min are not shadowed
    band_min = np.min(data, axis=0)
    band_std = np.std(data, axis=0)
    # np.cov / np.corrcoef return a 0-d value for a single band; force a matrix so rows can be printed
    cov = np.atleast_2d(np.cov(data, rowvar=False))
    corr = np.atleast_2d(np.corrcoef(data, rowvar=False))

    def as_row(values):
        # tab separated values with two decimals
        return "\t".join(f"{m:.2f}" for m in values)

    # same two-tab offset as the row labels so headings line up with the values
    header = "Bands\t\t" + "\t".join(f"Band {m+1}" for m in range(bands))

    def print_matrix(matrix):
        # both matrices are symmetric, so printing row wise equals printing column wise;
        # rows are numbered from 1 to match the "Band N" headings
        print(header)
        for band_no, row in enumerate(matrix, start=1):
            print(f"B{band_no}\t\t" + as_row(row))
        print()

    # printing the statistics
    print("________________________COMPLETE STATISTICS OF IMAGE DATA________________________\n")
    print(header)
    print("Mean:\t\t" + as_row(band_mean))
    print("Median:\t\t" + as_row(band_median))
    print("Max:\t\t" + as_row(band_max))
    print("Min:\t\t" + as_row(band_min))
    print("Std:\t\t" + as_row(band_std))
    print()

    # printing the variance-covariance matrix
    print("Covariace Matrix of the Image data:")
    print_matrix(cov)

    # printing the correlation matrix
    print("Correlation Matrix of the Image Data:")
    print_matrix(corr)

In [11]:
# compile 
def compile_PCA(path_of_image_file,standardize,n_components):
    """
    End-to-end PCA run for one image file.
    Loads the image, prints its shape and band statistics, runs do_PCA with the
    given standardize flag and number of components, prints the numerical results,
    then shows the PCA statistic plots and the principal component images.
    Returns nothing; all results go to the output.
    """
    # read the raster and flatten it into a (pixels, bands) table
    pixel_table, n_rows, n_cols, n_bands = reshape_image(load_image(path_of_image_file))

    # image dimensions
    print("\nImage shape")
    print(f"height = {n_rows}\t\twidth = {n_cols}\t\tbands = {n_bands}\n")

    # band wise statistics of the raw data
    display_complete_stats(pixel_table)

    # run the decomposition and name every part of the result
    pca_result = do_PCA(pixel_table, standardize, n_components)
    (scores, loadings, decomposed_matrix, eigen_values,
     eigen_vectors, variance_share, variance_share_cumulative) = pca_result

    # numerical report
    print("________________________PCA ANALYSIS________________________", end="\n\n")
    print(f"Standardisation : {standardize}\n")
    print(f"Number of PCs selected : {n_components}\n")
    print("Matrix Used for PCA:")
    print(decomposed_matrix, end="\n\n")
    print(f"Eigen Values obtained: {eigen_values}", end="\n\n")
    print(f"Eigen Vector obtained: \n{eigen_vectors}", end="\n\n")
    print(f"Factor Loadings: \n{loadings}", end="\n\n")
    print(f"Explained Varince (%): {variance_share*100}", end="\n\n")
    print(f"Cumulative Variance explained (%): {variance_share_cumulative*100}", end="\n\n\n")

    # graphical report
    print("________________________Graphical Plot of PCA Analysis________________________")
    plot_PCA_stats(eigen_values, variance_share, variance_share_cumulative)
    print()

    # turn the score columns back into images and show them, highest variance first
    component_images = reshape_transformed_data(scores, n_rows, n_cols, n_components)
    print(f"________IMAGES OF ALL CHOSEN PCs AND Final IMAGE AFTER STACKING {n_components} SELECTED PC IMAGES TOGETHER________\n")
    display_pc_images(component_images, variance_share)
    

In [12]:
# created a simple GUI for PCA
# Global variables to hold user inputs
selected_file = ""
num_bands = 0


def Run_gui_PCA():
    """
    Builds and runs the Tkinter window that drives the PCA workflow.

    The window offers: a file picker, a "Standardize Data" checkbox, a dropdown
    for the number of PCs (filled with 1..band count once a file is chosen) and a
    "Run PCA" button that calls compile_PCA with those choices.
    The chosen path and its band count are kept in the module-level
    selected_file / num_bands variables. Blocks until the window is closed.
    """

    def select_file():
        """Ask for an image file, check that rasterio can open it, refresh the PC dropdown."""
        global selected_file, num_bands
        chosen_path = filedialog.askopenfilename(title="Select Image File")
        if not chosen_path:
            # dialog cancelled: keep the previous selection and dropdown untouched
            return

        # read the number of bands BEFORE committing the selection, so a file that
        # cannot be opened never becomes the file "Run PCA" works on
        try:
            with rasterio.open(chosen_path) as src:
                band_count = src.count
        except Exception as e:
            messagebox.showerror("Error", f"Could not open image. Error: {str(e)}")
            return

        selected_file = chosen_path
        num_bands = band_count
        messagebox.showinfo("File Selected", f"Selected File:\n{selected_file}")

        # Populate dropdown dynamically with 1..num_bands
        menu = pc_dropdown['menu']
        menu.delete(0, 'end')
        for i in range(1, num_bands + 1):
            menu.add_command(label=str(i), command=tk._setit(pc_choice, str(i)))
        pc_choice.set('1')  # Default selection

    def run_pca():
        """Validate the inputs, run compile_PCA and report progress on the status label."""
        if not selected_file:
            messagebox.showwarning("Warning", "Please select an image file first!")
            return

        standardize_var = standardize_option.get()
        try:
            n_components_var = int(pc_choice.get())
        except ValueError:
            messagebox.showerror("Error", "Invalid number of components selected.")
            return

        label1.config(text="Please Wait... Generating Output\nCheck your Output tab")
        # compile_PCA blocks the event loop; without this redraw the "Please Wait"
        # text would never be painted before the work starts
        root.update_idletasks()
        try:
            compile_PCA(selected_file, standardize_var, n_components_var)
        except Exception as e:
            # GUI boundary: report the failure instead of leaving the label on "Please Wait"
            label1.config(text="PCA could not be completed")
            messagebox.showerror("Error", f"PCA failed. Error: {str(e)}")
            return
        label1.config(text="Output has been successfully Generated !!")

    # ----------------- GUI -----------------

    root = tk.Tk()
    root.title("PCA Implementation GUI (220122)")
    root.geometry("400x300")

    standardize_option = tk.BooleanVar(root)
    pc_choice = tk.StringVar(root)
    tk.Button(root, text="Select Image File", command=select_file).pack(pady=10)

    # Checkbox for standardization
    tk.Checkbutton(root, text="Standardize Data", variable=standardize_option).pack(pady=5)

    # Dynamic dropdown; seeded with the default '1' (an empty tuple here would
    # create a blank, non-numeric menu entry) and refilled once a file is selected
    tk.Label(root, text="Select number of PCs:").pack(pady=5)
    pc_choice.set('1')  # Default value
    pc_dropdown = tk.OptionMenu(root, pc_choice, '1')
    pc_dropdown.pack(pady=5)

    # Run button and status label
    tk.Button(root, text="Run PCA", command=run_pca).pack(pady=15)
    label1 = tk.Label(root, text="Once submitted all the output will be displayed in the output Tab")
    label1.pack()
    root.mainloop()


#### Main 

In [13]:
# Main execution: open the PCA GUI only when this file is run directly,
# not when it is imported as a module
if __name__ == "__main__":
    Run_gui_PCA()