# Please run this program in a local Jupyter Notebook rather than on Google Colab; otherwise the Tkinter GUI will not be generated properly and the code may not run.

# In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import csv
import numpy as np

class PCA_GUI:
    """Tkinter window that runs a from-scratch PCA on an uploaded CSV file.

    Workflow: upload a CSV (first non-blank row is the header), pick the
    target column, enter the number of principal components, run PCA and
    optionally save the projected data (with the target column appended)
    as a new CSV file.

    Non-numeric feature columns are one-hot encoded, every feature is
    standardised, and the principal axes are obtained from the
    eigen-decomposition of the covariance matrix.
    """

    def __init__(self, root):
        """Build the widgets inside ``root`` and initialise empty state."""
        self.root = root
        self.root.title("PCA GUI - From Scratch")
        self.dataset = None  # data rows (lists of strings) from the CSV
        self.column_names = None  # header row of the CSV
        self.transformed_data = None  # last PCA output, target appended
        self.target_column = None  # target name used for the last PCA run

        self.upload_button = tk.Button(root, text="Upload Dataset", command=self.upload_dataset)
        self.upload_button.pack(pady=10)

        self.target_label = tk.Label(root, text="Select Target Variable:")
        self.target_label.pack()

        self.target_var = tk.StringVar(root)
        self.target_dropdown = ttk.Combobox(root, textvariable=self.target_var, state="readonly")
        self.target_dropdown.pack(pady=10)

        self.label = tk.Label(root, text="Number of Principal Components")
        self.label.pack()
        self.n_components_entry = tk.Entry(root)
        self.n_components_entry.pack(pady=10)

        self.pca_button = tk.Button(root, text="Perform PCA", command=self.perform_pca)
        self.pca_button.pack(pady=10)

        # Stays disabled until a PCA run has produced something to save.
        self.download_button = tk.Button(root, text="Download Transformed Dataset", command=self.download_dataset, state=tk.DISABLED)
        self.download_button.pack(pady=10)

        self.output_box = tk.Text(root, height=20, width=80)
        self.output_box.pack(pady=10)

    def upload_dataset(self):
        """Ask for a CSV file, load it and refresh the target dropdown.

        Any failure while reading or validating the file is reported in an
        error dialog; the previously loaded dataset is kept in that case.
        """
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if not file_path:
            return
        try:
            # newline='' lets the csv module handle line endings itself, so
            # quoted fields containing newlines are parsed correctly.
            with open(file_path, 'r', newline='') as file:
                rows = [row for row in csv.reader(file) if any(row)]
            self._load_rows(rows)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to upload dataset: {e}")
            return

        # A selection or result from a previous file is meaningless now.
        self.target_var.set("")
        self.target_dropdown['values'] = self.column_names
        self.download_button.config(state=tk.DISABLED)
        messagebox.showinfo("Success", "Dataset uploaded successfully!")

    def _load_rows(self, rows):
        """Validate parsed CSV rows and store them as the current dataset.

        Args:
            rows: Non-blank rows of the file; ``rows[0]`` is the header.

        Raises:
            ValueError: If there is no header, no data row, or a data row
                has fewer values than the header has columns.
        """
        if not rows:
            raise ValueError("the file is empty")
        header, body = rows[0], rows[1:]
        if not body:
            raise ValueError("the file contains a header but no data rows")
        width = len(header)
        for number, row in enumerate(body, start=1):
            # Extra trailing values are tolerated (and ignored later);
            # missing values would make column access fail.
            if len(row) < width:
                raise ValueError(
                    f"data row {number} has {len(row)} values but the header has {width} columns"
                )
        self.column_names = header
        self.dataset = body
        self.transformed_data = None
        self.target_column = None

    def is_float(self, value):
        """Return True if ``value`` can be converted with ``float()``."""
        try:
            float(value)
            return True
        except ValueError:
            return False

    def one_hot_encode(self, X, column_names=None):
        """Convert rows of strings into a numeric matrix.

        A column whose every value parses as a float is kept as one float
        column. Any other column is expanded into one 0/1 indicator column
        per distinct value (in sorted order), named ``<column>_<value>``.

        Args:
            X: Sequence of rows, each a sequence of strings.
            column_names: Names matching the columns of ``X``. Defaults to
                ``self.column_names``.

        Returns:
            Tuple ``(X_encoded, encoded_column_names)`` where ``X_encoded``
            is an array with one row per input row.

        Raises:
            ValueError: If ``X`` contains no rows.
        """
        if column_names is None:
            column_names = self.column_names
        if len(X) == 0:
            raise ValueError("Cannot encode an empty dataset!")

        encoded_data = []
        encoded_column_names = []
        for i in range(len(X[0])):
            column = [row[i] for row in X]
            if all(self.is_float(val) for val in column):
                encoded_data.append([float(val) for val in column])
                encoded_column_names.append(column_names[i])
            else:
                for unique_val in sorted(set(column)):
                    encoded_column_names.append(f"{column_names[i]}_{unique_val}")
                    encoded_data.append([1 if val == unique_val else 0 for val in column])
        # encoded_data is column-major; transpose to rows x features.
        X_encoded = np.array(encoded_data).T
        return X_encoded, encoded_column_names

    def standard_scaling(self, X):
        """Standardise each column to zero mean and unit variance.

        Uses the population standard deviation (``np.std`` default). A
        constant column has zero deviation; it is mapped to all zeros
        instead of being divided by zero.

        Args:
            X: 2-D numeric array, rows are samples.

        Returns:
            Float array of the same shape as ``X``.
        """
        X = np.asarray(X, dtype=float)
        mean = np.mean(X, axis=0)
        std = np.std(X, axis=0)
        # Dividing by 1 leaves the (already zero) centred values untouched.
        safe_std = np.where(std == 0, 1.0, std)
        return (X - mean) / safe_std

    def _compute_pca(self, n_components, target_name):
        """Run PCA on every column of the dataset except the target.

        The target column is removed *before* one-hot encoding, so the
        feature matrix never contains the target regardless of how many
        encoded columns the other features expand into.

        Args:
            n_components: Number of principal components to keep (> 0).
            target_name: Name of the column to exclude from the features.

        Returns:
            Tuple ``(X_reduced, eigenvalues, eigenvectors, target_data)``:
            the projected samples, the ``n_components`` largest eigenvalues
            in descending order, the matching eigenvectors as columns, and
            the raw target values.

        Raises:
            ValueError: If the target is unknown, there are no features,
                fewer than two rows, too many components are requested, or
                the features contain NaN/infinite values.
        """
        if target_name not in self.column_names:
            raise ValueError(f"Target column '{target_name}' is not in the dataset!")
        target_index = self.column_names.index(target_name)
        keep = [i for i in range(len(self.column_names)) if i != target_index]
        if not keep:
            raise ValueError("The dataset has no feature columns besides the target!")
        if len(self.dataset) < 2:
            # The sample covariance is undefined for a single observation.
            raise ValueError("At least two data rows are required to perform PCA!")

        target_data = [row[target_index] for row in self.dataset]
        feature_names = [self.column_names[i] for i in keep]
        feature_rows = [[row[i] for i in keep] for row in self.dataset]

        X_features, _ = self.one_hot_encode(feature_rows, feature_names)
        if n_components > X_features.shape[1]:
            raise ValueError("Number of components exceeds the number of features!")
        # float() accepts "nan"/"inf", which would turn every result into NaN.
        if not np.isfinite(X_features).all():
            raise ValueError("The dataset contains NaN or infinite values!")

        X_scaled = self.standard_scaling(X_features)
        X_centered = X_scaled - np.mean(X_scaled, axis=0)
        # np.cov squeezes a single-feature result to 0-d; eigh needs 2-D.
        cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
        # eigh returns ascending eigenvalues; PCA wants the largest first.
        sorted_indices = np.argsort(eigenvalues)[::-1]
        sorted_eigenvalues = eigenvalues[sorted_indices]
        selected_eigenvectors = eigenvectors[:, sorted_indices][:, :n_components]
        X_reduced = np.dot(X_centered, selected_eigenvectors)
        return X_reduced, sorted_eigenvalues[:n_components], selected_eigenvectors, target_data

    def perform_pca(self):
        """Validate the user input, run PCA and show the result.

        Problems with the input or the data are reported in warning
        dialogs. On success the projected data (with the target appended
        as the last column) is stored in ``self.transformed_data`` and the
        download button is enabled.
        """
        if self.dataset is None:
            messagebox.showwarning("Warning", "Please upload a dataset first!")
            return
        target_name = self.target_var.get()
        if not target_name:
            messagebox.showwarning("Warning", "Please select a target variable!")
            return
        try:
            n_components = int(self.n_components_entry.get())
        except ValueError:
            messagebox.showwarning("Warning", "Please enter a valid number for components!")
            return
        if n_components <= 0:
            messagebox.showwarning("Warning", "Invalid number of components!")
            return

        try:
            X_reduced, eigenvalues, eigenvectors, target_data = self._compute_pca(
                n_components, target_name
            )
        except (ValueError, np.linalg.LinAlgError) as e:
            messagebox.showwarning("Warning", str(e))
            return

        X_reduced_with_target = np.column_stack((X_reduced, target_data))
        self.transformed_data = X_reduced_with_target
        # Remember the target used for this run so the saved header stays
        # correct even if the dropdown selection changes afterwards.
        self.target_column = target_name

        self.download_button.config(state=tk.NORMAL)
        self.output_box.delete("1.0", tk.END)
        self.output_box.insert(tk.END, f"Transformed Dataset (First 5 rows):\n{X_reduced_with_target[:5]}\n\n")
        self.output_box.insert(tk.END, f"Eigenvalues:\n{eigenvalues}\n\n")
        self.output_box.insert(tk.END, f"Eigenvectors (Principal Components):\n{eigenvectors}\n")

    def download_dataset(self):
        """Ask for a destination and write the transformed data as CSV.

        The header is ``PC1..PCk`` followed by the name of the target
        column that was used for the PCA run.
        """
        if self.transformed_data is None:
            messagebox.showerror("Error", "No transformed data to download!")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
        if not file_path:
            return
        # Fall back to the dropdown only if no run recorded a target name.
        target_name = self.target_column if self.target_column is not None else self.target_var.get()
        try:
            with open(file_path, 'w', newline='') as file:
                writer = csv.writer(file)
                header = [f"PC{i+1}" for i in range(self.transformed_data.shape[1] - 1)]
                header.append(target_name)
                writer.writerow(header)
                writer.writerows(self.transformed_data)
            messagebox.showinfo("Success", "Transformed dataset saved successfully!")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save dataset: {e}")

# Launch the GUI only when this file is executed directly (a script run or
# a notebook cell, where __name__ is "__main__"); importing the module no
# longer opens a window and blocks in the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = PCA_GUI(root)
    root.mainloop()
