In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fuzzywuzzy import process
import re
import speech_recognition as sr
from tkinter import Tk, filedialog

# Function to normalize column names
def normalize_column_name(name):
    """Return *name* lower-cased with every non-alphanumeric character removed.

    The result is used as a comparison key, so "Industry_code NZSIOC" and
    "industry code nzsioc" both collapse to "industrycodenzsioc".

    Args:
        name: A column label. Non-string labels (e.g. integer column names)
            are converted with ``str()`` first instead of raising.

    Returns:
        The normalized key as a string (possibly empty).
    """
    # str() keeps integer/other non-string labels from failing on .lower().
    return re.sub(r'[^a-zA-Z0-9]', '', str(name).lower())

# Function to match spoken column names or numbers
def match_column_input(input_str, df):
    """Resolve a typed or spoken column reference to a column label of *df*.

    Args:
        input_str: Either a zero-based column position ("0", "3", ...) or an
            approximate column name.
        df: The DataFrame whose columns are searched.

    Returns:
        The matching column label, or None when the input is empty, *df* has
        no columns, or the best fuzzy match scores 50 or less.
    """
    if not input_str or len(df.columns) == 0:
        # With no choices extractOne returns None, which cannot be unpacked.
        return None

    text = input_str.strip()
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts characters such as "²", which make int() raise ValueError.
    if text.isdecimal() and int(text) < len(df.columns):
        return df.columns[int(text)]

    # Map each normalized key back to the original label. str() guards against
    # non-string labels.
    normalized_columns = {
        normalize_column_name(str(col)): col for col in df.columns.tolist()
    }
    match = process.extractOne(normalize_column_name(text), list(normalized_columns))
    if match is None:
        return None
    best_match, score = match[0], match[1]
    return normalized_columns[best_match] if score > 50 else None

# Sorting helper: returns a new list with the items in ascending order (thin wrapper around built-in sorting)
def python_sort(data):
    """Return a new list containing the items of *data* in ascending order.

    The input iterable itself is left unmodified.
    """
    ordered = list(data)
    ordered.sort()
    return ordered

# Improved Voice recognition for hands-free command input
def recognize_speech():
    """Listen on the microphone for one phrase and return its transcript.

    Ambient noise is calibrated first, then the recognizer waits up to 15
    seconds for speech to start and records at most 8 seconds of it. The audio
    is transcribed with ``recognize_google``.

    Returns:
        The lower-cased transcript, or None when no speech started within the
        timeout, the audio could not be understood, or the recognition
        request failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)  # Improved noise calibration
        print("Listening for voice command... Speak now.")
        try:
            audio = recognizer.listen(source, timeout=15, phrase_time_limit=8)
            command = recognizer.recognize_google(audio).lower()
        except sr.WaitTimeoutError:
            # listen() raises this when nobody speaks before `timeout`; without
            # this handler one silent interval would end the calling loop.
            print("No speech detected. Please try again.")
            return None
        except sr.UnknownValueError:
            print("Sorry, I couldn't understand that.")
            return None
        except sr.RequestError:
            print("Network error. Please check your connection.")
            return None
        print(f"Command recognized: {command}")
        return command

# Improved fuzzy matching for commands
def fuzzy_match_command(command):
    """Map a free-form transcript onto the closest supported command.

    Args:
        command: The recognized text to classify.

    Returns:
        The best-matching command string when its fuzzy score exceeds 60,
        otherwise None.
    """
    known_commands = [
        "show columns",
        "show first rows",
        "check missing values",
        "clean data",
        "show histogram",
        "show scatter plot",
        "show correlation heatmap",
        "export data",
        "exit",
    ]
    candidate, confidence = process.extractOne(command, known_commands)
    if confidence > 60:
        return candidate
    return None

# Load the dataset chosen through a file-picker dialog
root = Tk()
root.withdraw()  # hide the empty root window so only the dialog is shown
file_path = filedialog.askopenfilename(title="Select a dataset", filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")])
root.destroy()  # the hidden root is no longer needed once the dialog has closed
if not file_path:
    # Cancelling the dialog yields an empty path; stop with a clear message
    # instead of letting pd.read_excel fail on it.
    raise SystemExit("No dataset selected.")
# Compare the extension case-insensitively so e.g. "DATA.CSV" is read as CSV.
df = pd.read_csv(file_path) if file_path.lower().endswith(".csv") else pd.read_excel(file_path)
print("Dataset loaded successfully!")

# Continuous Voice Command Mode: explain how to stop, then list every supported command
print(
    "Listening continuously... Say 'exit' to stop.",
    "### Available Commands:",
    "- 'Show columns'",
    "- 'Show first rows'",
    "- 'Check missing values'",
    "- 'Clean data'",
    "- 'Show histogram'",
    "- 'Show scatter plot'",
    "- 'Show correlation heatmap'",
    "- 'Export data'",
    "- 'Exit' (to stop continuous listening)",
    sep="\n",
)

# Main voice loop: listen, map the transcript onto a known command, run it.
# Runs until the "exit" command is recognized.
while True:
    command = recognize_speech()
    if not command:
        # Nothing usable was heard; listen again.
        continue

    command = fuzzy_match_command(command)
    if command == "exit":
        print("Voice mode ended.")
        break
    elif command == "show columns":
        print(df.columns.tolist())
    elif command == "show first rows":
        print(df.head())
    elif command == "check missing values":
        print(df.isnull().sum())
    elif command == "clean data":
        # DataFrame.ffill is the supported spelling; fillna(method='ffill')
        # is deprecated in recent pandas releases.
        df.ffill(inplace=True)
        print("Missing values filled using forward fill.")
    elif command == "show histogram":
        # Resolve the typed text (name or position) to a real column so a typo
        # does not raise KeyError and end the loop.
        column = match_column_input(input("Select a column for histogram: "), df)
        if column is None:
            print("Column not found. Try again.")
        else:
            plt.figure(figsize=(6, 4))
            sns.histplot(df[column], kde=True)
            plt.show()
    elif command == "show scatter plot":
        x_column = match_column_input(input("Select a column for the x-axis: "), df)
        y_column = match_column_input(input("Select a column for the y-axis: "), df)
        if x_column is None or y_column is None:
            print("Column not found. Try again.")
        else:
            plt.figure(figsize=(6, 4))
            sns.scatterplot(data=df, x=x_column, y=y_column)
            plt.show()
    elif command == "show correlation heatmap":
        numeric_df = df.select_dtypes(include=['float64', 'int64'])
        if numeric_df.shape[1] > 1:
            # Compute the matrix once and reuse it for the plot and the summary.
            correlations = numeric_df.corr()
            plt.figure(figsize=(8, 6))
            sns.heatmap(correlations, annot=True, cmap='coolwarm')
            plt.show()
            print("### Summary of Correlation Values:")
            print(correlations)
        else:
            print("Insufficient numeric data for correlation heatmap.")
    elif command == "export data":
        export_path = input("Enter a file path for the exported CSV: ").strip()
        if export_path:
            df.to_csv(export_path, index=False)
            print(f"Data exported to {export_path}")
        else:
            print("Export cancelled: no file path given.")
    else:
        print("Command not recognized. Try again.")


Dataset loaded successfully!
Listening continuously... Say 'exit' to stop.
### Available Commands:
- 'Show columns'
- 'Show first rows'
- 'Check missing values'
- 'Clean data'
- 'Show histogram'
- 'Show scatter plot'
- 'Show correlation heatmap'
- 'Export data'
- 'Exit' (to stop continuous listening)
Listening for voice command... Speak now.
Command recognized: show column
['Year', 'Industry_aggregation_NZSIOC', 'Industry_code_NZSIOC', 'Industry_name_NZSIOC', 'Units', 'Variable_code', 'Variable_name', 'Variable_category', 'Value', 'Industry_code_ANZSIC06']
Listening for voice command... Speak now.
Command recognized: value value
Year                           0
Industry_aggregation_NZSIOC    0
Industry_code_NZSIOC           0
Industry_name_NZSIOC           0
Units                          0
Variable_code                  0
Variable_name                  0
Variable_category              0
Value                          0
Industry_code_ANZSIC06         0
dtype: int64
Listening for voice 