In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import speech_recognition as sr

In [2]:
# Make sure the corpora needed for stop-word removal and lemmatization are
# present locally before anything tries to read them.
nltk.download('stopwords')
nltk.download('wordnet')

# Shared helpers consumed by preprocess_text
lemmatizer = WordNetLemmatizer()
stop_words = {*stopwords.words('english')}

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ICE\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ICE\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Category name -> keywords that assign an idea to that category.
# Order matters: categorize_idea returns the first category that matches.
# "Miscellaneous" deliberately has no keywords.
categories = dict([
    ("Work", ["meeting", "project", "work", "deadline", "email", "client", "report"]),
    ("Shopping", ["buy", "purchase", "grocery", "shopping", "order"]),
    ("Education", ["read", "study", "course", "class", "book", "learn", "education"]),
    ("Health", ["appointment", "doctor", "health", "medicine", "exercise", "diet"]),
    ("Miscellaneous", []),
])

In [4]:
# Function to preprocess text
def preprocess_text(text):
    """Normalize free text into a space-separated string of lemmatized tokens.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is dropped, whitespace runs are collapsed, tokens found in
    the module-level ``stop_words`` are discarded and the remaining tokens
    are passed through the module-level ``lemmatizer``.

    Args:
        text: Raw idea text.

    Returns:
        The cleaned tokens joined by single spaces (possibly an empty string).
    """
    lowered = text.lower()
    letters_only = re.sub(r'[^a-zA-Z\s]', '', lowered)
    normalized = re.sub(r'\s+', ' ', letters_only).strip()

    kept = []
    for word in normalized.split():
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return ' '.join(kept)

In [5]:
# Function to match an idea to the most relevant category
def categorize_idea(idea):
    """Return the first category that has a keyword matching ``idea``.

    Categories are tried in the insertion order of the module-level
    ``categories`` dict. A keyword matches when it occurs at the start of a
    word in ``idea`` (case-insensitively), so "work" matches "work" and
    "working" but no longer matches the middle of "network". Blank keywords
    are ignored.

    Args:
        idea: Idea text (normally already run through ``preprocess_text``).

    Returns:
        The matching category name, or "Miscellaneous" when nothing matches.
    """
    for category, keywords in categories.items():
        for keyword in keywords:
            keyword = keyword.strip()
            if not keyword:
                # '' is a substring of every string; a blank keyword would
                # otherwise turn its category into a catch-all.
                continue
            # (?<!\w) anchors the keyword to the start of a word without
            # requiring the keyword itself to begin with a word character.
            pattern = r'(?<!\w)' + re.escape(keyword)
            if re.search(pattern, idea, flags=re.IGNORECASE):
                return category
    return "Miscellaneous"

In [6]:
# Function to add a new category
def add_new_category():
    """Interactively register a new category in the module-level ``categories``.

    Prompts for a category name (stripped and capitalized) and a
    comma-separated keyword list (lower-cased, stripped). Nothing is added
    when the name is empty or already exists. Empty and repeated keywords
    are dropped, so blank input or a trailing comma cannot store ``''``,
    which would match every idea under substring matching.

    Returns:
        None. The result is reported on stdout.
    """
    category_name = input("Enter the name of the new category: ").strip().capitalize()
    if not category_name:
        print("Category name cannot be empty.")
        return
    if category_name in categories:
        print("This category already exists.")
        return

    raw_keywords = input("Enter keywords for this category (comma-separated): ").lower().split(',')
    keywords = []
    for keyword in raw_keywords:
        keyword = keyword.strip()
        # Skip blanks and keep only the first occurrence of each keyword.
        if keyword and keyword not in keywords:
            keywords.append(keyword)

    categories[category_name] = keywords
    print(f"New category '{category_name}' added successfully.")

In [7]:
# Text Input Collection
def collect_text_input():
    """Prompt on the console and return whatever the user typed, unchanged."""
    return input("Please enter your idea: ")

In [8]:
# Speech Input Collection
def collect_speech_input():
    """Record one phrase from the microphone and transcribe it.

    Returns:
        The recognized text, or None when the microphone cannot be used,
        the audio cannot be understood, or the recognition request fails.
        Every failure is reported on stdout instead of being raised, so the
        caller's input loop keeps running.
    """
    recognizer = sr.Recognizer()

    try:
        # Hold the microphone only while recording; the recognition call
        # below does not need the device.
        with sr.Microphone() as source:
            print("Please say something...")
            audio = recognizer.listen(source)
    except (OSError, AttributeError) as error:
        # NOTE(review): expected to cover "no input device" (OSError) and a
        # missing PyAudio backend (AttributeError) - confirm against the
        # installed speech_recognition version.
        print(f"Microphone is not available: {error}")
        return None

    try:
        idea = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return None
    except sr.RequestError:
        print("Could not request results")
        return None

    print(f"You said: {idea}")
    return idea

In [9]:
# Storing Ideas in Memory
# In-memory store of (idea_text, category) tuples, in insertion order
ideas = []

def store_idea(idea):
    """Categorize ``idea`` and append ``(idea, category)`` to ``ideas``."""
    ideas.append((idea, categorize_idea(idea)))

# Display grouped ideas
def display_grouped_ideas():
    """Print the stored ideas grouped by category and return the grouping.

    Returns:
        A dict mapping each category present in ``ideas`` to the list of its
        idea texts; categories and ideas keep their first-seen order.
    """
    by_category = {}
    for text, label in ideas:
        by_category.setdefault(label, []).append(text)

    for label in by_category:
        print(f"{label}:")
        for text in by_category[label]:
            print(f" - {text}")
        print("\n")
    return by_category

In [10]:
# Allow user to reassign ideas
def reassign_idea(grouped_ideas):
    """Interactively move ideas between categories.

    Loops until the user declines. For each move the current grouping is
    printed, then the idea text, its current group and its new group are
    requested. A successful move updates both ``grouped_ideas`` (in place)
    and the module-level ``ideas`` list.

    The destination may be any known category (a key of ``categories``) or
    any group already in ``grouped_ideas``; it does not need to contain
    ideas yet.

    Args:
        grouped_ideas: Dict mapping category name to a list of idea texts,
            as returned by ``display_grouped_ideas``.

    Returns:
        None.
    """
    while True:
        prompt = input("Do you want to reassign any ideas? (yes/no): ").strip().lower()
        if prompt == 'no':
            break
        if prompt != 'yes':
            print("Invalid input. Please enter 'yes' or 'no'.")
            continue

        # Display current groups
        display_grouped_ideas()

        # Get user input for reassignment
        idea_to_move = input("Enter the idea you want to move: ").strip()
        from_group = input("Enter the current group of the idea: ").strip().capitalize()
        to_group = input("Enter the new group for the idea: ").strip().capitalize()

        destination_known = to_group in categories or to_group in grouped_ideas
        if from_group not in grouped_ideas or not destination_known:
            print("Invalid group name.")
        elif idea_to_move not in grouped_ideas[from_group]:
            print("Idea not found in the specified group.")
        else:
            grouped_ideas[from_group].remove(idea_to_move)
            # The destination may not have held any idea so far.
            grouped_ideas.setdefault(to_group, []).append(idea_to_move)

            # Relabel the entry that actually lives in from_group; the same
            # text may also be stored under another category.
            for i, (idea, category) in enumerate(ideas):
                if idea == idea_to_move and category == from_group:
                    ideas[i] = (idea, to_group)
                    break

            print("Idea reassigned successfully.")

        another = input("Do you want to reassign another idea? (yes/no): ").strip().lower()
        if another != 'yes':
            break

In [13]:
def group_ideas_kmeans(ideas, n_clusters=5, random_state=42):
    """Cluster idea texts with TF-IDF features and K-means.

    Args:
        ideas: Sequence of ``(idea_text, category)`` tuples; only the text
            (index 0) is used.
        n_clusters: Upper bound on the number of clusters. It is reduced to
            the number of distinct idea texts, because duplicate texts give
            identical feature rows and K-means cannot fill more clusters
            than there are distinct points.
        random_state: Seed passed to ``KMeans`` so repeated runs produce the
            same grouping.

    Returns:
        A dict mapping an integer cluster label to the list of idea texts in
        that cluster. Empty input yields ``{}``; when only one cluster is
        possible all ideas are returned under label ``0``.
    """
    # Extract just the idea text from the ideas list
    idea_texts = [idea[0] for idea in ideas]
    if not idea_texts:
        return {}

    n_clusters = min(n_clusters, len(set(idea_texts)))
    if n_clusters <= 1:
        # Nothing to separate: return all ideas in a single group
        return {0: idea_texts}

    vectorizer = TfidfVectorizer()
    try:
        X = vectorizer.fit_transform(idea_texts)
    except ValueError:
        # NOTE(review): expected when no text yields a usable token (empty
        # vocabulary); fall back to a single group - confirm against the
        # installed scikit-learn version.
        return {0: idea_texts}

    # n_init is set explicitly because its default changed between
    # scikit-learn releases.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    kmeans.fit(X)

    grouped_ideas = {}
    for text, label in zip(idea_texts, kmeans.labels_):
        # Plain int keys instead of numpy integers
        grouped_ideas.setdefault(int(label), []).append(text)

    return grouped_ideas

In [14]:
# Main function
def main():
    """Run the interactive idea-collection session.

    Repeatedly asks for an input mode (text, mic, category or exit; leading
    and trailing whitespace and letter case are ignored). Text and
    microphone ideas are preprocessed and stored; an idea that is empty
    after preprocessing is skipped instead of being stored as a blank
    entry. After "exit" the ideas are shown by category, the user may
    reassign them, and finally they are grouped with K-means.

    Note: the module-level ``ideas`` list is not cleared here, so ideas
    stored by earlier calls in the same kernel session are included.
    """
    while True:
        mode = input("Do you want to input text, use the microphone, add a new category, or exit? (text/mic/category/exit): ").strip().lower()
        if mode in ('text', 'mic'):
            idea = collect_text_input() if mode == 'text' else collect_speech_input()
            if idea:
                preprocessed_idea = preprocess_text(idea)
                if preprocessed_idea:
                    store_idea(preprocessed_idea)
                    print("Idea stored.")
                else:
                    # e.g. input made only of digits, punctuation or stop words
                    print("Idea contained no usable words after preprocessing; nothing stored.")
        elif mode == 'category':
            add_new_category()
        elif mode == 'exit':
            break
        else:
            print("Invalid input method. Please choose 'text', 'mic', 'category', or 'exit'.")

    if not ideas:
        print("No ideas were stored. Exiting the program.")
        return

    # After collecting ideas, display the grouped ideas
    print("\nIdeas grouped by categories:")
    grouped_ideas = display_grouped_ideas()

    # Allow user to reassign ideas
    reassign_idea(grouped_ideas)

    # Display final grouped ideas
    print("\nFinal grouping by categories:")
    display_grouped_ideas()

    # Group ideas using K-means clustering
    kmeans_grouped_ideas = group_ideas_kmeans(ideas)

    print("\nIdeas grouped by K-means clustering:")
    for cluster, cluster_ideas in kmeans_grouped_ideas.items():
        print(f"Cluster {cluster}:")
        for idea in cluster_ideas:
            print(f" - {idea}")
        print("\n")

# Start the interactive session when this code is executed directly;
# the guard is skipped when the code is imported as a module.
if __name__ == "__main__":
    main()

Do you want to input text, use the microphone, add a new category, or exit? (text/mic/category/exit):  text
Please enter your idea:  Shopping at 2pm


Idea stored.


Do you want to input text, use the microphone, add a new category, or exit? (text/mic/category/exit):  text
Please enter your idea:  Meeting with John


Idea stored.


Do you want to input text, use the microphone, add a new category, or exit? (text/mic/category/exit):  exit



Ideas grouped by categories:
Work:
 - meeting john
 - meeting john


Shopping:
 - shopping pm




Do you want to reassign any ideas? (yes/no):  no



Final grouping by categories:
Work:
 - meeting john
 - meeting john


Shopping:
 - shopping pm



Ideas grouped by K-means clustering:
Cluster 1:
 - meeting john
 - meeting john


Cluster 0:
 - shopping pm




  return fit_method(estimator, *args, **kwargs)
