<a href="https://colab.research.google.com/github/ArtsARKADE/versemagic/blob/main/SPaD/Layer-1/Recommender/Recommender_version_14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import html
import os
import time

import ipywidgets as widgets
import nltk
import numpy as np
from google.colab import drive
from IPython.display import display, clear_output, HTML
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Download the NLTK 'punkt' tokenizer data.
# NOTE(review): nothing in the visible code calls an NLTK tokenizer - confirm
# whether this download is still needed.
nltk.download('punkt')

# Mount Google Drive at /content/drive so the Drive paths used below resolve.
# NOTE(review): force_remount=True presumably lets the cell be re-run when a
# mount already exists - confirm against the google.colab documentation.
drive.mount('/content/drive', force_remount=True)

def load_poems_from_folder(folder_path):
    """Load every ``.txt`` file in a folder as one poem.

    Files are read as UTF-8 in sorted file-name order, so the resulting list
    is the same on every run regardless of the directory listing order.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A ``(poems, message)`` tuple. On success ``poems`` holds the file
        contents and ``message`` reports how many were loaded. If the folder
        cannot be listed or any file cannot be read or decoded, ``poems`` is
        an empty list and ``message`` describes the failure.
    """
    poems = []
    try:
        # os.listdir returns entries in arbitrary order; sort for stable output.
        file_names = sorted(
            name for name in os.listdir(folder_path) if name.endswith('.txt')
        )
        for name in file_names:
            with open(os.path.join(folder_path, name), 'r', encoding='utf-8') as f:
                poems.append(f.read())
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable path; ValueError covers UTF-8 decode errors.
        return [], f"Failed to load poems from {folder_path}: {e}"
    return poems, f"Loaded {len(poems)} poems from {folder_path} successfully."

def compare_with_poetry_database(user_text, poems, max_results):
    """Rank poems by TF-IDF cosine similarity to the user's text.

    The user's text and all poems are vectorized together with a single
    ``TfidfVectorizer``; the first row (the user's text) is then compared
    against every poem row.

    Args:
        user_text: Text to compare against the poems.
        poems: List of poem strings.
        max_results: Maximum number of matches to return.

    Returns:
        A list of ``(poem, similarity)`` tuples ordered from most to least
        similar, at most ``max_results`` long. The list is empty when
        ``poems`` is empty or ``max_results`` is not positive.
    """
    # Without poems the similarity input would have zero rows, which
    # scikit-learn rejects. A negative limit would slice from the wrong end
    # (e.g. [:-1] drops only the last match), so treat it like zero.
    if not poems or max_results <= 0:
        return []
    documents = [user_text] + poems
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    # Row 0 is the user's text; rows 1.. line up with `poems` by index.
    cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    ranked_indices = np.argsort(cosine_similarities)[::-1][:max_results]
    return [(poems[i], cosine_similarities[i]) for i in ranked_indices]

def highlight_similar_parts(text, reference):
    """Wrap words of ``text`` that also occur in ``reference`` in ``<mark>`` tags.

    Words are whitespace-separated tokens. Matching is case-insensitive and
    only reference words longer than two characters are considered. Each word
    is HTML-escaped before it is emitted, so ``<``, ``>`` and ``&`` in the
    text are shown literally instead of being interpreted as markup when the
    result is rendered as HTML.

    Args:
        text: Text to annotate.
        reference: Text whose words decide what gets highlighted.

    Returns:
        The words of ``text`` joined by single spaces, with matching words
        wrapped in ``<mark>...</mark>``.
    """
    reference_words = {word.lower() for word in reference.split() if len(word) > 2}
    pieces = []
    for word in text.split():
        # Match on the raw word, but only ever emit the escaped form.
        safe_word = html.escape(word, quote=False)
        if word.lower() in reference_words:
            pieces.append(f"<mark>{safe_word}</mark>")
        else:
            pieces.append(safe_word)
    return ' '.join(pieces)

def save_changes_to_drive(path, content):
    """Write ``content`` to a new timestamped text file and return its path.

    Args:
        path: Target directory. When empty, the default
            '/content/drive/My Drive/Edited Poems' is used. Missing
            directories are created.
        content: Text to write.

    Returns:
        The full path of the written file, named
        ``edited_poem_<YYYYMMDD-HHMMSS>.txt``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    if not path:
        path = '/content/drive/My Drive/Edited Poems'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    full_path = os.path.join(path, f"edited_poem_{timestamp}.txt")
    # Write UTF-8 explicitly: poems are read as UTF-8, and the platform
    # default encoding may not be able to represent every character.
    with open(full_path, 'w', encoding='utf-8') as file:
        file.write(content)
    return full_path

def calculate_changes(original, edited):
    """Report which distinct words were added to or dropped from a text.

    Both texts are split on whitespace and compared as sets, so repeated
    words and word order play no part in the result.

    Args:
        original: The text before editing.
        edited: The text after editing.

    Returns:
        A tuple ``(added, removed)`` of sets: words found only in ``edited``
        and words found only in ``original``.
    """
    before = set(original.split())
    after = set(edited.split())
    only_in_edited = {word for word in after if word not in before}
    only_in_original = {word for word in before if word not in after}
    return only_in_edited, only_in_original

# Event Handlers
def submit_poem(b):
    """Button callback: show the loaded poems most similar to the user's text.

    Clears ``results_area``, ranks the loaded poems against
    ``text_area.value`` and renders up to ``max_results_input.value`` matches,
    each with its score and with shared words highlighted.

    Args:
        b: The clicked button; unused.
    """
    with results_area:
        clear_output()
        # The global `poems` exists only after the load handler has run, and
        # it is an empty list when loading failed or found no files. Report
        # that instead of failing on a missing name or an empty comparison.
        loaded_poems = globals().get('poems')
        if not loaded_poems:
            print("No poems loaded. Load a poem database before submitting.")
            return
        recommendations = compare_with_poetry_database(text_area.value, loaded_poems, max_results_input.value)
        for poem, score in recommendations:
            display(HTML(f"<div style='margin-top: 5px;'><strong>Score: {score:.3f}</strong><br>{highlight_similar_parts(poem, text_area.value)}</div>"))

def save_changes(b):
    """Button callback: save the original text, the edited text and a word diff.

    Reads ``text_area.value`` (original) and ``edit_area.value`` (edited),
    computes the added and removed words, writes a report through
    ``save_changes_to_drive`` and prints the outcome into ``change_output``.

    Args:
        b: The clicked button; unused.
    """
    with change_output:
        clear_output()
        original, edited = text_area.value, edit_area.value
        added, removed = calculate_changes(original, edited)
        # Sets iterate in arbitrary order; sort so the saved report is stable.
        changes = f"Added words: {', '.join(sorted(added))}\nRemoved words: {', '.join(sorted(removed))}"
        try:
            path = save_changes_to_drive(save_path_input.value, f"Original Text:\n{original}\n\nEdited Text:\n{edited}\n\nChanges:\n{changes}")
        except OSError as e:
            # Report an unwritable save location as a message, not a traceback.
            print(f"Failed to save changes: {e}")
            return
        print(f"Changes saved successfully to {path}.")

# UI Components
# These widgets are module-level names because the event handlers read their
# values and write into the Output widgets directly.

# Folder to load poems from, and the button that triggers loading.
folder_path_input = widgets.Text(description="Poem Database Path:")
load_button = widgets.Button(description="Load Poems")
# Optional folder for saved edits; blank falls back to the default save path.
save_path_input = widgets.Text(description="Save Path for Edits:", placeholder="Optional, leave blank for default")
# The user's poem, compared against the loaded poems on submit.
text_area = widgets.Textarea(description='Your Poem:', layout=widgets.Layout(width='500px', height='100px'))
# Number of matches to show: default 5, limited to the range 1-20.
max_results_input = widgets.BoundedIntText(value=5, min=1, max=20, step=1, description='Max Results:')
submit_button = widgets.Button(description='Submit Poem', button_style='success')
# Output area that submit_poem renders the ranked matches into.
results_area = widgets.Output()
# Edited version of the poem, compared with text_area when saving.
edit_area = widgets.Textarea(description='Edit Poem:', layout=widgets.Layout(width='500px', height='100px'))
edit_button = widgets.Button(description='Save Changes', button_style='info')
# Output area for status messages from loading and saving.
change_output = widgets.Output()

# Layout and Display
# load_poems is defined further down in this cell; the lambda looks the name
# up only when the button is clicked, so the forward reference is safe.
load_button.on_click(lambda b: load_poems(folder_path_input.value))
submit_button.on_click(submit_poem)
edit_button.on_click(save_changes)

# Stack every widget vertically in the order the user works through them.
ui = widgets.VBox([
    folder_path_input, load_button, save_path_input,
    text_area, max_results_input, submit_button,
    results_area, edit_area, edit_button, change_output
])

display(ui)

def load_poems(folder_path):
    """Load poems from ``folder_path`` into the global ``poems`` list.

    The status message returned by ``load_poems_from_folder`` replaces
    whatever is currently shown in ``change_output``.

    Args:
        folder_path: Directory to read the poem files from.
    """
    global poems
    outcome = load_poems_from_folder(folder_path)
    poems = outcome[0]
    status = outcome[1]
    with change_output:
        clear_output()
        print(status)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Mounted at /content/drive


VBox(children=(Text(value='', description='Poem Database Path:'), Button(description='Load Poems', style=Butto…