In [None]:
from google.colab import drive
from getpass import getpass
import os
from collections import defaultdict
import re

# Mount Google Drive at /content/drive inside the Colab runtime.
drive.mount('/content/drive')

# Configuration: the GitHub account and the repository whose wiki is updated.
GITHUB_USERNAME = "SanJoao"
REPO_NAME = "Quotes-Kindle-Paperwhite"

# Prompt for the GitHub token with getpass so it is not echoed in the
# notebook output.
token = getpass('Enter your GitHub token: ')

# Clone the repository's wiki ("<repo>.wiki.git") into "./<repo>.wiki",
# authenticating by embedding the token in the HTTPS URL.
# NOTE(review): git normally records the clone URL as the "origin" remote, so
# the token probably ends up in the checkout's .git/config — confirm, and
# avoid sharing that directory.
# NOTE: when the cell is re-run the target directory already exists, so the
# clone fails with "destination path ... already exists"; the rest of the
# script then keeps using the existing checkout.
clone_url = f"https://{token}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.wiki.git"
!git clone $clone_url

# Set the global git identity used for the commit made later in this cell.
# The values below are placeholders — replace them with a real e-mail/name.
!git config --global user.email "your-email@example.com"
!git config --global user.name "Your Name"

def clean_filename(title):
    """Turn a book title into a name usable as a GitHub wiki page.

    Every character that is not a word character, whitespace, or a hyphen is
    dropped; the result is trimmed and each remaining space becomes a hyphen.
    """
    without_punctuation = re.sub(r'[^\w\s-]', '', title)
    trimmed = without_punctuation.strip()
    # Splitting on a single space (not on whitespace runs) keeps one hyphen
    # per space, exactly like a character-for-character replacement.
    return '-'.join(trimmed.split(' '))

def process_kindle_clippings(file_path):
    """Parse a Kindle clippings file into highlights grouped by book.

    The file is split into entries on the ``==========`` separator. An entry
    is kept only when its second non-empty line contains ``- Your Highlight``
    (so notes and bookmarks are skipped) and the highlighted text is longer
    than 10 characters.

    Args:
        file_path: Path to the UTF-8 encoded clippings file.

    Returns:
        A ``defaultdict(list)`` mapping each book title (the entry's first
        line) to a list of ``{'text': ..., 'date': ...}`` dicts in file
        order. ``date`` is the text following ``Added on`` in the metadata
        line, or ``"Unknown Date"`` when that marker is absent.
    """
    book_quotes = defaultdict(list)

    # 'utf-8-sig' discards a byte-order mark at the start of the file so the
    # first title is not stored with an invisible '\ufeff' prefix (which
    # would make the same book show up under two different keys).
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    for section in content.split('=========='):
        # A BOM can also precede individual entries; str.strip() does not
        # treat it as whitespace, so it is removed explicitly.
        lines = []
        for raw_line in section.split('\n'):
            cleaned = raw_line.strip().lstrip('\ufeff').strip()
            if cleaned:
                lines.append(cleaned)

        if len(lines) < 2 or '- Your Highlight' not in lines[1]:
            continue

        book = lines[0]

        date_match = re.search(r'Added on (.*?)(?:\s+\||$)', lines[1])
        date = date_match.group(1) if date_match else "Unknown Date"

        # Look for the metadata line from index 1 onward so a title that
        # happens to contain "Added on" is not mistaken for it. When the
        # marker is missing, line 1 (already known to be the highlight
        # metadata) is used, so the quote is never taken from the title.
        metadata_index = next(
            (i for i in range(1, len(lines)) if 'Added on' in lines[i]),
            1,
        )
        quote_index = metadata_index + 1
        if quote_index >= len(lines):
            continue

        quote = lines[quote_index]
        # Very short highlights are ignored.
        if len(quote) > 10:
            book_quotes[book].append({
                'text': quote,
                'date': date
            })

    return book_quotes

def generate_home_page(book_quotes):
    """Build the Markdown for the wiki's Home page.

    The page shows the number of books and highlights, followed by a table
    with one row per book linking to that book's page. Rows are ordered by
    highlight count, largest first.
    """
    highlight_total = sum(map(len, book_quotes.values()))

    parts = [
        '# Kindle Highlights Collection\n\n',
        f'**Total Books:** {len(book_quotes)}  \n',
        f'**Total Highlights:** {highlight_total}\n\n',
        '## Books\n\n',
        '| Book | Number of Highlights |\n',
        '|------|--------------------|\n',
    ]

    # Most-highlighted books come first.
    ranked = sorted(
        book_quotes.items(),
        key=lambda entry: len(entry[1]),
        reverse=True,
    )

    # The link target is the cleaned page name; the label is the raw title.
    parts.extend(
        f'| [{title}]({clean_filename(title)}) | {len(highlights)} |\n'
        for title, highlights in ranked
    )

    return ''.join(parts)

def _clipping_sort_key(date_text):
    """Return a sort key that orders clipping dates chronologically.

    Dates matching one of the known Kindle layouts sort by their parsed
    value; anything else (including "Unknown Date") sorts after them.
    """
    from datetime import datetime  # local import keeps this block self-contained

    known_formats = (
        '%A, %d %B %Y %H:%M:%S',       # Monday, 1 January 2024 10:00:00
        '%A, %B %d, %Y %I:%M:%S %p',   # Monday, January 1, 2024 10:00:00 AM
        '%A, %B %d, %Y %H:%M:%S',      # Monday, January 1, 2024 22:00:00
        '%A, %d %B %Y %I:%M:%S %p',    # Monday, 1 January 2024 10:00:00 AM
        '%A, %d %B %y %H:%M:%S',       # Monday, 1 January 24 10:00:00
    )
    for fmt in known_formats:
        try:
            return (0, datetime.strptime(date_text, fmt))
        except ValueError:
            continue
    # Unparseable dates share one key, so the stable sort keeps them in
    # their original (file) order after all dated highlights.
    return (1, datetime.max)


def generate_book_page(book, quotes):
    """Build the Markdown page for a single book.

    Args:
        book: Book title, used as the page heading.
        quotes: List of ``{'text': ..., 'date': ...}`` dicts.

    Returns:
        Markdown with a link back to Home, the highlight count, and the
        highlights numbered in chronological order. A ``##`` heading with
        the date string is emitted whenever the date differs from the
        previous highlight's date.
    """
    output = f'# {book}\n\n'
    output += '[[Home]]\n\n'
    output += f'**Number of highlights:** {len(quotes)}\n\n'

    # Sorting the raw strings would order by weekday name ("Friday" before
    # "Monday"), so the dates are parsed for the comparison.
    sorted_quotes = sorted(quotes, key=lambda q: _clipping_sort_key(q['date']))

    current_date = None
    for i, quote in enumerate(sorted_quotes, 1):
        if quote['date'] != current_date:
            current_date = quote['date']
            output += f'\n## {current_date}\n\n'

        output += f'{i}. "{quote["text"]}"\n\n'

    return output

def main():
    wiki_dir = f'{REPO_NAME}.wiki'

    # Process clippings
    print("Processing Kindle clippings...")
    book_quotes = process_kindle_clippings('/content/My Clippings.txt')

    print("\nGenerating wiki pages...")

    # Generate and save Home page
    home_content = generate_home_page(book_quotes)
    with open(f'{wiki_dir}/Home.md', 'w', encoding='utf-8') as f:
        f.write(home_content)

    # Generate and save book pages
    for book, quotes in book_quotes.items():
        wiki_name = clean_filename(book)
        book_content = generate_book_page(book, quotes)
        with open(f'{wiki_dir}/{wiki_name}.md', 'w', encoding='utf-8') as f:
            f.write(book_content)

    # Commit and push changes
    print("\nCommitting changes...")
    os.chdir(wiki_dir)
    !git add .
    !git commit -m "Update Kindle highlights with fixed links"

    # Push using token
    push_url = f"https://{token}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.wiki.git"
    !git push $push_url

    print("\nProcessing complete!")
    print(f"Total books found: {len(book_quotes)}")
    print(f"Total quotes processed: {sum(len(quotes) for quotes in book_quotes.values())}")

# Run the whole pipeline when the cell executes: parse the clippings,
# regenerate the wiki pages, then commit and push them.
main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Enter your GitHub token: ··········
fatal: destination path 'Quotes-Kindle-Paperwhite.wiki' already exists and is not an empty directory.
Processing Kindle clippings...

Generating wiki pages...

Committing changes...
[master 3092f61] Update Kindle highlights with fixed links
 1 file changed, 51 insertions(+), 51 deletions(-)
 rewrite Home.md (94%)
Enumerating objects: 5, done.
Counting objects: 100% (5/5), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 2.49 KiB | 2.49 MiB/s, done.
Total 3 (delta 1), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (1/1), completed with 1 local object.
To https://github.com/SanJoao/Quotes-Kindle-Paperwhite.wiki.git
   dbbaa03..3092f61  master -> master

Processing complete!
Total books found: 42
Total quotes processed: 1026
