<a href="https://colab.research.google.com/github/Ashail33/textclustering/blob/main/stickies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
# Install the packages used by this notebook and by the generated Streamlit app.
# openai is pinned to 0.28 — presumably because the app code calls
# openai.ChatCompletion.create, the pre-1.0 client interface.
!pip install streamlit streamlit_jupyter pytesseract openai==0.28 pillow google-cloud-vision



In [41]:
# Step 1: Mount Google Drive and set environment variables
from google.colab import drive
import os
from google.colab import userdata

# Attach Google Drive to the Colab VM; the service-account key file is read from there
drive.mount('/content/drive')

# Tell the Google Cloud client libraries where the service-account JSON lives
google_credentials_path = "/content/drive/My Drive/Stickies/stickies.json"
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": google_credentials_path})

# Debug aid, intentionally left disabled: it would print the contents of the
# credentials file into the cell output.
# with open(google_credentials_path, 'r') as file:
#     print(file.read())

# Fetch the OpenAI API key from Colab's user secrets (secret name: 'openai_key')
openai_api_key = userdata.get('openai_key')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
# Export the OpenAI key through the environment instead of interpolating it
# into the generated source: the Streamlit process is launched from this
# kernel and inherits os.environ, so the secret never lands in app.py on disk.
if not openai_api_key:
    raise ValueError("OpenAI API key is empty; set the 'openai_key' Colab secret first.")
os.environ["OPENAI_API_KEY"] = openai_api_key

# Source of the Streamlit app, written to app.py by a later cell.
# This is an f-string: single braces are filled in from notebook variables now,
# while doubled braces and doubled backslashes end up as literal braces and
# escape sequences in the generated file. The credentials path is inserted
# with !r so it is always a valid Python string literal.
app_code = f"""
import os
import streamlit as st
from google.cloud import vision_v1 as vision
from PIL import Image
import openai
import io
import pandas as pd

# Set the environment variable for Google Cloud Vision API
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = {google_credentials_path!r}

# Configure OpenAI API key (read from the environment, never stored in this file)
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Configure Google Cloud Vision client
client = vision.ImageAnnotatorClient()

# Streamlit reruns the whole script on every widget interaction; caching by
# image bytes keeps a button click from sending every image to the Vision API again.
@st.cache_data(show_spinner=False)
def ocr_image(image_content):
    # Extract text from an image using Google Vision API.
    # image_content: encoded image bytes. Returns the full detected text,
    # or an empty string when nothing was detected.
    image = vision.Image(content=image_content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    if texts:
        # The first annotation holds the complete text block
        return texts[0].description
    else:
        return ""

def resize_image_if_needed(image, max_size=(1024, 1024)):
    # Resize the image (in place) if it exceeds the maximum allowed size.
    # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
    if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
        image.thumbnail(max_size, Image.LANCZOS)
    return image

def categorize_text(text):
    # Categorize text using OpenAI's GPT model and return the model's reply as a string
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {{"role": "system", "content": "You are a helpful assistant, you will go through the text and categorise the sentences provided into as minimal categories as possible. Please ensure to highlight the symbols or words that do not make sense"}},
            {{"role": "user", "content": f"Categorize the following text into meaningful categories:\\n\\n{{text}}\\n\\nProvide a list of categories and assign each piece of text to a category."}}
        ]
    )
    categories = response.choices[0].message['content'].strip()
    return categories

def main():
    # Page flow: upload images -> preview -> OCR table -> optional categorization
    st.title("Sticky Note OCR and Categorization App")

    st.header("Upload your sticky notes")
    uploaded_files = st.file_uploader("Choose image files", accept_multiple_files=True, type=["png", "jpg", "jpeg"])

    all_text = ""
    ocr_results = []

    if uploaded_files:
        st.header("Uploaded Images")
        cols = st.columns(3)  # Adjust the number of columns as needed
        for i, uploaded_file in enumerate(uploaded_files):
            with cols[i % 3]:  # Display images in a 3-column layout
                image = Image.open(uploaded_file)
                st.image(image, caption=uploaded_file.name, use_column_width=True)

        st.header("OCR Results")
        for uploaded_file in uploaded_files:
            with st.spinner(f"Processing {{uploaded_file.name}}..."):
                # Downscale large images, then re-encode as PNG bytes for the Vision API
                image = resize_image_if_needed(Image.open(uploaded_file))
                image_byte_array = io.BytesIO()
                image.save(image_byte_array, format='PNG')
                image_content = image_byte_array.getvalue()

                text = ocr_image(image_content)
                ocr_results.append({{"Image": uploaded_file.name, "Text": text}})
                all_text += text + "\\n"

        if ocr_results:
            df = pd.DataFrame(ocr_results)
            st.write(df)

        if all_text:
            if st.button("Categorize Text"):
                with st.spinner("Categorizing text..."):
                    categories = categorize_text(all_text)
                    st.write("Categorized Text:")
                    st.write(categories)

if __name__ == "__main__":
    main()
"""

In [43]:

# Save the generated Streamlit source as app.py in the current working directory
with open("app.py", "w") as app_file:
    app_file.write(app_code)

# Step 3: Display the public IP address for localtunnel and run the apps
import subprocess
import urllib.request
import time
import streamlit_jupyter as stj

# Look up this VM's public IPv4 address and print it; per the message below it
# is the password/endpoint IP to use with localtunnel.
# The context manager closes the HTTP response, and the timeout keeps the cell
# from hanging forever if the lookup service is unreachable.
with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as ip_response:
    public_ip = ip_response.read().decode('utf8').strip()
print("Password/Endpoint IP for localtunnel is:", public_ip)

# Install the localtunnel CLI globally with npm. This runs on every execution
# of the cell; the CLI is launched through `npx localtunnel` further down.
!npm install -g localtunnel

# Function to get the localtunnel URL
def get_localtunnel_url(port=8501):
    """Start localtunnel for a local port and return the public URL it announces.

    Launches ``npx localtunnel --port <port>`` as a child process and reads its
    stdout line by line until a line containing ``url`` appears. That line is
    printed, and the URL portion of it is returned.

    Args:
        port: Local port to expose. Defaults to 8501, the port this notebook
            tunnels for the Streamlit app.

    Returns:
        str: The text following ``url is:`` on the announcing line, or the
        whole line if that marker is not present.

    Raises:
        RuntimeError: If the process closes stdout before announcing a URL;
            anything it wrote to stderr is included in the message.
    """
    lt_process = subprocess.Popen(
        ["npx", "localtunnel", "--port", str(port)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # readline() blocks until a full line is available, so no sleep is needed.
    # It returns b"" only at EOF, which ends the loop instead of spinning forever.
    for raw_line in iter(lt_process.stdout.readline, b""):
        output = raw_line.decode("utf-8", errors="replace").strip()
        if "url" in output:
            print(output)
            _, marker, url = output.partition("url is:")
            return url.strip() if marker else output

    # EOF without a URL: surface whatever the process reported on stderr.
    error_text = lt_process.stderr.read().decode("utf-8", errors="replace").strip()
    message = "localtunnel exited before reporting a URL"
    if error_text:
        message += ": " + error_text
    raise RuntimeError(message)

# Launch Streamlit as a child process; Popen returns immediately, so the app
# keeps running while the rest of this cell executes
streamlit_command = ["streamlit", "run", "app.py"]
streamlit_process = subprocess.Popen(streamlit_command)

# Open the tunnel and print the public URL it reports
get_localtunnel_url()

Password/Endpoint IP for localtunnel is: 35.234.39.42
[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/lt.js
+ localtunnel@2.0.2
updated 1 package in 2.161s
your url is: https://blue-bees-stick.loca.lt
