In [81]:
pip install openai pydantic streamlit transformers torch



In [106]:
import os

# Prefer the OPENAI_API_KEY environment variable so a real secret never has to be
# saved inside the notebook; "api-key" is kept only as the original placeholder
# fallback so the name is always defined.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "api-key")


In [107]:
from pydantic import BaseModel
from typing import List, Optional

class ProcessedText(BaseModel):
    """Pydantic schema for the result of processing one piece of text.

    Only ``text`` is required; the three analysis fields default to ``None``.
    """

    # The text that was processed.
    text: str
    # Sentiment label as a string, or None when not set.
    sentiment: Optional[str] = None
    # Entity strings, or None when not set.
    entities: Optional[List[str]] = None
    # Keyword strings, or None when not set.
    keywords: Optional[List[str]] = None


In [108]:
import openai
from typing import Dict

In [109]:
# Set up OpenAI API key.
# The OPENAI_API_KEY environment variable wins; otherwise reuse the
# OPENAI_API_KEY constant from the earlier cell instead of repeating the literal.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", OPENAI_API_KEY)

In [110]:
import json


def _parse_analysis(raw: str) -> Dict:
    """Extract the sentiment/entities/keywords JSON object from a raw completion string."""
    # Completion models may wrap the JSON in extra prose, so slice from the
    # first "{" to the last "}" before decoding.
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model response did not contain a JSON object.")
    payload = json.loads(raw[start:end + 1])
    return {field: payload.get(field) for field in ("sentiment", "entities", "keywords")}


def process_text_with_openai(
    text: str,
    model: str = "gpt-3.5-turbo-instruct",
    max_tokens: int = 150,
) -> Dict:
    """Ask an OpenAI completion model for sentiment, entities and keywords of ``text``.

    The model is instructed to answer with a JSON object, which is parsed and
    validated through ``ProcessedText`` (previously the response was discarded
    and fixed example values were returned).

    Args:
        text: Raw text to analyse.
        model: Completion model name. The former hard-coded "text-davinci-003"
            has been retired by OpenAI; "gpt-3.5-turbo-instruct" is the
            documented replacement (see the OpenAI deprecations page).
        max_tokens: Upper bound on tokens generated for the answer.

    Returns:
        ``ProcessedText(...).dict()`` on success, otherwise ``{"error": <message>}``.
        No exception escapes: API, parsing and validation failures are all
        reported through the ``"error"`` key, as before.
    """
    # Skip the (billed) API call entirely when there is nothing to analyse.
    if not text or not text.strip():
        return {"error": "No text provided."}

    prompt = (
        "Analyze the text below and reply with ONLY a JSON object that has exactly "
        'these keys: "sentiment" (one of "positive", "negative", "neutral"), '
        '"entities" (a list of strings) and "keywords" (a list of strings).\n\n'
        f"Text:\n{text}\n\nJSON:"
    )

    try:
        request = {"model": model, "prompt": prompt, "max_tokens": max_tokens}
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: the legacy `openai.Completion` entry point was removed.
            response = openai.completions.create(**request)
        else:
            # Legacy openai<1.0 SDK.
            response = openai.Completion.create(**request)
        response_text = response.choices[0].text.strip()

        processed_data = {"text": text, **_parse_analysis(response_text)}

        # Validate with Pydantic
        processed_text = ProcessedText(**processed_data)
        return processed_text.dict()

    except Exception as e:
        # Deliberate best-effort: the caller renders the error dict in the UI.
        return {"error": str(e)}

In [111]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [113]:
# DistilGPT-2 is small and publicly downloadable; the tokenizer and the
# causal-LM weights are both loaded from the same checkpoint name.
_LOCAL_CHECKPOINT = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(_LOCAL_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_LOCAL_CHECKPOINT)

In [114]:
def process_text_with_local_model(text: str, max_new_tokens: int = 100) -> Dict:
    """Run ``text`` through the locally loaded causal LM and return a ``ProcessedText`` dict.

    Args:
        text: Raw text used as the generation prompt.
        max_new_tokens: Number of tokens to generate beyond the prompt. The
            previous ``max_length=100`` also counted prompt tokens, so prompts
            of 100+ tokens left no room to generate.

    Returns:
        ``ProcessedText(...).dict()`` on success, otherwise ``{"error": <message>}``,
        mirroring ``process_text_with_openai`` so a model failure does not crash
        the caller.

    NOTE(review): the generated text is still not parsed; sentiment, entities and
    keywords below are the original placeholder values.
    """
    # An empty prompt yields an empty input_ids tensor, so reject it up front.
    if not text or not text.strip():
        return {"error": "No text provided."}

    try:
        # truncation=True keeps over-long prompts within the tokenizer's limit.
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        outputs = model.generate(
            inputs["input_ids"],
            # Pass the mask and a pad id explicitly: GPT-2 style tokenizers
            # define no pad token, so generate() would otherwise have to guess.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Kept for the future parsing step; not consumed yet (see NOTE above).
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Example of parsing output, adjust as needed
        processed_data = {
            "text": text,
            "sentiment": "neutral",  # Modify as needed
            "entities": ["AI", "model"],
            "keywords": ["LLM", "text processing"]
        }

        # Validate with Pydantic
        processed_text = ProcessedText(**processed_data)
        return processed_text.dict()

    except Exception as e:
        return {"error": str(e)}

In [115]:
def compare_results(text: str):
    """Process ``text`` with both backends and return the two results side by side.

    The OpenAI backend is called first, then the local model. The returned dict
    has the keys "OpenAI Result" and "Local Model Result".
    """
    comparison = {}
    comparison["OpenAI Result"] = process_text_with_openai(text)
    comparison["Local Model Result"] = process_text_with_local_model(text)
    return comparison


In [116]:
import streamlit as st

In [117]:
# Streamlit app: a text box plus a button that runs both backends and shows
# their results one after the other.
st.title("Text Processing Pipeline")

input_text = st.text_area("Enter Raw Text Here:")

if st.button("Process Text"):
    # Test the stripped value so whitespace-only input is rejected just like
    # empty input instead of being sent to both models.
    if input_text and input_text.strip():
        results = compare_results(input_text)

        st.subheader("OpenAI API Result:")
        st.json(results["OpenAI Result"])

        st.subheader("Local Model Result:")
        st.json(results["Local Model Result"])
    else:
        st.error("Please enter some text to process.")



In [None]:
# NOTE(review): this path appears to be Colab's kernel launcher rather than a
# script containing the Streamlit code from the cells above, so the served app
# is presumably not this notebook's UI. Confirm, and point `streamlit run` at a
# .py file holding the app code.
!streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.

  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://172.28.0.12:8501
  External URL: http://35.188.204.81:8501
