In [None]:
pip install requests pydantic transformers sentence-transformers scikit-learn google-generativeai==0.5.1


In [None]:
# Print the version reported by the `python` executable that the shell resolves.
# NOTE(review): this may differ from the interpreter running the kernel — confirm if it matters.
!python --version

In [None]:
conda install -c conda-forge ipywidgets

In [28]:
import google.generativeai as genai
import json, re
import os
from dotenv import load_dotenv

# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# The API key is read from the environment rather than being hard-coded.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Configure the client library, then create the handle used by process_with_gemini().
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

In [36]:
_GEMINI_FIELDS = ("title", "key_points", "sentiment", "entities")


def _extract_json_object(raw: str) -> dict:
    """Return the first JSON object in ``raw`` that carries an expected field, else ``{}``."""
    decoder = json.JSONDecoder()
    # Try every '{' as a possible start so that code fences or prose around
    # the JSON payload do not prevent decoding.
    for brace in re.finditer(r"\{", raw):
        try:
            candidate, _ = decoder.raw_decode(raw, brace.start())
        except ValueError:
            continue
        if isinstance(candidate, dict) and any(key in candidate for key in _GEMINI_FIELDS):
            return candidate
    return {}


def _scrape_fields(raw: str) -> dict:
    """Regex fallback that pulls the expected fields out of a reply that is not valid JSON."""
    fields = {}
    for key in ("title", "sentiment"):
        match = re.search(rf'"{key}":\s*"([^"]+)"', raw)
        if match:
            fields[key] = match.group(1)
    for key in ("key_points", "entities"):
        # DOTALL lets the array body span several lines (pretty-printed replies).
        match = re.search(rf'"{key}":\s*\[(.*?)\]', raw, re.DOTALL)
        if match:
            items = [item.strip().strip('"') for item in match.group(1).split(",")]
            # Drop blanks so that an empty array yields [] rather than [""].
            fields[key] = [item for item in items if item]
    return fields


def process_with_gemini(text: str):
    """
    Process input text using the Gemini API and return a structured JSON output.

    The reply is decoded as JSON when possible, so list items that contain
    commas or span several lines are preserved. If no JSON object can be
    decoded, the fields are scraped with regular expressions instead.

    Args:
        text: The raw text to analyse.

    Returns:
        A JSON string with the keys "title", "key_points", "sentiment" and
        "entities". Missing strings default to "N/A" and missing lists to [].
        If anything raises, the error is printed and a JSON string with the
        keys "error" and "details" is returned instead.
    """
    prompt = f"""
    Process the following text and return the output in the following JSON structure:
    {{
      "title": "Concise title summarizing the content",
      "key_points": ["Point 1", "Point 2", "..."],
      "sentiment": "Positive/Neutral/Negative",
      "entities": ["Entity1", "Entity2", "..."]
    }}
    Text: {text}
    """

    try:
        response = model.generate_content(
            prompt
        )
        result = response.text

        # Prefer a real JSON decode; fall back to scraping for malformed replies.
        payload = _extract_json_object(result) or _scrape_fields(result)

        title = payload.get("title")
        key_points = payload.get("key_points")
        sentiment = payload.get("sentiment")
        entities = payload.get("entities")

        json_output = {
            "title": title if isinstance(title, str) and title else "N/A",
            "key_points": [str(point) for point in key_points] if isinstance(key_points, list) else [],
            "sentiment": sentiment if isinstance(sentiment, str) and sentiment else "N/A",
            "entities": [str(entity) for entity in entities] if isinstance(entities, list) else []
        }
        return json.dumps(json_output, indent=4)

    except Exception as e:
        # Best-effort contract: report the failure as JSON instead of raising.
        print(f"Error processing with Gemini API: {e}")
        return json.dumps({"error": "Failed to process text", "details": str(e)})

In [19]:
from pydantic import BaseModel, ValidationError
from typing import List

class ProcessedData(BaseModel):
    """Schema for one processed-text result; its fields mirror the JSON keys requested in the Gemini prompt."""
    title: str  # short title for the text
    key_points: List[str]  # one string per key point
    sentiment: str  # the prompt asks for Positive/Neutral/Negative; not enforced here
    entities: List[str]  # one string per entity

def validate_response(data: dict):
    """Build a ``ProcessedData`` from ``data``.

    Returns the validated model, or ``None`` after printing the validation
    errors as JSON when ``data`` does not satisfy the schema.
    """
    try:
        return ProcessedData(**data)
    except ValidationError as exc:
        print("Validation Error:", exc.json())
    return None

In [20]:
from transformers import pipeline

def process_with_local_model(text: str, model_path: str = "your-local-model-path"):
    """Run ``text`` through a local text2text-generation pipeline.

    Args:
        text: The raw text to process.
        model_path: Checkpoint passed to ``pipeline``. The default is the
            original placeholder value and must be replaced with a real path
            or model id before this function can work.

    Returns:
        The 'generated_text' entry of the first pipeline result.
    """
    model_pipeline = pipeline("text2text-generation", model=model_path)
    # NOTE(review): temperature is passed without do_sample=True; it normally
    # only affects sampling-based decoding — confirm whether sampling was intended.
    response = model_pipeline(
        f"Process the following text: {text}",
        max_length=1024,
        temperature=0.7
    )
    return response[0]['generated_text']

In [21]:
import time
import random

def api_request_with_retry(request_func, *args, retries=3, backoff_factor=0.3, **kwargs):
    """Call ``request_func`` and retry with exponential backoff when it raises.

    Args:
        request_func: Callable to invoke.
        *args: Positional arguments forwarded to ``request_func``.
        retries: Maximum number of attempts. With ``retries <= 0`` the
            callable is never invoked and ``None`` is returned.
        backoff_factor: Base delay in seconds; it doubles after each failure.
        **kwargs: Keyword arguments forwarded to ``request_func``.

    Returns:
        Whatever ``request_func`` returns on the first successful attempt, or
        ``None`` once every attempt has failed (the last error is printed).
    """
    for attempt in range(retries):
        try:
            return request_func(*args, **kwargs)
        except Exception as e:
            if attempt < retries - 1:
                # Exponential backoff plus up to 100 ms of random jitter.
                time.sleep(backoff_factor * (2 ** attempt) + random.uniform(0, 0.1))
            else:
                print("API request failed:", e)
                return None
    # Reached only when no attempt was made (retries <= 0).
    return None

In [22]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def compare_outputs(api_output, local_output):
    """Print how similar two outputs' key points are, followed by both sentiments.

    Both arguments are read with ``.get`` for the keys "key_points" and
    "sentiment". Nothing is returned; the results are printed.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def embed_key_points(output):
        # The key points are joined into one space-separated string before encoding.
        return encoder.encode(" ".join(output.get("key_points", [])))

    api_vector = embed_key_points(api_output)
    local_vector = embed_key_points(local_output)

    # cosine_similarity returns a 1x1 matrix for a single pair of vectors.
    score = cosine_similarity([api_vector], [local_vector])[0][0]
    print("Semantic Similarity:", score)

    print("API Sentiment:", api_output.get("sentiment"))
    print("Local Sentiment:", local_output.get("sentiment"))

In [None]:
# Processing with Local Model
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
import os

model_name = "t5-large"
model_path = "./local_model_large"

# Download the checkpoint only when no saved copy exists at model_path yet.
if not os.path.exists(model_path):
    # Distinct names on purpose: the notebook-level `model` is the Gemini client
    # that process_with_gemini() calls, so it must not be rebound to the T5 model.
    local_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    local_tokenizer = AutoTokenizer.from_pretrained(model_name)
    local_model.save_pretrained(model_path)
    local_tokenizer.save_pretrained(model_path)

def process_with_local_model(text: str):
    """Generate text for ``text`` with the checkpoint saved under ./local_model_large.

    The pipeline is built on every call and run with sampling enabled.
    Returns the 'generated_text' entry of the first pipeline result.
    """
    generator = pipeline("text2text-generation", model="./local_model_large")
    # NOTE(review): get_prompt is not defined in the visible part of this
    # notebook — confirm it exists before running this cell.
    outputs = generator(get_prompt(text), do_sample=True)
    first_result = outputs[0]
    return first_result['generated_text']
# Validating the generated output
from pydantic import BaseModel, ValidationError
from typing import List

class ProcessedData(BaseModel):
    """Schema for one processed-text result; its fields mirror the JSON keys requested in the Gemini prompt."""
    title: str  # short title for the text
    key_points: List[str]  # one string per key point
    sentiment: str  # the prompt asks for Positive/Neutral/Negative; not enforced here
    entities: List[str]  # one string per entity

def validate_response(data: dict):
    """Build a ``ProcessedData`` from ``data``.

    Returns the validated model, or ``None`` after printing the validation
    errors as JSON when ``data`` does not satisfy the schema.
    """
    try:
        return ProcessedData(**data)
    except ValidationError as exc:
        print("Validation Error:", exc.json())
    return None
# Comparing outputs
import time
import random
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def api_request_with_retry(request_func, *args, retries=3, backoff_factor=0.3, **kwargs):
    """Call ``request_func`` and retry with exponential backoff when it raises.

    Args:
        request_func: Callable to invoke.
        *args: Positional arguments forwarded to ``request_func``.
        retries: Maximum number of attempts. With ``retries <= 0`` the
            callable is never invoked and ``None`` is returned.
        backoff_factor: Base delay in seconds; it doubles after each failure.
        **kwargs: Keyword arguments forwarded to ``request_func``.

    Returns:
        Whatever ``request_func`` returns on the first successful attempt, or
        ``None`` once every attempt has failed (the last error is printed).
    """
    for attempt in range(retries):
        try:
            return request_func(*args, **kwargs)
        except Exception as e:
            if attempt < retries - 1:
                # Exponential backoff plus up to 100 ms of random jitter.
                time.sleep(backoff_factor * (2 ** attempt) + random.uniform(0, 0.1))
            else:
                print("API request failed:", e)
                return None
    # Reached only when no attempt was made (retries <= 0).
    return None
            

def compare_outputs(api_output, local_output):
    """Print how similar two outputs' key points are, followed by both sentiments.

    Both arguments are read with ``.get`` for the keys "key_points" and
    "sentiment". Nothing is returned; the results are printed.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def embed_key_points(output):
        # The key points are joined into one space-separated string before encoding.
        return encoder.encode(" ".join(output.get("key_points", [])))

    api_vector = embed_key_points(api_output)
    local_vector = embed_key_points(local_output)

    # cosine_similarity returns a 1x1 matrix for a single pair of vectors.
    score = cosine_similarity([api_vector], [local_vector])[0][0]
    print("Semantic Similarity:", score)

    print("API Sentiment:", api_output.get("sentiment"))
    print("Local Sentiment:", local_output.get("sentiment"))
# Runner code
# Demo run: send one sample paragraph through Gemini and through the local model, printing both results.
if __name__ == "__main__":
    raw_text = "Apple announced the launch of its new iPhone 15 today. Fans are excited about its enhanced camera and longer battery life. Critics, however, feel that the price point is too high."

    gemini_output = process_with_gemini(raw_text)
    print("Gemini Output:")
    print(gemini_output)
    # process_with_gemini returns a JSON string; decode it to a dict before validation.
    if isinstance(gemini_output, str):
        gemini_output = json.loads(gemini_output) 

    # validate_response returns None (after printing the errors) when the dict does not match the schema.
    # NOTE(review): validated_gemini_output is not used in the visible part of this cell.
    validated_gemini_output = validate_response(gemini_output)

    # local_output = gemini_output
    # The local model's return value is printed as-is; it is not parsed or validated here.
    local_output = process_with_local_model(raw_text)
    print("Local Output:")
    print(local_output)
