## Pinecone

In [2]:

import chromadb
import os
import argparse
import pathlib
import textwrap
import google.generativeai as genai
from IPython.display import  display,Markdown
import chromadb
from chromadb.utils import  embedding_functions

## API key

In [None]:
PINECONE_API_KEY="f9812459-f751-4e1a-85eb-f4d4b06df83b"

## Initialize a Client

In [1]:
PINECONE_API_KEY="f9812459-f751-4e1a-85eb-f4d4b06df83b"

In [2]:
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("quickstart")

In [8]:
import os
import pinecone
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from chromadb.utils import embedding_functions  # Ensure correct import for Google Gemini embeddings
# Import genai for Google Gemini model

class yt_search_pinecone_gemini:
    def __init__(self, yt_video_id, pinecone_api_key, google_api_key):
        os.environ['PINECONE_API_KEY'] = pinecone_api_key
        os.environ['Google_API_KEY'] = google_api_key
        self.yt_video_id = yt_video_id
        self.m = genai.GenerativeModel('gemini-1.5-flash')

        # Initialize Pinecone with specific host and configuration
        pinecone.init(api_key=pinecone_api_key, environment="us-east-1")

        self.index_name = "yt_notes"
        self.host = "https://quickstart-mpiz5jh.svc.aped-4627-b74a.pinecone.io"

        # Create or connect to the Pinecone index with the correct dimensions and metric
        if self.index_name not in pinecone.list_indexes():
            pinecone.create_index(self.index_name, dimension=8, metric='cosine')

        self.index = pinecone.Index(self.index_name)

        # Initialize Google Gemini embedding function
        self.gemini_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=google_api_key)

    def fetch_transcript(self):
        # Fetch transcript from YouTube video
        Transcripts = YouTubeTranscriptApi.get_transcript(self.yt_video_id, languages=['en', 'en-us', 'en-GB'])
        Transcripts = TextFormatter().format_transcript(Transcripts)

        # Save transcript to a temporary file
        with open("temp_transcript.txt", 'w') as file:
            file.write(Transcripts)

        return Transcripts

    def get_gemini_embeddings(self, text):
        # Generate embeddings using Google Gemini
        return self.gemini_ef(text)  # Make sure this returns embeddings of the correct shape

    def upsert_transcript(self, notes):
        # Generate embeddings from the transcript
        embeddings = self.get_gemini_embeddings(notes)

        # Ensure embeddings have correct dimensions
        if len(embeddings) != 8:
            raise ValueError(f"Embeddings have incorrect dimension: {len(embeddings)}")

        # Upsert into Pinecone
        self.index.upsert([(self.yt_video_id, embeddings)])

    def search_notes(self, query_text, n_results=5):
        # Generate embedding for the query text using Gemini
        query_embedding = self.get_gemini_embeddings(query_text)

        # Ensure query embedding has correct dimensions
        if len(query_embedding) != 8:
            raise ValueError(f"Query embedding has incorrect dimension: {len(query_embedding)}")

        # Query Pinecone index for similar documents
        search_results = self.index.query(queries=[query_embedding], top_k=n_results)

        for i, match in enumerate(search_results['matches']):
            id = match['id']
            score = match['score']
            print("**********************")
            print(f"{i + 1}. https://youtu.be/{id} (score: {score})")
            print("***********************")
        
        return search_results

    def generate_answer(self, query_text, document):
        # Generate response using Gemini model
        prompt = "answer the following Question using Document as Context\n"
        prompt += f"QUESTION: {query_text}\n"
        prompt += f"DOCUMENT: {document}\n"

        response = self.m.generate_content(prompt, stream=False)
        print(response.text)
        return response.text


# Example usage
yt_video_id = "gYhY-k4DQvE"  # Replace with your video ID
pinecone_api_key = "ec6284af-935c-4437-86d8-c616ecb1dec2"
google_api_key = "AIzaSyCYDpCdOrOeYB4hLneRsFjqe23SkUP2Mq4"

# Create an instance of the class
yt_search_instance = yt_search_pinecone_gemini(yt_video_id, pinecone_api_key, google_api_key)

# Fetch and upsert transcript
transcript = yt_search_instance.fetch_transcript()
yt_search_instance.upsert_transcript(transcript)

# Search notes and generate a response
query_text = "chromadb?"
search_results = yt_search_instance.search_notes(query_text)
if search_results['matches']:
    yt_search_instance.generate_answer(query_text, search_results['matches'][0]['id'])


AttributeError: init is no longer a top-level attribute of the pinecone package.

Please create an instance of the Pinecone class instead.

Example:

    import os
    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    # Now do stuff
    if 'my_index' not in pc.list_indexes().names():
        pc.create_index(
            name='my_index', 
            dimension=1536, 
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-west-2'
            )
        )

