<a href="https://colab.research.google.com/github/Mad-HuB1/Interactive-GPT/blob/main/interactive_GPT_working.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-groq shutup sentence-transformers faiss-gpu pandasai langchain-community pypdf python-docx

Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-

In [None]:
import json
import os
import tempfile

import pandas as pd
import shutup
from docx import Document

from pandasai import SmartDataframe

from langchain_groq import ChatGroq

from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate

# NOTE(review): presumably silences Python warnings for the rest of the session;
# confirm against the `shutup` package documentation.
shutup.please()

In [None]:
# SECURITY: the Groq API key must never be committed with the notebook.
# It is read from the GROQ_API_KEY environment variable; set that variable
# outside the notebook (or in a private, unsaved cell) before running.
# Any key that was previously hard-coded here should be treated as leaked
# and revoked in the Groq console.
# An unset or empty variable yields None.
groq_api_key = os.environ.get('GROQ_API_KEY') or None

In [None]:
def chat_with_dataframe(df, query):
  """Ask a natural-language question about a pandas DataFrame.

  The frame is wrapped in a pandasai ``SmartDataframe`` backed by a Groq-hosted
  'llama3-8b-8192' chat model (temperature 0) and the query is forwarded to
  its ``chat`` method.

  Args:
    df: DataFrame to query.
    query: Question to ask about the data.

  Returns:
    Whatever ``SmartDataframe.chat`` returns for the query.
  """
  groq_llm = ChatGroq(
      model='llama3-8b-8192',
      temperature=0,
      groq_api_key=groq_api_key,
  )
  smart_frame = SmartDataframe(df, config={"llm": groq_llm})
  answer = smart_frame.chat(query)
  return answer

def chat_with_txt(file_path, query):
  """Answer a question about a plain-text file via a retrieval chain.

  Steps: load the file with ``TextLoader``, split it with a default
  ``RecursiveCharacterTextSplitter``, embed the chunks with the
  'mixedbread-ai/mxbai-embed-large-v1' model, index them in FAISS, and run a
  LangChain retrieval chain backed by a Groq 'llama3-8b-8192' chat model.

  Args:
    file_path: Path of the text file to load.
    query: Question passed to the chain as its 'input'.

  Returns:
    The 'answer' entry of the retrieval chain's response.
  """
  raw_docs = TextLoader(file_path).load()
  chunks = RecursiveCharacterTextSplitter().split_documents(raw_docs)

  # NOTE(review): truncate_dim / binary precision presumably shrink the stored
  # vectors at some cost in retrieval quality; confirm this is intended.
  embedder = HuggingFaceEmbeddings(
      model_name='mixedbread-ai/mxbai-embed-large-v1',
      model_kwargs={'truncate_dim': 64},
      encode_kwargs={'precision': 'binary'},
  )

  retriever = FAISS.from_documents(chunks, embedder).as_retriever()

  llm = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name='llama3-8b-8192')

  # The template text (including its indentation) is sent to the model as-is.
  template = """
        Answer the following question based only on the provided context:

        NOTE: If you don't know the answer, just say that you don't know. Don't try to make up an answer.

        <context>
        {context}
        </context>

        Question: {input}
        """
  qa_prompt = ChatPromptTemplate.from_template(template)

  stuff_chain = create_stuff_documents_chain(llm, qa_prompt)
  chain = create_retrieval_chain(retriever, stuff_chain)

  return chain.invoke({'input': query})['answer']

def chat_with_pdf(file_path, query):
  """Answer a question about a PDF file via a retrieval chain.

  The PDF is loaded with ``PyPDFLoader``, split into 1000-character chunks
  with a 200-character overlap, embedded with the default
  ``HuggingFaceEmbeddings`` model, indexed in FAISS and queried through a
  LangChain retrieval chain backed by a Groq 'llama3-8b-8192' chat model.

  Args:
    file_path: Path of the PDF to load.
    query: Question passed to the chain as its 'input'.

  Returns:
    The 'answer' entry of the retrieval chain's response.
  """
  pages = PyPDFLoader(file_path).load()

  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  chunks = splitter.split_documents(pages)

  index = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
  retriever = index.as_retriever()

  llm = ChatGroq(model="llama3-8b-8192", groq_api_key=groq_api_key)

  # System instructions; the retrieved chunks are substituted for {context}.
  system_message = "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}"
  qa_prompt = ChatPromptTemplate.from_messages(
      [
          ("system", system_message),
          ("human", "{input}"),
      ]
  )

  stuff_chain = create_stuff_documents_chain(llm, qa_prompt)
  chain = create_retrieval_chain(retriever, stuff_chain)

  return chain.invoke({"input": query})['answer']

def chat_with_docx(file_path, query):
  """Answer a question about a .docx file by delegating to chat_with_txt.

  The text of every paragraph in ``doc.paragraphs`` is joined with newlines,
  written to a uniquely named temporary .txt file, and that file is passed to
  ``chat_with_txt``. The temporary file is removed afterwards, even if the
  chain raises.

  Args:
    file_path: Path of the .docx document.
    query: Question to ask about the document.

  Returns:
    The answer returned by ``chat_with_txt``.
  """
  doc = Document(file_path)
  text_content = '\n'.join(para.text for para in doc.paragraphs)

  # A unique temp file avoids collisions between concurrent calls and does not
  # depend on a hard-coded /tmp path. delete=False lets the file be reopened by
  # name after this handle is closed. The default text encoding is kept so it
  # matches what chat_with_txt's TextLoader(file_path) uses when reading.
  with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as tmp:
    tmp.write(text_content)
    tmp_path = tmp.name

  try:
    return chat_with_txt(tmp_path, query)
  finally:
    # Do not leave a copy of the document's text on disk.
    os.remove(tmp_path)

def chat_with_file(file_path, query):
  """Route a question to the handler matching the file's extension.

  The extension is compared case-insensitively. '.txt', '.pdf' and '.docx'
  go to their dedicated document handlers. '.csv', '.xlsx', '.xls' and
  '.json' are loaded into a DataFrame and passed to chat_with_dataframe.

  Args:
    file_path: Path of the file to query.
    query: Question to ask about the file.

  Returns:
    The selected handler's answer, or the string 'Unsupported file type'
    when the extension is not recognised.
  """
  _, ext = os.path.splitext(file_path)
  ext = ext.lower()

  # Document formats: each has its own handler.
  if ext == '.txt':
    return chat_with_txt(file_path, query)
  if ext == '.pdf':
    return chat_with_pdf(file_path, query)
  if ext == '.docx':
    return chat_with_docx(file_path, query)

  # Tabular formats: load into a DataFrame first.
  if ext == '.csv':
    frame = pd.read_csv(file_path)
  elif ext in ('.xlsx', '.xls'):
    frame = pd.read_excel(file_path)
  elif ext == '.json':
    with open(file_path, 'r') as handle:
      payload = json.load(handle)
    frame = pd.json_normalize(payload)
  else:
    return 'Unsupported file type'

  return chat_with_dataframe(frame, query)

In [None]:
!pip install --upgrade torch torchvision

Collecting torch
  Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metad

In [None]:
# def chat_with_pdf(file_path, query):
#     loader = PyPDFLoader(file_path)
#     docs = loader.load()

#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)  # Increase overlap
#     splits = text_splitter.split_documents(docs)

#     embeddings = HuggingFaceEmbeddings()

#     vectorstore = FAISS.from_documents(splits, embeddings)
#     retriever = vectorstore.as_retriever()

#     model = ChatGroq(model="llama3-8b-8192", groq_api_key=groq_api_key)

#     # Change 'input' to 'question' in the prompt template:
#     prompt = ChatPromptTemplate.from_messages(
#         [
#             ("system", "You are an assistant for finding information in documents.  Please provide a list of all 5 ways to grow a business as discussed in the provided context.\n\n{context}"),  # Refined prompt
#             ("human", "{question}"),  # Changed 'input' to 'question'
#         ]
#     )

#     document_chain = create_stuff_documents_chain(model, prompt)

#     # Use map_reduce chain type for improved results:
#     from langchain.chains import RetrievalQAWithSourcesChain
#     rag_chain = RetrievalQAWithSourcesChain.from_chain_type(
#         llm=model, chain_type="map_reduce", retriever=retriever,
#         return_source_documents=True, chain_type_kwargs={"question_prompt": prompt}
#     )

#     results = rag_chain({"question": query})

#     return results['answer']

In [None]:
# file_path = '/content/5WaystoGrowBiz.pdf'
# query = 'What are the five ways to grow discussed in the document?'

# print(chat_with_pdf(file_path, query))

In [None]:
# Demo: route a PDF through chat_with_file and print the model's answer.
file_path, query = (
    '/content/5WaystoGrowBiz.pdf',
    'What are the five ways to grow discussed in the document?',
)

print(chat_with_file(file_path=file_path, query=query))

The document discusses the following five key areas to jump-start your growth plan:

1. Strategy
2. Operational Efficiency
3. People and Performance
4. (Not explicitly mentioned, but implied as part of the overall growth plan)
5. (Not explicitly mentioned, but implied as part of the overall growth plan)

Note that the document does not explicitly mention five distinct ways to grow, but rather focuses on highlighting the importance of these five areas in achieving business growth.


In [None]:
# Mount Google Drive at /content/drive (uses the Colab-specific google.colab package).
# NOTE(review): the recorded run of this cell ended with "ValueError: mount failed",
# and no other cell visible here reads from /content/drive. Confirm whether the
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

ValueError: mount failed

In [None]:
import pandas as pd

# Convert the house-price CSV to JSON for the JSON demo.
data = pd.read_csv('/content/House.csv')

# orient='records' writes a list of row objects. The default orient writes a
# nested {column: {index: value}} mapping, which pd.json_normalize (used by
# chat_with_file for .json input) flattens into a single row with one column
# per cell instead of a proper table.
# The absolute path matches the location the JSON demo reads from.
data.to_json('/content/House.json', orient='records')

In [None]:
# Demo: ask a question about the JSON export of the house-price data.
file_path = '/content/House.json'
# The question is sent to the LLM verbatim, so it is kept grammatical.
query = 'Which location has the highest house price?'

print(chat_with_file(file_path, query))