In [1]:
import pandas as pd
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document


In [2]:
# Load the interview-question dataset from the notebook's working directory.
# A relative path keeps the notebook runnable on machines other than the author's;
# a later cell already reads the same file name this way.
df = pd.read_csv("interview_questions.csv")


In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Company,Question
0,TCS,Q. How do you maintain company coding standards?
1,TCS,Q. Explain the Python installation process.
2,TCS,Q. Explain OOP concepts.
3,TCS,Q. What is DevOps?
4,TCS,Q. Write a Python code to reverse the last k d...


In [4]:

# Discard rows whose "Question" value is missing.
df = df.dropna(subset=["Question"])

# Wrap every question in a LangChain Document, tagging it with the company it came from.
documents = [
    Document(page_content=question_text, metadata={"company": company_name})
    for question_text, company_name in zip(df["Question"], df["Company"])
]

# Embedding model (MiniLM) used to vectorise the question text.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build the FAISS index over all documents, then persist it to a local folder.
faiss_index = FAISS.from_documents(documents, embedding_model)
faiss_index.save_local("faiss_index_interview_questions")
print("FAISS index created and saved.")


  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


✅ FAISS index created and saved.


In [None]:

# Load the company overview table.
# NOTE(review): this rebinds `df` and `documents`, which earlier cells use for the
# interview-question data; any later cell that expects the question frame must reload it.
df = pd.read_csv("top_companies.csv")

# Render one text snippet per company. The lines are joined explicitly so that no
# source-code indentation ends up inside the text that gets embedded (a triple-quoted
# f-string would keep the leading spaces of every line after the first).
documents = []
for _, row in df.iterrows():
    content = "\n".join(
        [
            f"Company: {row['Company']}",
            # Optional columns fall back to 'N/A' when they are absent from the CSV.
            f"Rating: {row.get('Rating', 'N/A')}",
            f"Reviews: {row.get('Reviews', 'N/A')}",
            f"Industry & Location: {row.get('Industry & Location', 'N/A')}",
        ]
    )
    documents.append(
        Document(page_content=content.strip(), metadata={"company": row["Company"]})
    )

# Embedding model (same MiniLM checkpoint name as the question index uses).
embedder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Create the FAISS index for the company snippets and persist it to a local folder.
company_faiss = FAISS.from_documents(documents, embedder)
company_faiss.save_local("faiss_index_company_details")
print("Company details FAISS index saved.")


Company details FAISS index saved.


In [13]:
from langchain.vectorstores import FAISS

# Reload the persisted question index from disk.
# SECURITY: allow_dangerous_deserialization=True permits pickle loading of the stored
# index data. Only use it on index folders you created yourself; never on an index
# obtained from an untrusted source.
# NOTE(review): `embedding_model` must be the same model the index was built with.
faiss_index = FAISS.load_local(
    "faiss_index_interview_questions",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

# Retrieve the 3 most similar questions. `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of Documents.
retriever = faiss_index.as_retriever(search_kwargs={"k": 3})
results = retriever.invoke("What is SDLC?")
for doc in results:
    print(doc.page_content)


Q. What is SDLC?
Q. Explain what SDLC is.
Q. What is SDLC and what are its types?


In [9]:
import pandas as pd

# Reload the raw questions and clean them in one pass:
#  - drop rows with a missing question first (drop_duplicates alone would still keep
#    one NaN row, which cannot be used as Document text later on),
#  - then keep only the first occurrence of each distinct question.
df = (
    pd.read_csv("interview_questions.csv")
    .dropna(subset=["Question"])
    .drop_duplicates(subset=["Question"])
)


In [10]:
# Query the retriever (`invoke` replaces the deprecated `get_relevant_documents`).
retrieved_docs = retriever.invoke("What is SDLC?")

# De-duplicate by text while keeping the retriever's order: dict keys preserve
# insertion order, whereas a set would print the results in arbitrary order.
unique_contents = list(dict.fromkeys(doc.page_content for doc in retrieved_docs))

for content in unique_contents:
    print(content)


Q. What is SDLC?


In [11]:
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Create documents from the de-duplicated question frame.
# Rows with a missing question are skipped, and the company is kept as metadata so the
# rebuilt index has the same document schema as the index it overwrites on disk.
# NOTE(review): after de-duplication each question carries only the company of the row
# that was kept, even if several companies asked it.
questions_df = df.dropna(subset=["Question"])
docs = [
    Document(page_content=question, metadata={"company": company})
    for question, company in zip(questions_df["Question"], questions_df["Company"])
]

# Initialize embedding model
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create and save index. This overwrites the folder written earlier under the same name;
# retrievers created from a previously loaded index are not refreshed and must be
# rebuilt from the new index.
faiss_index = FAISS.from_documents(docs, embedder)
faiss_index.save_local("faiss_index_interview_questions")
