In [1]:
# Dataset
import pandas as pd

# Device detection
import torch

# LLM
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader
from langchain.document_loaders import WebBaseLoader

# notebook configurations
# Allow up to 1000 characters per cell so long text columns are not cut off when displayed.
pd.set_option("display.max_colwidth", 1000)

In [3]:
# Read the tab-separated MathDial dataset; read_table uses "\t" as its default separator.
df = pd.read_table("mathdial.tsv")

In [6]:
# Preview two randomly chosen rows of the dataset.
df.sample(n=2)

Unnamed: 0,qid,scenario,question,ground_truth,student_incorrect_solution,student_profile,teacher_described_confusion,self-correctness,self-typical-confusion,self-typical-interactions,conversation
2403,5000802,4,"Shawn collected 40 plain pebbles. He painted 9 pebbles red and 13 pebbles blue. He then divided the remaining pebbles equally into 3 groups, and painted them purple, yellow, and green. What is the difference between the number of blue and yellow pebbles?","After painting the red and blue pebbles, Shawn had a remainder of 40 - 9 - 13 = 18 pebbles.\nHe painted 18/3 = 6 pebbles yellow.\nThe difference between the number of blue and yellow pebbles is 13 - 6 = 7.\n 7","Shawn painted a total of 9 + 13 = 22 pebbles.\nHe has 40 - 22 = 18 pebbles left.\nHe divided the remaining 18 pebbles equally into 3 groups, so each group has 18/3 = 6 pebbles.\nHe painted these 6 pebbles yellow, purple, and green, so he painted a total of 13 + 6 = 19 pebbles yellow.\nThe difference between the number of blue and yellow pebbles is 19 - 13 = 6.\n 6",Riya is a 7th grade student. She struggle most with understanding what the problem is asking them to do.,Went too far in calculating number of yellow pebbles,Yes,4.0,4.0,"Teacher: (generic)Hi Riyi. Shawn painted 9 red pebbles, 13 blue pebbles, and the 18 remaining pebbles into 3 equal groups of 6 yellow, 6 purple, and 6 green pebbles. Correct?|EOM|Student: Yes, that is correct.|EOM|Teacher: (probing)What is the difference between the 13 blue pebbles and the 6 yellow pebbles?|EOM|Student: The difference between the 13 blue pebbles and the 6 yellow pebbles is 13 - 6 = 7.|EOM|Teacher: (generic)Correct! Good Work!"
1225,5000992,4,"Jaco wants to buy Christmas gifts for his 8 friends that are worth $9 each. Also, he wants to buy gifts for his mother and father that are of the same amount. He has a $100 budget to buy all gifts. How much is Jaco's budget for each of his mother and father's gift?","Jaco will spend a total of $9 x 8 = $72 for his 8 friends' gifts.\nHe will be left with $100 - $72 = $28 for his mother and father's gift.\nSo, Jaco's budget for each of his mother and father's gift is $28/2 = $14.\n 14","Jaco is buying gifts for 8 friends, so he will spend 8 x $9 = $72 on those gifts.\nHe also needs to buy gifts for his mother and father, which will cost another $9 x 2 = $18.\nSo, Jaco will spend a total of $72 + $18 = $90 on all the gifts.\nJaco's budget for his mother and father's gift is $100 - $90 = $10.\nTherefore, Jaco's budget for each of his mother and father's gift is $10. \n 10",Riya is a 7th grade student. She has problem with understanding of what steps or procedures are required to solve a problem.,The student forgot to subtract the money already spent on the friends' gifts.,Yes,2.0,3.0,"Teacher: (probing)Hi Riya, I see you wee able to determine that Jaco spent $72 on his 8 friends' gifts. If he had $100, how much does he now have left? |EOM|Student: Jaco has $100 - $72 = $28 left.|EOM|Teacher: (focus)Yes, he has $28 remaining. It says he buys gifts for his mom and dad spending an equal amount on each. So the question now asks what his budget it for his mother and father's gifts? |EOM|Student: Jaco's budget for his mother and father's gift is $28 divided by 2, which is $14 each.|EOM|Teacher: (probing)Yes. Can you see where you went wrong initially? |EOM|Student: Yes, I initially thought that Jaco would spend $18 on his mother and father's gifts, but I forgot to subtract the $72 he spent on his 8 friends' gifts from the $100 budget. So, his budget for his mother and father's gift is actually $28."


In [7]:
# Load the dataset text from the same local TSV file that `df` is read from.
# A GitHub "blob" URL serves the HTML viewer page rather than the raw file, so
# scraping it would embed page markup instead of the dataset contents.
loader = TextLoader("mathdial.tsv", encoding="utf-8")
docs = loader.load()

# Split on single newlines: with the default "\n\n" separator, a file that has no
# blank lines would come back as one oversized chunk and chunk_size would be ignored.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(docs)

**The cell below is where the kernel dies.** I tried adjusting the parameters, but the kernel still fails. Hopefully it works locally on your computer (my computer is low on memory).

*I also tried using DataHub because of an NVIDIA driver issue.*

In [None]:
# Choose the device at runtime so this cell runs on machines with or without a CUDA GPU,
# instead of hard-coding "cuda:0" and keeping a commented-out CPU variant.
use_gpu = torch.cuda.is_available()
device = "cuda:0" if use_gpu else "cpu"
# Same per-device batch sizes as the original gpu / cpu variants of this cell.
batch_size = 8 if use_gpu else 32

embeddings = HuggingFaceInstructEmbeddings(
    query_instruction="Represent the query for retrieval: ",
    model_kwargs={"device": device},
    encode_kwargs={"batch_size": batch_size},
)

In [None]:
# Embed every chunk and build the FAISS index from the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [None]:
vectorstore.save_local("https://github.com/bzekeria/dsc250-lmtutor/blob/main/data/mathdial.tsv")

In [None]:
FAISS.load_local("https://github.com/bzekeria/dsc250-lmtutor/blob/main/data/mathdial.tsv", embeddings = embeddings)

In [None]:
# randomly sampling the question
query = df["question"].sample(1)
query

In [None]:
answer = vectorstore.similarity_search(query)