In [2]:
from sklearn.metrics.pairwise import cosine_similarity as cos
from sentence_transformers import SentenceTransformer as ST
import numpy as np
import re

from database_connect import client # gets MongoDB client, which gives access to data
# collection: data called from MongoDB
# json: data in json
class Memories:
    """Access layer for the question/feedback "memories" kept in MongoDB.

    Attributes:
        collection: handle to the ``client["Memory"]["Memory0"]`` collection.
        json: list of every document in the collection, read once when the
            instance is created (a snapshot, not a live view).
    """

    def __init__(self):
        self.collection = client["Memory"]["Memory0"]
        # Snapshot taken at construction time; only writes made through
        # update_memory_feedback on this instance are mirrored into it.
        self.json = list(self.collection.find())

    def get_memory(self, question):
        """Return the first stored memory whose "Question" equals ``question``.

        Raises:
            IndexError: if no stored memory has that question.
        """
        return list(self.collection.find({"Question": question}))[0]

    def get_questions(self):
        """Return the "Question" text of every memory in the snapshot."""
        return [memory["Question"] for memory in self.json]

    # single memory
    def get_feedback_at_index(self, i):
        """Return the memory document at position ``i`` of the collection.

        Despite the name, the whole document is returned (not only its
        "Feedback" field). The database is queried, not the snapshot.

        Raises:
            IndexError: if the collection has no document at position ``i``.
        """
        return list(self.collection.find().skip(i).limit(1))[0]

    def update_memory_feedback(self, question, feedback):
        """Record ``feedback`` for ``question``, creating the memory if needed.

        If a memory with this question already exists, ``feedback`` is
        appended to its "Feedback" list in the database; otherwise a new
        memory holding the question and this single feedback is inserted.
        The ``json`` snapshot is updated to match in both cases.
        """
        if self.collection.count_documents({"Question": question}) > 0:
            # $push appends on the server, so the change is actually stored;
            # mutating a fetched dict would only change a local copy.
            self.collection.update_one(
                {"Question": question},
                {"$push": {"Feedback": feedback}},
            )
            # update_one touches the first match only; mirror that here.
            for cached in self.json:
                if cached.get("Question") == question:
                    cached.setdefault("Feedback", []).append(feedback)
                    break
        else:
            new_memory = {
                "Question": question,
                "Feedback": [feedback],
            }
            self.collection.insert_one(new_memory)
            self.json.append(new_memory)
            print("the question and the feedback has been added to memory")


Pinged your deployment. You successfully connected to MongoDB!


In [3]:
# Generate sentence embeddings for the query and for every stored question.
# Cosine similarity is computed on the questions ONLY (feedback is not embedded).
# Returns the most similar stored question and its feedback.
# If the question is already in the database, it is still returned by this function (similarity will equal 1).
# Loaded sentence-transformer models keyed by name, so repeated lookups reuse
# one instance instead of reloading the weights on every call.
_EMBEDDING_MODELS = {}


def _get_embedding_model(name='all-MiniLM-L6-v2'):
    """Return the SentenceTransformer called ``name``, loading it only once."""
    if name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[name] = ST(name)
    return _EMBEDDING_MODELS[name]


def find_most_similar_memory(query, memories=None, model=None):
    """Find the stored memory whose question is most similar to ``query``.

    The query and every stored question are embedded and compared by cosine
    similarity; only the questions are compared, never the feedback.

    Args:
        query: the question text to look up. It is lower-cased before being
            embedded and printed.
        memories: optional ``Memories`` instance to search; a fresh one is
            created when omitted.
        model: optional embedding model exposing ``encode``; the shared
            'all-MiniLM-L6-v2' model is used when omitted.

    Returns:
        A ``(question, feedback)`` tuple taken from the best-matching memory,
        or ``(None, [])`` when no memories are stored.
    """
    memories0 = Memories() if memories is None else memories
    if model is None:
        model = _get_embedding_model()

    # Preprocess the query to all lowercase.
    query = query.lower()
    print(query)

    # Get all of the questions that are in the snapshot.
    questions = memories0.get_questions()
    if not questions:
        # Nothing to compare against; argmax over an empty result would raise.
        return None, []

    # Embed the query and the stored questions into vector representation.
    query_embed = model.encode(query)
    memory_embeds = model.encode(questions)

    # Cosine similarity of every stored question against the query: shape (1, n).
    cos_sim = cos([query_embed], memory_embeds)

    # Index of the question with the highest similarity score.
    most_similar_question_index = int(np.argmax(cos_sim))

    # Read the result from the same snapshot the questions came from, so the
    # index cannot drift from the database's positional order and no extra
    # queries are needed.
    best_memory = memories0.json[most_similar_question_index]

    return best_memory["Question"], best_memory["Feedback"]

In [4]:
# Smoke test: look up the stored memory closest to a simple arithmetic question.
find_most_similar_memory("What is 1 + 1")

what is 1 + 1


('Whats 9 + 10', ['add the numbers, do NOT append'])

In [5]:
# Sanity check of the embedding + cosine-similarity approach on a handful of
# hand-written topics, independent of the database.
model = ST('all-MiniLM-L6-v2')

# Candidate topics the query is scored against.
qs = [
    "Counting forward and backward",
    "Counting objects",
    "Skip counting by 2s and 5s",
    "Adding and subtracting fractions",
    "Solving problems with comparing and ordering fractions",
]
q1, q2, q3, q4, q5 = qs

ques = "count to 10 by 2s"

# Embed the candidates and the query with the same model.
memory_embeds = model.encode(qs)
query_embed = model.encode(ques)

# One row of scores, one column per candidate topic.
cos_sim = cos([query_embed], memory_embeds)
cos_sim


array([[0.3325963 , 0.42780197, 0.6307756 , 0.23499155, 0.21322174]],
      dtype=float32)

In [6]:
# Open the memory store; the constructor also reads the whole collection into Mem.json.
Mem = Memories()

In [7]:
# Display the snapshot of every stored memory (the full list, not a preview).
Mem.json


[{'_id': ObjectId('653d2f7af5491e13934fee5e'),
  'Question': 'Solve for x: 3x + 5 = 20',
  'Feedback': []},
 {'_id': ObjectId('653d2fd0f5491e1393502e79'),
  'Question': 'Level 3 (Difficulty: Moderate):\n\nFind the area of a rectangle with a length of 8 units and a width of 5 units. \n\nExplain how you got your answer.',
  'Feedback': ['too long']},
 {'_id': ObjectId('653d77cff5491e1393898391'),
  'Question': 'Calculate the area of a rectangle with length 5 and width 8.',
  'Feedback': []},
 {'_id': ObjectId('653d7835f5491e139389d136'),
  'Question': 'level 2 (difficulty: easy-moderate):\n\nadd the following two-digit numbers:\n\n1. 23 + 17\n\nexplain how you got your answer.',
  'Feedback': ['']},
 {'_id': ObjectId('653d7840f5491e139389d8f4'),
  'Question': 'Level 5 (Difficulty: Hard):\nSarah has 25 jars of candy. Each jar contains 35 pieces of candy. How many pieces of candy does Sarah have in total? Explain how you got your answer.',
  'Feedback': ['give more explanation']},
 {'_id':

In [9]:
# dtype=object keeps each element as its original Python object, so the int
# stays an int instead of being converted to a string alongside the others.
my_strings = np.array([1, "world", "this is a longer string"], dtype=object)
print(my_strings) 

[1 'world' 'this is a longer string']


In [10]:
# Bare expression: shows the array repr, which includes dtype=object.
my_strings

array([1, 'world', 'this is a longer string'], dtype=object)

In [13]:
# 2-D array with 3 rows and 0 columns, i.e. it holds no elements.
a = np.empty(shape=(3,0))

In [16]:
# NOTE(review): this cell raises ValueError. With axis given, np.append needs
# both inputs to have the same number of dimensions, but my_strings is 1-D and
# [a] wraps the 2-D `a` into a 3-D value.
np.append(my_strings,[a],axis=0)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 3 dimension(s)

In [21]:
# Start from a 2-D integer array holding a single row of three values.
arr = np.array([[1, 2, 3]])
# New row to add (mixes ints with a string on purpose)
new_row = [4, 5, "str"]

In [22]:

# Stack the new row vertically to the array using np.vstack()
# The result has a single dtype, so combining the int row with a row that
# contains "str" turns every element into a string (see the quoted output).
new_arr1 = np.vstack([arr, new_row])
print(new_arr1)

[['1' '2' '3']
 ['4' '5' 'str']]


In [20]:
# Append the new row to the bottom of the array using np.append()
# new_row is wrapped in a list so it is 2-D like arr, as axis=0 requires.
# NOTE(review): the recorded output shows a row of [4 5 6], which does not
# match new_row = [4, 5, "str"] from cell In [21]; this cell (In [20]) was
# executed before that definition, so re-run it to refresh the output.
new_arr2 = np.append(arr, [new_row], axis=0)
print(new_arr2)

[[1 2 3]
 [4 5 6]]


In [34]:
# First two columns of every row; the values are strings because new_arr1 was
# built from rows mixing ints and a string.
new_arr1[:,0:2]

array([['1', '2'],
       ['4', '5']], dtype='<U11')

In [41]:
# Start from an array with zero rows and three columns, then grow it one row
# at a time. NOTE: this rebinds `arr`, which earlier cells defined differently.
arr = np.empty((0,3))
new_arr2 = np.append(arr, [new_row], axis=0)
new_arr2 = np.append(new_arr2, [new_row], axis=0)

# Every element prints as a string because new_row contains "str".
print(new_arr2)

[['4' '5' 'str']
 ['4' '5' 'str']]
