In [None]:
import json
import os
import pickle

import pandas as pd
import weaviate
import weaviate.classes as wvc
import weaviate.classes.config as wvcc
from scipy.spatial import KDTree
from tqdm import tqdm
from weaviate.util import generate_uuid5

In [None]:
# Open a connection to the Weaviate instance on this machine
# (HTTP API on port 8080, gRPC on port 50051).
client = weaviate.connect_to_local(port=8080, grpc_port=50051)

In [None]:
# Echo the client object as the cell output to check that it was created.
client

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; its `encode` output is what gets stored as the
# object vector and used as the query vector in the cells below.
EMBEDDING_MODEL_NAME = 'jhgan/ko-sroberta-multitask'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Course Evaluation

## DDL

In [None]:
# Load the course-evaluation records from the pickle file.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load pickles that come from a trusted source.
with open("../data/course_evaluations.pkl", "rb") as pickle_file:
    ce = pickle.load(pickle_file)

In [None]:
# Opt-in reset of the course-related collections.
#
# Deleting a collection also deletes ALL of the data stored in it, so the reset
# is disabled by default. Set the flag to True before running this cell to drop
# the collections and rebuild them with the create cells that follow.
RESET_COURSE_COLLECTIONS = False

if RESET_COURSE_COLLECTIONS:
    # "CourseEvaluation" holds a reference to "Course", so it is removed first.
    for collection_name in ("CourseEvaluation", "Course", "ProfessorName", "CourseName"):
        if client.collections.exists(collection_name):
            client.collections.delete(collection_name)

In [None]:
# Schema for "CourseName": a single text property holding the course name.
client.collections.create(
    name="CourseName",
    properties=[
        wvcc.Property(name="course_name", data_type=wvcc.DataType.TEXT),
    ],
)

In [None]:
# Schema for "ProfessorName": a single text property holding the professor name.
client.collections.create(
    name="ProfessorName",
    properties=[
        wvcc.Property(name="professor_name", data_type=wvcc.DataType.TEXT),
    ],
)
                          

In [None]:
# Schema for "Course": the course name paired with the professor name, both
# stored as text.
client.collections.create(
    name="Course",
    properties=[
        wvcc.Property(name=property_name, data_type=wvcc.DataType.TEXT)
        for property_name in ("course_name", "professor_name")
    ],
)

In [None]:
# Schema for "CourseEvaluation": three text properties (the evaluation text
# plus the course and professor names) and a "course" cross-reference that
# points at the "Course" collection.
client.collections.create(
    name="CourseEvaluation",
    properties=[
        wvcc.Property(name=property_name, data_type=wvcc.DataType.TEXT)
        for property_name in ("evaluations", "course_name", "professor_name")
    ],
    references=[
        wvcc.ReferenceProperty(name="course", target_collection="Course"),
    ],
)

## Put Data into Collection

In [None]:
# Fetch a handle for each of the four course-related collections.
course_name, professor_name, course, course_evaluation = (
    client.collections.get(collection_name)
    for collection_name in ("CourseName", "ProfessorName", "Course", "CourseEvaluation")
)

In [None]:
# Load every course record into Weaviate. Each record `d` supplies d["name"],
# d["prof"] and d["eval"] (an iterable of evaluation texts).
for d in tqdm(ce):
    # "CourseName" / "ProfessorName" hold one object per distinct name. The
    # object UUID is derived deterministically from the name, so the "is it
    # already stored?" check is an exact look-up by ID.
    # A BM25 query is a ranked keyword search: it also returns objects that
    # merely share a token with the query, so using it as an existence check
    # made different names that share a word look like duplicates.
    # NOTE(review): objects inserted by an earlier version of this cell carry
    # random UUIDs and will not be recognised by this check.
    course_name_id = generate_uuid5(d["name"])
    if not course_name.data.exists(course_name_id):
        course_name.data.insert(
            properties={"course_name": d["name"]},
            uuid=course_name_id,
            vector=model.encode(d["name"]).tolist(),
        )

    professor_name_id = generate_uuid5(d["prof"])
    if not professor_name.data.exists(professor_name_id):
        professor_name.data.insert(
            properties={"professor_name": d["prof"]},
            uuid=professor_name_id,
            vector=model.encode(d["prof"]).tolist(),
        )

    # One "Course" object per record; the returned UUID is what the
    # evaluations below reference.
    course_id = course.data.insert(
        properties={
            "course_name": d["name"],
            "professor_name": d["prof"],
        }
    )

    # One "CourseEvaluation" object per evaluation text, stored with its
    # embedding and a reference back to the course it belongs to.
    for e in d["eval"]:
        course_evaluation.data.insert(
            properties={
                "evaluations": e,
                "course_name": d["name"],
                "professor_name": d["prof"],
            },
            references={
                "course": course_id
            },
            vector=model.encode(e).tolist(),
        )
    

## Queries

# Address Location

## DDL

In [None]:
import json

In [None]:
# Load the address records. The encoding is given explicitly because the file
# contains non-ASCII (Korean) keys and `open` would otherwise fall back to the
# platform's locale encoding, which is not UTF-8 on every system.
with open("../data/address_data.json", encoding="utf-8") as f:
    address_location_json = json.load(f)

In [None]:
# The trailing semicolon suppresses the cell output for the loaded records.
address_location_json;

In [None]:
# Drop the "AddressLocation" collection (and all of its data) if it already
# exists.
if client.collections.exists("AddressLocation"):
    client.collections.delete("AddressLocation")

In [None]:
# Schema for "AddressLocation": a single text property holding the department
# name.
client.collections.create(
    name="AddressLocation",
    properties=[
        wvcc.Property(name="department_name", data_type=wvcc.DataType.TEXT),
    ],
)

In [None]:
# Handle used by the cells below to insert into and query "AddressLocation".
address_location = client.collections.get("AddressLocation")

In [None]:
# Store one object per address record: the department name as a property and
# its embedding as the object vector.
for record in tqdm(address_location_json):
    department = record["부서명"]
    address_location.data.insert(
        properties={"department_name": department},
        vector=model.encode(department).tolist(),
    )

In [None]:
# Vector search: embed the query text and fetch the three closest
# "AddressLocation" objects.
query_embedding = model.encode("컴공").tolist()
response = address_location.query.near_vector(near_vector=query_embedding, limit=3)

In [None]:
# Show the raw query response as the cell output.
response

# Location

In [None]:
# Load the building records. The encoding is given explicitly because the file
# contains non-ASCII (Korean) keys and `open` would otherwise fall back to the
# platform's locale encoding, which is not UTF-8 on every system.
with open("../data/building_data.json", encoding="utf-8") as f:
    buildings_dict = json.load(f)

In [None]:
# Drop the "Buildings" collection (and all of its data) if it already exists.
if client.collections.exists("Buildings"):
    client.collections.delete("Buildings")

In [None]:
# Schema for "Buildings": the primary name and the full name string as text,
# plus the building position as geo coordinates.
client.collections.create(
    name="Buildings",
    properties=[
        wvcc.Property(
            name="primary_building_name",
            data_type=wvcc.DataType.TEXT,
        ),
        wvcc.Property(
            name="building_name",
            data_type=wvcc.DataType.TEXT,
        ),
        wvcc.Property(
            name="coordinates",
            data_type=wvcc.DataType.GEO_COORDINATES,
        ),
    ],
)

In [None]:
# Handle used by the cells below to insert into and query "Buildings".
buildings = client.collections.get("Buildings")

In [None]:
# Store one object per building record. All names of a building are joined
# with ";" into one string, which is both stored and embedded; the first name
# is kept separately as the primary name.
for record in tqdm(buildings_dict):
    names = record['건물명']
    joined_names = ";".join(names)
    building_properties = {
        "primary_building_name": names[0],
        "building_name": joined_names,
        "coordinates": {
            "latitude": record['위치']['위도'],
            "longitude": record['위치']['경도'],
        },
    }
    buildings.data.insert(
        properties=building_properties,
        vector=model.encode(joined_names).tolist(),
    )

In [None]:
# Vector search: embed the query text, fetch the single closest building and
# show its stored coordinates as a plain dict.
query_embedding = model.encode("정문").tolist()
response = buildings.query.near_vector(near_vector=query_embedding, limit=1)
closest_building = response.objects[0]
dict(closest_building.properties['coordinates'])

In [None]:
# Print each entry yielded by iterating over `list_all()`, one per line
# (presumably the collection names - confirm against the client version in use).
for collection_key in client.collections.list_all():
    print(collection_key)