In [5]:
# !pip install pymongo

In [1]:
import os
import glob
from dotenv import load_dotenv
import pymongo
import pandas as pd

load_dotenv()


# Connect to MongoDB
client = pymongo.MongoClient(os.environ['MONGO_URI'])
db = client["hack_ai_thon"]

# List of CSV files and their corresponding MongoDB collections
csv_files = {
    "sales_data.csv": "Sales_NPD",
    "planning_data.csv": "Planning",
    "purchase_data.csv": "Purchase",
    "stores_data.csv": "Stores",
    "production_data.csv": "Production",
    "maintenance_data.csv": "Maintenance",
    "quality_data.csv": "Quality",
    "dispatch_data.csv": "Dispatch_Logistics",
    "hr_data.csv": "HR_Admin",
    "finance_data.csv": "Accounts_Finance",
    "settings_data.csv": "Settings",
    "employees_data.csv" : "Employees"
}

In [2]:
collection_name = "Employees"
collection = db[collection_name] 
df = pd.read_csv("db/employees_data.csv")
data = df.to_dict(orient="records")
collection.insert_many(data)
print(f"Uploaded {len(data)} records to {collection_name}")

Uploaded 100 records to Employees


In [4]:
# Function to upload CSV data to MongoDB
def upload_to_mongodb(csv_filename, collection_name):
    collection = db[collection_name] 
    df = pd.read_csv(csv_filename)
    data = df.to_dict(orient="records")
    collection.insert_many(data)
    print(f"Uploaded {len(data)} records to {collection_name}")

for path in glob.glob("db/*.csv"):
    upload_to_mongodb(path, csv_files[path.split('\\')[-1]])
print("All data uploaded successfully to MongoDB.")

Uploaded 500 records to Dispatch_Logistics
Uploaded 500 records to Accounts_Finance
Uploaded 500 records to HR_Admin
Uploaded 500 records to Maintenance
Uploaded 500 records to Planning
Uploaded 500 records to Production
Uploaded 500 records to Purchase
Uploaded 500 records to Quality
Uploaded 500 records to Sales_NPD
Uploaded 10 records to Settings
Uploaded 500 records to Stores
All data uploaded successfully to MongoDB.


In [6]:
# !pip install sentence_transformers

In [13]:
import json
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

csv_filename = "db/dispatch_data.csv"
collection = db["Dispatch_Logistics"] 
df = pd.read_csv(csv_filename)

In [14]:
data = df.to_dict(orient="records")
for obj in data:
    obj["embedding"] = [float(num) for num in model.encode(json.dumps(obj))]

collection.insert_many(data)
print(f"Uploaded {len(data)} records to {"Dispatch_Logistics"}")

Uploaded 500 records to Dispatch_Logistics


In [30]:
query_embedding = [float(num) for num in model.encode("Blair-Hart,2024-08-22,TN288647")]

In [None]:
# db['Dispatch_Logistics'].aggregate([
#     {
#         "$search": {
#             "index": "custom",
#             "knn": {
#                 "query": [float(num) for num in model.encode("Blair-Hart,2024-08-22,TN288647")],
#                 "path": "embedding",
#                 "k": 10
#             },
#         },
#     }
# ])

In [33]:
docs = [obj for obj in db['Dispatch_Logistics'].find()]

In [39]:
import numpy as np

def cosine_similarity(vec1, vec2):
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    
    # Compute cosine similarity
    similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
    return similarity

for obj in docs:
    similarity_score = cosine_similarity(query_embedding, obj['embedding'])
    if similarity_score > 0.2:
        print("Cosine Similarity:", similarity_score)
        print(obj)

Cosine Similarity: 0.23184310293473076
{'_id': ObjectId('67de9db280fcafaf3fb043d6'), 'DispatchID': '2cb9467b-783e-458c-af34-1fee926bb699', 'SalesID': '4adbe0f8-e7fd-48df-a4f2-28b57e278cb1', 'TransportPartner': 'Mccall PLC', 'DeliveryDate': '2025-03-05', 'TrackingNumber': 'TN730052', 'embedding': [-0.06123426556587219, 0.025295745581388474, -0.009473802521824837, 0.05839458480477333, -0.06125112622976303, 0.033025894314050674, 0.026256799697875977, -0.020014215260744095, 0.005510998889803886, -0.011621247977018356, 0.01807376742362976, -0.06284930557012558, -0.08456885814666748, 0.019542595371603966, 0.010777391493320465, 0.02231876365840435, -0.02404117025434971, -0.030994048342108727, -0.019503522664308548, -0.04671274498105049, 0.007592469919472933, 0.05873750522732735, -0.047986261546611786, -0.033207669854164124, -0.05186782032251358, -0.02682497724890709, -0.07167576998472214, 0.00857531651854515, -0.024061042815446854, -0.01432101707905531, 0.07662054151296616, -0.046493932604789