In [1]:
    # Define columns to extract
columns = [
    # Identity and free-text fields
    "id",
    "listing_url",
    "name",
    "description",
    "neighborhood_overview",
    # Position and price
    "latitude",
    "longitude",
    "price",
    # Amenities and capacity
    "amenities",
    "beds",
    "bedrooms",
    "bathrooms",
    "bathrooms_text",
    # Listing type and host text
    "property_type",
    "room_type",
    "host_about",
]

In [1]:
from dotenv import load_dotenv
import os

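# Optional: uncomment to load environment variables (e.g. the MongoDB
# connection string) from a local env file instead of the process environment.
# load_dotenv("variables.env", override=True)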

In [2]:
# Connection string for the target MongoDB deployment, taken from the environment.
# NOTE(review): this is None when the variable is unset - confirm the variable is
# exported (or loaded from an env file) before running the ingestion cells.
MONGO_CONNECTION_STRING = os.environ.get("MONGO_CONNECTION_STRING_DISKANN")


In [4]:
import json
from pymongo import MongoClient

def _text_or_empty(value):
    """Return ``value`` as text, mapping ``None`` to an empty string."""
    return "" if value is None else str(value)


def _coordinate_or_zero(value):
    """Return ``value`` as a float, treating ``None`` like a missing key (0.0)."""
    return 0.0 if value is None else float(value)


def json_to_mongo(json_file, collection_name, database_name):
    """Load records from a JSON file, enrich them, and insert them into MongoDB.

    Two fields are added to every record before insertion:

    * ``data_to_embed`` - the ``name``, ``description`` and
      ``neighborhood_overview`` values joined by single spaces.
    * ``location`` - a GeoJSON ``Point`` whose coordinates are
      ``[longitude, latitude]`` as floats.

    The connection string is read from the module-level
    ``MONGO_CONNECTION_STRING``.

    Args:
        json_file: Path to a JSON file whose top level is a list of objects.
        collection_name: Name of the collection to insert into.
        database_name: Name of the database holding that collection.

    Raises:
        ValueError: If a present, non-null latitude/longitude cannot be
            converted to ``float``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # insert_many rejects an empty list, so skip the database round trip entirely.
    if not data:
        print(f"No documents found in '{json_file}'; nothing inserted.")
        return

    for item in data:
        # A JSON null must not end up as the literal text "None" in the
        # string that is later embedded.
        item['data_to_embed'] = " ".join(
            _text_or_empty(item.get(field))
            for field in ('name', 'description', 'neighborhood_overview')
        )
        # GeoJSON order is longitude first, then latitude. Null coordinates
        # fall back to 0 exactly like absent ones.
        item['location'] = {
            "type": "Point",
            "coordinates": [
                _coordinate_or_zero(item.get('longitude')),
                _coordinate_or_zero(item.get('latitude')),
            ],
        }

    client = MongoClient(MONGO_CONNECTION_STRING)
    try:
        client[database_name][collection_name].insert_many(data)
    finally:
        # Release the connection even when the insert fails.
        client.close()
    print(f"Inserted {len(data)} documents into the '{collection_name}' collection in MongoDB.")

# Import the small test dataset into contoso_bookings.listings.
json_to_mongo(
    json_file="data/datasets without embeddings/small_for_testing.json",
    collection_name="listings",
    database_name="contoso_bookings",
)

Inserted 1434 documents into the 'listings' collection in MongoDB.


In [4]:
import csv
import glob
from pymongo import MongoClient

def csv_to_mongo(folder_path, mongo_uri, database_name, collection_name):
    """Insert the selected columns of every CSV file in a folder into MongoDB.

    Every ``*.csv`` file directly inside ``folder_path`` is read with
    ``csv.DictReader``. Each row is reduced to the keys listed in the
    module-level ``columns`` list (keys absent from a file are skipped), and
    all rows are inserted with a single ``insert_many`` call. Values are
    stored exactly as read from the CSV, i.e. as strings.

    Args:
        folder_path: Directory containing the CSV files.
        mongo_uri: MongoDB connection string.
        database_name: Name of the target database.
        collection_name: Name of the target collection.
    """
    combined_data = []

    # Sort so the insertion order does not depend on filesystem listing order.
    for csv_file in sorted(glob.glob(f"{folder_path}/*.csv")):
        # newline='' is what the csv module requires so that line breaks
        # inside quoted fields are preserved as written.
        with open(csv_file, encoding='utf-8', newline='') as csvf:
            for row in csv.DictReader(csvf):
                combined_data.append(
                    {key: row[key] for key in columns if key in row}
                )

    if not combined_data:
        print(f"No CSV rows found in '{folder_path}'; nothing inserted.")
        return

    # Connect only once there is something to insert, and always release the
    # client even if the insert raises.
    client = MongoClient(mongo_uri)
    try:
        client[database_name][collection_name].insert_many(combined_data)
    finally:
        client.close()
    print(f"Inserted {len(combined_data)} documents into the '{collection_name}' collection in MongoDB.")

# Settings for the CSV import
folder_path = "data/csv"  # folder that holds the CSV files
mongo_uri = MONGO_CONNECTION_STRING
database_name = "contoso_bookings"  # replace with your database name
# NOTE(review): the JSON import cell targets 'listings' (lowercase) while this
# one targets 'Listings'; MongoDB collection names are case-sensitive, so these
# are two different collections - confirm that is intended.
collection_name = "Listings"  # replace with your collection name

# Run the import
csv_to_mongo(
    folder_path=folder_path,
    mongo_uri=mongo_uri,
    database_name=database_name,
    collection_name=collection_name,
)


Inserted 35864 documents into the 'Listings' collection in MongoDB.
