# MongoDB Data Import
This notebook will populate the MongoDB database with the EDDB data.
## Instructions
**Before attempting to run this notebook, ensure the data folder contains the preprocessed `*_stripped.json` data files (e.g. `commodities_stripped.json`) and `listings_stripped.csv`. These files are preprocessed to remove unneeded fields and add the denormalized fields.**

1. Start a local MongoDB database and ensure the connection details below point to the correct port.
2. Run all of the cells in the notebook.

In [None]:
import pymongo
import json
import csv

import sys
!{sys.executable} -m pip install tqdm
from tqdm import tqdm

from pymongo import MongoClient

In [None]:
# Create a client for the local MongoDB server and select the EDDB database.
db = MongoClient(host='localhost', port=27017)['EDDB']


In [None]:
# Populate the commodities, factions, modules, stations and systems_populated
# collections. Each collection is loaded from ../data/<name>_stripped.json and
# the parsed content is passed to insert_many unchanged.

files = ['commodities', 'factions', 'modules', 'stations', 'systems_populated']

for x in files:
    print(x)
    # Decode explicitly as UTF-8 (as the CSV reader in this notebook does) so
    # the import does not depend on the platform's default locale encoding.
    with open(f"../data/{x}_stripped.json", encoding='utf-8') as f:
        db[x].insert_many(json.load(f))

In [None]:
def make_json(data, csvFilePath):
    """Read a CSV file into ``data``, one entry per row keyed by its ``id``.

    Args:
        data: Mutable mapping that is filled in place. Each row, as produced
            by ``csv.DictReader``, is stored under the value of its ``id``
            column; a later row with the same ``id`` replaces an earlier one.
        csvFilePath: Path of the CSV file to read. It is decoded as UTF-8 and
            its header row supplies the column names.

    Raises:
        KeyError: If a row has no ``id`` column.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # may be misinterpreted.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in tqdm(csv.DictReader(csvf), desc="Reading file"):
            # The 'id' column serves as the primary key of each record.
            data[row['id']] = row

In [None]:
# Load the listings CSV and prepare its rows for upload in batches.

# Number of rows per batch.
chunk_size = 1000

# make_json fills the dict in place, keyed by each row's id.
data_dict = {}
make_json(data_dict, '../data/listings_stripped.csv')

# Flatten the id -> row mapping into a plain list of row dicts.
data_to_insert = [
    data_dict[row_id]
    for row_id in tqdm(data_dict, desc="Converting data to correct list of JSON...")
]

# Every column except 'commodity_name' is converted to int; empty cells become 0.
for dic in tqdm(data_to_insert, desc="Converting strings to ints..."):
    for key in dic:
        if key != 'commodity_name':
            dic[key] = 0 if dic[key] == '' else int(dic[key])
            
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    A tqdm progress bar labelled "Chunking..." advances once per slice.
    """
    starts = range(0, len(lst), n)
    for start in tqdm(starts, desc="Chunking..."):
        stop = start + n
        yield lst[start:stop]

# Materialise every batch up front. NOTE: this rebinds the name `chunks` from
# the generator function to a plain list of batches.
chunks = [*chunks(data_to_insert, chunk_size)]

# Upload one batch per insert_many call.
for chunk in tqdm(chunks, desc="Uploading Chunks"):
    db['listings'].insert_many(chunk)