## MongoDB Connection Demo

We need to load the contents of a **CSV file into a MongoDB collection**. This notebook reads the CSV file with pandas, connects to a MongoDB deployment, inserts the rows as documents into a collection, and finally reads them back into a DataFrame.



In [1]:
import pandas as pd
import os
import sys

In [2]:
# Load key/value pairs from a local .env file into the process environment.
# The connection string read later with os.getenv("MONGO_DB_URL") is presumably
# defined there (TODO confirm); keep that file out of version control.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Read the visa dataset from disk (path is relative to the notebook's working
# directory) and preview the first rows to confirm it parsed as expected.
csv_path = 'dataset/Visa.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,case_id,continent,education_of_employee,has_job_experience,requires_job_training,no_of_employees,yr_of_estab,region_of_employment,prevailing_wage,unit_of_wage,full_time_position,case_status
0,EZYV01,Asia,High School,N,N,14513,2007,West,592.2029,Hour,Y,Denied
1,EZYV02,Asia,Master's,Y,N,2412,2002,Northeast,83425.65,Year,Y,Certified
2,EZYV03,Asia,Bachelor's,N,Y,44444,2008,West,122996.86,Year,Y,Denied
3,EZYV04,Asia,Bachelor's,N,N,98,1897,West,83434.03,Year,Y,Denied
4,EZYV05,Africa,Master's,Y,N,1082,2005,South,149907.39,Year,Y,Certified


In [4]:
# List the column names of the loaded DataFrame; these become the field names
# of each document once the rows are converted to dictionaries.
data.columns

Index(['case_id', 'continent', 'education_of_employee', 'has_job_experience',
       'requires_job_training', 'no_of_employees', 'yr_of_estab',
       'region_of_employment', 'prevailing_wage', 'unit_of_wage',
       'full_time_position', 'case_status'],
      dtype='object')

In [5]:
# (number of rows, number of columns) of the loaded DataFrame.
data.shape

(25480, 12)

In [6]:
# Convert the DataFrame into a list of dictionaries (one dictionary per row,
# keyed by column name), which is the form passed to insert_many() later.
#
# `data` is rebound from a DataFrame to a list here. Without the isinstance
# guard, running this cell a second time would raise AttributeError because a
# list has no .to_dict(); with it, the cell is safe to re-run.
if isinstance(data, pd.DataFrame):
    data = data.to_dict(orient="records")

In [7]:
# Preview the first 3 record dictionaries to check the structure of what will
# be inserted (one dict per CSV row, keyed by column name).

data[:3]

[{'case_id': 'EZYV01',
  'continent': 'Asia',
  'education_of_employee': 'High School',
  'has_job_experience': 'N',
  'requires_job_training': 'N',
  'no_of_employees': 14513,
  'yr_of_estab': 2007,
  'region_of_employment': 'West',
  'prevailing_wage': 592.2029,
  'unit_of_wage': 'Hour',
  'full_time_position': 'Y',
  'case_status': 'Denied'},
 {'case_id': 'EZYV02',
  'continent': 'Asia',
  'education_of_employee': "Master's",
  'has_job_experience': 'Y',
  'requires_job_training': 'N',
  'no_of_employees': 2412,
  'yr_of_estab': 2002,
  'region_of_employment': 'Northeast',
  'prevailing_wage': 83425.65,
  'unit_of_wage': 'Year',
  'full_time_position': 'Y',
  'case_status': 'Certified'},
 {'case_id': 'EZYV03',
  'continent': 'Asia',
  'education_of_employee': "Bachelor's",
  'has_job_experience': 'N',
  'requires_job_training': 'Y',
  'no_of_employees': 44444,
  'yr_of_estab': 2008,
  'region_of_employment': 'West',
  'prevailing_wage': 122996.86,
  'unit_of_wage': 'Year',
  'full_t

In [8]:
# Number of record dictionaries; this should equal the row count reported by
# data.shape before the conversion.
len(data)

25480

### Establishing connection to MongoDB

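A MongoDB cluster holds databases, and each database holds collections. Below we choose a database name and a collection name; MongoDB creates both automatically the first time a document is inserted, so no separate creation step is required.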

In [11]:
# Names of the target database and collection.
DB_NAME, COLLECTION_NAME = "BenGJ", "UsVisaData"

# MongoDB connection string, taken from the MONGO_DB_URL environment variable.
# The value is None when that variable is not set.
COLLECTION_URL = os.environ.get("MONGO_DB_URL")

In [12]:
import pymongo

# os.getenv() yields None when MONGO_DB_URL is unset, and MongoClient(None)
# falls back to its default host (localhost) instead of failing. Stop here with
# a clear message rather than silently talking to the wrong server.
if not COLLECTION_URL:
    raise ValueError(
        "MONGO_DB_URL is not set; define it in the environment or in the .env file."
    )

# Creating the client does not block or raise if the server is unreachable;
# connection problems only surface on the first real operation (the ping below).
client = pymongo.MongoClient(COLLECTION_URL)

# Indexing by name returns handles to the database and collection.
database = client[DB_NAME]
collection = database[COLLECTION_NAME]

In [13]:
# Send a ping to confirm a successful connection.
try:
    client.admin.command('ping')
except Exception as e:
    # Show the driver's message, then re-raise so that "Run All" stops here
    # instead of continuing on to insert into a deployment that is unreachable.
    print(e)
    raise
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


In [15]:
# Insert all records, but only when the collection is still empty.
# insert_many() appends unconditionally, so without this check every re-run of
# the cell (or of the whole notebook) would add another full copy of the dataset.
# `record` holds the InsertManyResult, or None when the insert was skipped.
if collection.find_one() is None:
    record = collection.insert_many(data)
    print(f"Inserted {len(record.inserted_ids)} documents.")
else:
    record = None
    print("Collection already contains documents; skipping insert.")

In [17]:
# Retrieve and print the first 5 documents stored in the collection.
# The loop variable is called `doc` so that it does not overwrite `record`,
# which the insert cell above uses for the insert_many() result.

records = collection.find().limit(5)
for doc in records:
    print(doc)

{'_id': ObjectId('689b1304a1b8070308694f22'), 'case_id': 'EZYV01', 'continent': 'Asia', 'education_of_employee': 'High School', 'has_job_experience': 'N', 'requires_job_training': 'N', 'no_of_employees': 14513, 'yr_of_estab': 2007, 'region_of_employment': 'West', 'prevailing_wage': 592.2029, 'unit_of_wage': 'Hour', 'full_time_position': 'Y', 'case_status': 'Denied'}
{'_id': ObjectId('689b1304a1b8070308694f23'), 'case_id': 'EZYV02', 'continent': 'Asia', 'education_of_employee': "Master's", 'has_job_experience': 'Y', 'requires_job_training': 'N', 'no_of_employees': 2412, 'yr_of_estab': 2002, 'region_of_employment': 'Northeast', 'prevailing_wage': 83425.65, 'unit_of_wage': 'Year', 'full_time_position': 'Y', 'case_status': 'Certified'}
{'_id': ObjectId('689b1304a1b8070308694f24'), 'case_id': 'EZYV03', 'continent': 'Asia', 'education_of_employee': "Bachelor's", 'has_job_experience': 'N', 'requires_job_training': 'Y', 'no_of_employees': 44444, 'yr_of_estab': 2008, 'region_of_employment': 'We

### Converting the collection back to a DataFrame

In [18]:
# Fetch every document from the collection and build a DataFrame from them.
# The result carries MongoDB's `_id` column in addition to the CSV fields.
all_documents = list(collection.find())
df = pd.DataFrame(all_documents)
df.head()

Unnamed: 0,_id,case_id,continent,education_of_employee,has_job_experience,requires_job_training,no_of_employees,yr_of_estab,region_of_employment,prevailing_wage,unit_of_wage,full_time_position,case_status
0,689b1304a1b8070308694f22,EZYV01,Asia,High School,N,N,14513,2007,West,592.2029,Hour,Y,Denied
1,689b1304a1b8070308694f23,EZYV02,Asia,Master's,Y,N,2412,2002,Northeast,83425.65,Year,Y,Certified
2,689b1304a1b8070308694f24,EZYV03,Asia,Bachelor's,N,Y,44444,2008,West,122996.86,Year,Y,Denied
3,689b1304a1b8070308694f25,EZYV04,Asia,Bachelor's,N,N,98,1897,West,83434.03,Year,Y,Denied
4,689b1304a1b8070308694f26,EZYV05,Africa,Master's,Y,N,1082,2005,South,149907.39,Year,Y,Certified
