## Import Packages

In [1]:
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

## Connect to MongoDB

In [2]:
# Read the MongoDB credentials from the environment so real secrets never get
# committed with the notebook; the original placeholder values remain as the
# fallback when MONGO_USERNAME / MONGO_PASSWORD are unset.
username = os.environ.get('MONGO_USERNAME', 'username')
password = os.environ.get('MONGO_PASSWORD', 'password')
# Pass the credentials as keyword arguments rather than formatting them into the
# URI: a URI requires percent-escaping, so a password containing '@', ':', '/'
# or '%' would otherwise break the connection string or be mis-parsed.
client = MongoClient('mongodb://127.0.0.1', username=username, password=password)

## Databases

In [3]:
# NOTE(review): this silences every warning for the rest of the session.
# It is kept so later cells render as before, but consider narrowing or
# removing it now that the deprecated call below has been replaced.
warnings.filterwarnings("ignore")

# `database_names()` is deprecated and was removed in PyMongo 4;
# `list_database_names()` is the supported replacement and returns the same
# list of database names.
client.list_database_names()

['admin', 'config', 'local']

## Create Collection

In [4]:
# Handle to the `admin` database and to its `drugs` collection.
# NOTE(review): `admin` is MongoDB's administrative database; confirm that
# storing application data there is intended.
db = client.admin
drugs = db.drugs

## Data

In [6]:
# Labels applied to the seven columns of the TSV. `header=0` tells pandas the
# file's first row is a header, which these names then replace. The first
# column is deliberately left unnamed (None).
column_names = [None, "drugName", "condition", "review", "rating", "date", "usefulCount"]

# NOTE(review): the path starts with `.data/` (a hidden directory); confirm
# this is not a typo for `./data/`.
data = pd.read_csv(
    ".data/drugsComTrain_raw.tsv",
    sep="\t",
    header=0,
    names=column_names,
)
data

Unnamed: 0,NaN,drugName,condition,review,rating,date,usefulCount
0,206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9.0,"May 20, 2012",27
1,95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8.0,"April 27, 2010",192
2,92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5.0,"December 14, 2009",17
3,138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8.0,"November 3, 2015",10
4,35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9.0,"November 27, 2016",37
...,...,...,...,...,...,...,...
161292,191035,Campral,Alcohol Dependence,"""I wrote my first report in Mid-October of 201...",10.0,"May 31, 2015",125
161293,127085,Metoclopramide,Nausea/Vomiting,"""I was given this in IV before surgey. I immed...",1.0,"November 1, 2011",34
161294,187382,Orencia,Rheumatoid Arthritis,"""Limited improvement after 4 months, developed...",2.0,"March 15, 2014",35
161295,47128,Thyroid desiccated,Underactive Thyroid,"""I&#039;ve been on thyroid medication 49 years...",10.0,"September 19, 2015",79


## Preprocess Data

### Binarize Rating (1 if rating >= 5, else 0)

In [7]:
# Collapse the numeric rating into a binary flag: 1 when the rating is 5.0 or
# higher, 0 otherwise (a missing rating compares False and therefore becomes 0).
# The column is overwritten in place, so re-running this cell on the already
# encoded 0/1 values would turn every row into 0.
data['rating'] = data['rating'].ge(5.0).astype("int64")

### Change Date Format

In [8]:
# Convert dates written like "May 20, 2012" into "2012/05/20" strings.
# The whole column is parsed in one vectorised call instead of a per-row
# `datetime.strptime`, which is faster on a large frame and keeps a missing
# date as missing rather than raising TypeError. A value that does not match
# the expected format still raises.
data['date'] = pd.to_datetime(data['date'], format="%B %d, %Y").dt.strftime("%Y/%m/%d")

### Drop Columns

In [9]:
# Remove the columns at positions 0 and 6 of the current frame. The selection
# is positional, so this cell must run exactly once on the freshly loaded
# seven-column frame.
unwanted_columns = data.columns[[0, 6]]
data = data.drop(columns=unwanted_columns)

### Rename Column Names

In [10]:
# Final labels for the five remaining columns, assigned by position.
renamed_columns = ["DrugName", "Condition", "Review", "Recommend", "Date"]
data.columns = renamed_columns

In [11]:
# Display the frame as it stands after the preprocessing cells above.
data

Unnamed: 0,DrugName,Condition,Review,Recommend,Date
0,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",1,2012/05/20
1,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",1,2010/04/27
2,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",1,2009/12/14
3,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",1,2015/11/03
4,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",1,2016/11/27
...,...,...,...,...,...
161292,Campral,Alcohol Dependence,"""I wrote my first report in Mid-October of 201...",1,2015/05/31
161293,Metoclopramide,Nausea/Vomiting,"""I was given this in IV before surgey. I immed...",0,2011/11/01
161294,Orencia,Rheumatoid Arthritis,"""Limited improvement after 4 months, developed...",0,2014/03/15
161295,Thyroid desiccated,Underactive Thyroid,"""I&#039;ve been on thyroid medication 49 years...",1,2015/09/19


### Convert DataFrame to Dictionary

In [12]:
# Convert the frame to the shape `insert_many` expects: a list holding one
# {column: value} dict per row (despite its name, `data_dict` is a list).
data_dict = data.to_dict("records")

## Insert Data

In [13]:
# Bulk-insert every record into the `drugs` collection; the result object is
# left as the cell's last expression so it is shown as the output.
insert_result = drugs.insert_many(data_dict)
insert_result

<pymongo.results.InsertManyResult at 0x7facbce6f408>

## Check Collections

In [14]:
# List the collections in the current database to confirm `drugs` now exists.
collection_names = db.list_collection_names()
collection_names

['system.version', 'drugs', 'system.users']

## Retrieve Data

In [15]:
# Open a cursor over every document in the `drugs` collection (the empty filter
# matches all documents).
cursor = db.get_collection("drugs").find({})

## Display Data

In [16]:
# A cursor can only be consumed once. Rewinding first makes this cell safe to
# re-run: without it a second run would build an empty frame and then fail
# with KeyError when removing `_id`.
cursor.rewind()

# Materialise all documents into a frame and remove the `_id` column.
# `errors="ignore"` keeps this working when the collection is empty and the
# resulting frame therefore has no `_id` column.
final_data = pd.DataFrame(list(cursor)).drop(columns="_id", errors="ignore")

In [17]:
# Display the frame rebuilt from the documents read back from MongoDB.
final_data

Unnamed: 0,DrugName,Condition,Review,Recommend,Date
0,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",1,2012/05/20
1,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",1,2010/04/27
2,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",1,2009/12/14
3,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",1,2015/11/03
4,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",1,2016/11/27
...,...,...,...,...,...
161292,Campral,Alcohol Dependence,"""I wrote my first report in Mid-October of 201...",1,2015/05/31
161293,Metoclopramide,Nausea/Vomiting,"""I was given this in IV before surgey. I immed...",0,2011/11/01
161294,Orencia,Rheumatoid Arthritis,"""Limited improvement after 4 months, developed...",0,2014/03/15
161295,Thyroid desiccated,Underactive Thyroid,"""I&#039;ve been on thyroid medication 49 years...",1,2015/09/19


## Drop Collection

In [5]:
# Destructive: removes the whole `drugs` collection and all its documents.
db["drugs"].drop()