# Extracting Dogecoin price history from https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory, preprocessing it, and saving it to a MongoDB database

In [1]:
# import libraries
import os
from getpass import getpass

import certifi
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [2]:
# create a dataframe from the csv
# Only forward slashes are used in the path: a backslash followed by a letter such as
# "\A" or "\D" is an invalid escape sequence in a normal (non-raw) string literal and
# triggers a warning on current Python versions. Windows accepts "/" as a separator.
path = "C:/UC_Berkeley/Analysis_Projects_Class_Folder/Module20_Final_Project/Data_Kaggle_Historical/coin_Dogecoin.csv"
df = pd.read_csv(path)
df

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Dogecoin,DOGE,2013-12-16 23:59:59,0.000866,0.000150,0.000299,0.000205,0.000000e+00,1.509085e+06
1,2,Dogecoin,DOGE,2013-12-17 23:59:59,0.000289,0.000116,0.000207,0.000269,0.000000e+00,2.169688e+06
2,3,Dogecoin,DOGE,2013-12-18 23:59:59,0.000362,0.000205,0.000267,0.000362,0.000000e+00,3.188943e+06
3,4,Dogecoin,DOGE,2013-12-19 23:59:59,0.001520,0.000328,0.000395,0.001162,0.000000e+00,1.115034e+07
4,5,Dogecoin,DOGE,2013-12-20 23:59:59,0.001143,0.000662,0.001143,0.000704,0.000000e+00,7.284337e+06
...,...,...,...,...,...,...,...,...,...,...
2755,2756,Dogecoin,DOGE,2021-07-02 23:59:59,0.247997,0.238848,0.243982,0.245264,1.321471e+09,3.194925e+10
2756,2757,Dogecoin,DOGE,2021-07-03 23:59:59,0.250214,0.242454,0.245106,0.246411,9.170158e+08,3.210491e+10
2757,2758,Dogecoin,DOGE,2021-07-04 23:59:59,0.252567,0.243425,0.246425,0.246483,9.735115e+08,3.211767e+10
2758,2759,Dogecoin,DOGE,2021-07-05 23:59:59,0.246419,0.227838,0.246419,0.231614,1.267949e+09,3.018344e+10


In [3]:
# checking for null values: number of missing entries in each column
# (isnull() is an alias of isna(), so a single call covers both)
df.isna().sum()

SNo          0
Name         0
Symbol       0
Date         0
High         0
Low          0
Open         0
Close        0
Volume       0
Marketcap    0
dtype: int64

In [4]:
# remove the columns that are not needed for the analysis
unneeded_columns = ['SNo', 'Symbol']
df = df.drop(unneeded_columns, axis=1)
df.head()

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Dogecoin,2013-12-16 23:59:59,0.000866,0.00015,0.000299,0.000205,0.0,1509085.0
1,Dogecoin,2013-12-17 23:59:59,0.000289,0.000116,0.000207,0.000269,0.0,2169688.0
2,Dogecoin,2013-12-18 23:59:59,0.000362,0.000205,0.000267,0.000362,0.0,3188943.0
3,Dogecoin,2013-12-19 23:59:59,0.00152,0.000328,0.000395,0.001162,0.0,11150340.0
4,Dogecoin,2013-12-20 23:59:59,0.001143,0.000662,0.001143,0.000704,0.0,7284337.0


In [5]:
# parse the Date column (read from the csv as strings) into pandas datetime values
date_strings = df['Date']
df['Date'] = pd.to_datetime(date_strings)

In [6]:
# the analysis covers the last five years of data; these are the bounds of that window
start, end = '2016-07-06', '2021-07-06'

In [7]:
# boolean mask selecting rows whose Date is after `start` and not later than `end`
dates = df['Date']
after_start = dates > start
up_to_end = dates <= end
fiveyears = after_start & up_to_end

In [8]:
# keep only the rows selected by the five-year mask
dodgecoin_df = df[fiveyears]
dodgecoin_df

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
933,Dogecoin,2016-07-06 23:59:59,0.000290,0.000278,0.000288,0.000284,1.777410e+06,2.980788e+07
934,Dogecoin,2016-07-07 23:59:59,0.000284,0.000267,0.000284,0.000275,2.387940e+05,2.885207e+07
935,Dogecoin,2016-07-08 23:59:59,0.000281,0.000272,0.000275,0.000277,2.474120e+05,2.915545e+07
936,Dogecoin,2016-07-09 23:59:59,0.000279,0.000265,0.000276,0.000270,1.463280e+05,2.840995e+07
937,Dogecoin,2016-07-10 23:59:59,0.000274,0.000267,0.000270,0.000272,1.166160e+05,2.860405e+07
...,...,...,...,...,...,...,...,...
2754,Dogecoin,2021-07-01 23:59:59,0.260092,0.238677,0.253828,0.244549,2.463947e+09,3.185285e+10
2755,Dogecoin,2021-07-02 23:59:59,0.247997,0.238848,0.243982,0.245264,1.321471e+09,3.194925e+10
2756,Dogecoin,2021-07-03 23:59:59,0.250214,0.242454,0.245106,0.246411,9.170158e+08,3.210491e+10
2757,Dogecoin,2021-07-04 23:59:59,0.252567,0.243425,0.246425,0.246483,9.735115e+08,3.211767e+10


In [9]:
# write the filtered dataframe to a csv file in the current working directory
dodgecoin_df.to_csv(path_or_buf='dodgecoin(fiveyears).csv')

# Storing the data in MongoDB (DB name: Finale)

In [10]:
# Read in mongodb server location as client
# The connection string contains the database username and password, so it is read
# from the MONGODB_URI environment variable (falling back to a hidden interactive
# prompt) instead of being written into the notebook.
mongo_uri = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")
client = MongoClient(mongo_uri, tlsCAFile=certifi.where())

In [11]:
# list the names of the databases available on the server
database_names = client.list_database_names()
database_names

['Bitcoin_db',
 'Cardano_DB',
 'Ethereum_db',
 'Finale',
 'Tether_DB',
 'XRP_DB',
 'admin',
 'local']

In [12]:
# get handles to the 'Finale' database and to the 'Dodgecoin' collection inside it
db = client.get_database('Finale')
collection = db.get_collection('Dodgecoin')

In [13]:
# convert the dataframe into a list of per-row dicts ready to be inserted into MongoDB
# reset_index() returns a new frame with the row labels moved into an "index" column;
# dodgecoin_df itself is left untouched so this cell gives the same result when re-run
dodgecoin_df_dict = dodgecoin_df.reset_index().to_dict("records")

In [14]:
# insert every record as a document into the collection
insert_result = collection.insert_many(dodgecoin_df_dict)
insert_result

<pymongo.results.InsertManyResult at 0x2cfad92d380>