# Extracting Cardano price data from https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory, preprocessing it, and saving it to a MongoDB database

In [2]:
# import libraries
import os

import certifi
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [3]:
# create a dataframe from the csv
# The path uses forward slashes throughout: the earlier mix of separators contained
# "\A" and "\D", which are invalid string escape sequences (a warning today and a
# planned syntax error in future Python versions). Windows accepts "/" as a separator.
# NOTE(review): this is an absolute path on the author's machine; point it at your own
# copy of coin_Cardano.csv before running.
path = "C:/UC_Berkeley/Analysis_Projects_Class_Folder/Module20_Final_Project/Data_Kaggle_Historical/coin_Cardano.csv"
df = pd.read_csv(path)
df

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Cardano,ADA,2017-10-02 23:59:59,0.030088,0.019969,0.024607,0.025932,5.764130e+07,6.288991e+08
1,2,Cardano,ADA,2017-10-03 23:59:59,0.027425,0.020690,0.025757,0.020816,1.699780e+07,5.396927e+08
2,3,Cardano,ADA,2017-10-04 23:59:59,0.022806,0.020864,0.020864,0.021931,9.000050e+06,5.686195e+08
3,4,Cardano,ADA,2017-10-05 23:59:59,0.022154,0.020859,0.021951,0.021489,5.562510e+06,5.571390e+08
4,5,Cardano,ADA,2017-10-06 23:59:59,0.021542,0.018360,0.021359,0.018539,7.780710e+06,4.806646e+08
...,...,...,...,...,...,...,...,...,...,...
1369,1370,Cardano,ADA,2021-07-02 23:59:59,1.394397,1.286607,1.332942,1.394397,2.159410e+09,4.454587e+10
1370,1371,Cardano,ADA,2021-07-03 23:59:59,1.441714,1.359664,1.394152,1.406836,2.028094e+09,4.494324e+10
1371,1372,Cardano,ADA,2021-07-04 23:59:59,1.493717,1.382153,1.404008,1.458184,1.806362e+09,4.658364e+10
1372,1373,Cardano,ADA,2021-07-05 23:59:59,1.461221,1.379284,1.461221,1.404898,1.759461e+09,4.488134e+10


In [4]:
# checking for null values: number of missing entries in each column
# (a single call is enough — isnull() is an alias of isna(), and only the last
# expression of a cell is displayed, so an extra grand-total line above it was never shown)
df.isna().sum()

SNo          0
Name         0
Symbol       0
Date         0
High         0
Low          0
Open         0
Close        0
Volume       0
Marketcap    0
dtype: int64

In [5]:
# remove the 'SNo' and 'Symbol' columns, which this notebook treats as irrelevant,
# then preview the first rows of the trimmed frame
df = df.drop(['SNo', 'Symbol'], axis='columns')
df.head()

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Cardano,2017-10-02 23:59:59,0.030088,0.019969,0.024607,0.025932,57641300.0,628899100.0
1,Cardano,2017-10-03 23:59:59,0.027425,0.02069,0.025757,0.020816,16997800.0,539692700.0
2,Cardano,2017-10-04 23:59:59,0.022806,0.020864,0.020864,0.021931,9000050.0,568619500.0
3,Cardano,2017-10-05 23:59:59,0.022154,0.020859,0.021951,0.021489,5562510.0,557139000.0
4,Cardano,2017-10-06 23:59:59,0.021542,0.01836,0.021359,0.018539,7780710.0,480664600.0


In [6]:
# parse the 'Date' column into pandas datetime values so it can be compared against date bounds
df = df.assign(Date=pd.to_datetime(df['Date']))

In [7]:
# bounds of the five-year analysis window, as date strings (lower bound first, upper bound second)
start, end = '2016-07-06', '2021-07-06'

In [8]:
# boolean mask: True for rows whose 'Date' is after `start` and no later than `end`
# NOTE(review): the bounds are date-only strings, which presumably compare as midnight,
# while the rows displayed above are stamped 23:59:59 — so a row dated on the `end` day
# itself would fail `<= end` and a row on the `start` day would pass `> start`.
# Confirm this is the intended window.
fiveyears = df['Date'].gt(start) & df['Date'].le(end)

In [9]:
# keep only the rows selected by the five-year mask and display the result
cardano_df = df[fiveyears]
cardano_df

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Cardano,2017-10-02 23:59:59,0.030088,0.019969,0.024607,0.025932,5.764130e+07,6.288991e+08
1,Cardano,2017-10-03 23:59:59,0.027425,0.020690,0.025757,0.020816,1.699780e+07,5.396927e+08
2,Cardano,2017-10-04 23:59:59,0.022806,0.020864,0.020864,0.021931,9.000050e+06,5.686195e+08
3,Cardano,2017-10-05 23:59:59,0.022154,0.020859,0.021951,0.021489,5.562510e+06,5.571390e+08
4,Cardano,2017-10-06 23:59:59,0.021542,0.018360,0.021359,0.018539,7.780710e+06,4.806646e+08
...,...,...,...,...,...,...,...,...
1368,Cardano,2021-07-01 23:59:59,1.383814,1.304074,1.383814,1.335611,2.002168e+09,4.266788e+10
1369,Cardano,2021-07-02 23:59:59,1.394397,1.286607,1.332942,1.394397,2.159410e+09,4.454587e+10
1370,Cardano,2021-07-03 23:59:59,1.441714,1.359664,1.394152,1.406836,2.028094e+09,4.494324e+10
1371,Cardano,2021-07-04 23:59:59,1.493717,1.382153,1.404008,1.458184,1.806362e+09,4.658364e+10


In [10]:
# write the filtered frame to a CSV file (relative path, row index included as the first column)
cardano_df.to_csv('cardano(fiveyears).csv', index=True)

# Storing the data in MongoDB (database name: Finale)

In [16]:
# Connect to the MongoDB server.
# The connection string (which embeds the username and password) is read from the
# MONGODB_URI environment variable so that credentials are never stored in the notebook.
# Expected form: mongodb+srv://<user>:<password>@<cluster-host>/<default-db>?retryWrites=true&w=majority
# NOTE(review): a password that was previously committed in this cell should be rotated.
mongo_uri = os.environ.get("MONGODB_URI")
if not mongo_uri:
    raise RuntimeError(
        "MONGODB_URI is not set; export the MongoDB connection string "
        "in the environment before running this cell."
    )
# certifi supplies the CA bundle used to verify the server's TLS certificate
client = MongoClient(mongo_uri, tlsCAFile=certifi.where())

In [17]:
# List the names of the databases visible through this client
client.list_database_names()

['Bitcoin_db',
 'Cardano_DB',
 'Ethereum_db',
 'Finale',
 'Tether_DB',
 'XRP_DB',
 'admin',
 'local']

In [18]:
# get a handle on the 'Finale' database and, inside it, the 'Cardano' collection
db = client.get_database('Finale')
collection = db.get_collection('Cardano')

In [19]:
# convert the dataframe into a list of per-row dicts, the shape insert_many expects
# reset_index() returns a new frame with the old row index as an 'index' column; using it
# instead of an in-place reset leaves cardano_df untouched, so re-running this cell
# always produces the same records rather than stacking extra index columns.
cardano_df_dict = cardano_df.reset_index().to_dict("records")

In [20]:
# Insert all the records into the collection in a single batch
# NOTE(review): nothing visible in this notebook clears the collection or checks for
# existing rows, so re-running the notebook presumably stores the same rows again —
# confirm before re-running.
collection.insert_many(cardano_df_dict)

<pymongo.results.InsertManyResult at 0x17bb9b63a80>