# Extracting data from https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory, preprocessing it, and saving it to a MongoDB database

In [2]:
# import libraries
import os

import certifi
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [4]:
# create a dataframe from the csv
# The path is a raw string because it mixes "/" and "\" separators: in a normal
# string literal "\A" and "\D" are invalid escape sequences, which Python warns
# about today and plans to reject. The raw prefix keeps the exact same characters.
path = r"C:/UC_Berkeley\Analysis_Projects_Class_Folder/Module20_Final_Project\Data_Kaggle_Historical/coin_Tether.csv"
df = pd.read_csv(path)
df

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Tether,USDT,2015-02-26 23:59:59,1.212320,1.194710,1.210420,1.205740,5.955460e+00,3.033642e+05
1,2,Tether,USDT,2015-03-02 23:59:59,0.607890,0.568314,0.571249,0.606502,3.032500e+00,1.525959e+05
2,3,Tether,USDT,2015-03-03 23:59:59,0.606229,0.604416,0.605129,0.606229,3.031130e+00,1.525272e+05
3,4,Tether,USDT,2015-03-06 23:59:59,1.000000,1.000000,1.000000,1.000000,9.264720e+01,2.516000e+05
4,5,Tether,USDT,2015-03-07 23:59:59,1.000000,1.000000,1.000000,1.000000,5.819680e+04,2.516000e+05
...,...,...,...,...,...,...,...,...,...,...
2313,2314,Tether,USDT,2021-07-02 23:59:59,1.000830,0.999991,1.000301,1.000033,4.562659e+10,6.237592e+10
2314,2315,Tether,USDT,2021-07-03 23:59:59,1.000674,0.999914,1.000046,0.999956,4.119499e+10,6.237115e+10
2315,2316,Tether,USDT,2021-07-04 23:59:59,1.000165,0.998902,0.999987,0.999501,4.306666e+10,6.234274e+10
2316,2317,Tether,USDT,2021-07-05 23:59:59,1.001040,0.999426,0.999426,1.000667,4.721585e+10,6.241550e+10


In [5]:
# checking for null values
# grand total of missing cells across the whole frame (computed, not displayed)
total_missing = df.isnull().sum().sum()
# per-column count of missing cells; as the last expression this is what the cell shows
missing_per_column = df.isna().sum()
missing_per_column

SNo          0
Name         0
Symbol       0
Date         0
High         0
Low          0
Open         0
Close        0
Volume       0
Marketcap    0
dtype: int64

In [6]:
# dropping irrelevant columns (serial number and ticker symbol)
unused_columns = ['SNo', 'Symbol']
df = df.drop(unused_columns, axis=1)
# preview the first five rows of the trimmed frame
df.head(5)

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,Tether,2015-02-26 23:59:59,1.21232,1.19471,1.21042,1.20574,5.95546,303364.184
1,Tether,2015-03-02 23:59:59,0.60789,0.568314,0.571249,0.606502,3.0325,152595.9032
2,Tether,2015-03-03 23:59:59,0.606229,0.604416,0.605129,0.606229,3.03113,152527.2164
3,Tether,2015-03-06 23:59:59,1.0,1.0,1.0,1.0,92.647202,251600.0
4,Tether,2015-03-07 23:59:59,1.0,1.0,1.0,1.0,58196.800781,251600.0


In [7]:
# converting date to datetime format so the column can be compared against date bounds
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [8]:
# data being analyzed for last five years, so filtering data for that date range
# (bounds are ISO date strings: first the lower bound, then the upper bound)
start, end = '2016-07-06', '2021-07-06'

In [9]:
# creating a variable to store that date range: a boolean mask that is True for
# rows dated strictly after `start` and up to and including `end`
# NOTE(review): the bounds are date-only strings, which compare as midnight; the
# rows shown above are stamped 23:59:59, so a row dated on `end` itself would
# fall outside `<= end` — confirm that is intended.
date_column = df['Date']
fiveyears = date_column.gt(start) & date_column.le(end)

In [10]:
# creating a new dataframe after applying the above filters:
# keep every column, but only the rows where the date mask is True
tether_df = df.loc[fiveyears, :]
tether_df

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
491,Tether,2016-07-06 23:59:59,1.000000,1.000000,1.000000,1.000000,5.679090e+05,5.951591e+06
492,Tether,2016-07-07 23:59:59,1.000000,0.999989,1.000000,0.999991,1.875330e+06,5.951537e+06
493,Tether,2016-07-08 23:59:59,0.999999,0.999989,0.999991,0.999999,1.277010e+06,5.951585e+06
494,Tether,2016-07-09 23:59:59,0.999999,0.999999,0.999999,0.999999,8.784860e+05,5.951585e+06
495,Tether,2016-07-10 23:59:59,0.999999,0.999999,0.999999,0.999999,2.846420e+05,5.951585e+06
...,...,...,...,...,...,...,...,...
2312,Tether,2021-07-01 23:59:59,1.000640,0.999997,1.000201,1.000226,5.457746e+10,6.239234e+10
2313,Tether,2021-07-02 23:59:59,1.000830,0.999991,1.000301,1.000033,4.562659e+10,6.237592e+10
2314,Tether,2021-07-03 23:59:59,1.000674,0.999914,1.000046,0.999956,4.119499e+10,6.237115e+10
2315,Tether,2021-07-04 23:59:59,1.000165,0.998902,0.999987,0.999501,4.306666e+10,6.234274e+10


In [11]:
# exporting the dataframe to csv
# relative file name, so the file is written to the current working directory;
# the row index is written as well because `index` is left at its default
output_csv = 'Tether(fiveyears).csv'
tether_df.to_csv(output_csv)

# Storing the data in MongoDB (database name: Finale)

In [12]:
# Read in mongodb server location as client
# The connection string carries the database user name and password, so it is
# read from the MONGODB_URI environment variable rather than stored in the
# notebook. Expected form:
#   mongodb+srv://<user>:<password>@<cluster-host>/<database>?retryWrites=true&w=majority
# A missing variable raises KeyError here instead of failing later on first use.
# NOTE(review): the password that was previously committed in this cell should
# be rotated, since it remains visible in the file's history.
mongodb_uri = os.environ["MONGODB_URI"]
# certifi supplies the CA bundle used to verify the server's TLS certificate
client = MongoClient(mongodb_uri, tlsCAFile=certifi.where())

In [13]:
# Find Databases: ask the server for the names of the databases it hosts
database_names = client.list_database_names()
database_names

['Bitcoin_db',
 'Cardano_DB',
 'Ethereum_db',
 'Finale',
 'Tether_DB',
 'XRP_DB',
 'admin',
 'local']

In [14]:
# connecting to the db on MongoDb and creating collection in the db
# (both handles are looked up by name on the client opened above)
db_name, collection_name = 'Finale', 'Tether'
db = client[db_name]
collection = db[collection_name]

In [15]:
# adding dataframe to mongoDb
# reset_index() returns a new frame with the old row labels moved into an
# "index" column, so each record still carries its original row number.
# tether_df itself is left untouched: resetting it in place made this cell
# non-repeatable (a second run adds a "level_0" column, a third run raises).
tether_records = tether_df.reset_index()
# one dict per row, keyed by column name — the list-of-documents shape insert_many takes
tether_df_dict = tether_records.to_dict("records")

In [16]:
# Insert collection: write all records to the collection in one call
# NOTE(review): running the notebook again sends the same rows to the server a
# second time — presumably this cell is meant to run once per load; confirm.
insert_result = collection.insert_many(tether_df_dict)
insert_result

<pymongo.results.InsertManyResult at 0x21b5fe0eb80>