# Extracting XRP price history from https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory, preprocessing it, and saving it to a MongoDB database

In [2]:
# import libraries
import os

import certifi
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [3]:
# create a dataframe from the csv
# Forward slashes are used throughout the path: in a normal string literal a bare
# backslash before a letter (e.g. "\A", "\D") is an invalid escape sequence.
path = "C:/UC_Berkeley/Analysis_Projects_Class_Folder/Module20_Final_Project/Data_Kaggle_Historical/coin_XRP.csv"
df = pd.read_csv(path)
df

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,XRP,XRP,2013-08-05 23:59:59,0.005980,0.005613,0.005875,0.005613,0.000000e+00,4.387916e+07
1,2,XRP,XRP,2013-08-06 23:59:59,0.005661,0.004629,0.005637,0.004680,0.000000e+00,3.659101e+07
2,3,XRP,XRP,2013-08-07 23:59:59,0.004682,0.004333,0.004669,0.004417,0.000000e+00,3.453412e+07
3,4,XRP,XRP,2013-08-08 23:59:59,0.004424,0.004175,0.004397,0.004254,0.000000e+00,3.325863e+07
4,5,XRP,XRP,2013-08-09 23:59:59,0.004367,0.004253,0.004257,0.004291,0.000000e+00,3.354750e+07
...,...,...,...,...,...,...,...,...,...,...
2888,2889,XRP,XRP,2021-07-02 23:59:59,0.667287,0.634726,0.659890,0.656763,2.061607e+09,3.030759e+10
2889,2890,XRP,XRP,2021-07-03 23:59:59,0.683677,0.644653,0.655639,0.672888,1.872820e+09,3.105172e+10
2890,2891,XRP,XRP,2021-07-04 23:59:59,0.707783,0.665802,0.673218,0.694945,1.885242e+09,3.206960e+10
2891,2892,XRP,XRP,2021-07-05 23:59:59,0.695653,0.648492,0.695653,0.654300,2.076373e+09,3.019395e+10


In [4]:
# checking for null values
# Per-column count of missing entries; this is the cell's displayed result.
df.isna().sum()

SNo          0
Name         0
Symbol       0
Date         0
High         0
Low          0
Open         0
Close        0
Volume       0
Marketcap    0
dtype: int64

In [5]:
# dropping irrelevant columns
# errors='ignore' keeps this cell safe to re-run: df is rebound here, so on a second
# run the columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['SNo', 'Symbol'], errors='ignore')
df.head()

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
0,XRP,2013-08-05 23:59:59,0.00598,0.005613,0.005875,0.005613,0.0,43879160.0
1,XRP,2013-08-06 23:59:59,0.005661,0.004629,0.005637,0.00468,0.0,36591010.0
2,XRP,2013-08-07 23:59:59,0.004682,0.004333,0.004669,0.004417,0.0,34534120.0
3,XRP,2013-08-08 23:59:59,0.004424,0.004175,0.004397,0.004254,0.0,33258630.0
4,XRP,2013-08-09 23:59:59,0.004367,0.004253,0.004257,0.004291,0.0,33547500.0


In [6]:
# parse the 'Date' column into pandas datetimes and store it back on the frame
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [7]:
# the analysis covers the last five years, so these are the bounds of that date range
start, end = '2016-07-06', '2021-07-06'

In [8]:
# boolean mask selecting rows whose Date is after `start` and no later than `end`
after_start = df['Date'] > start
up_to_end = df['Date'] <= end
fiveyears = after_start & up_to_end

In [9]:
# keep only the rows selected by the five-year mask in a new dataframe
XRP_df = df[fiveyears]
XRP_df

Unnamed: 0,Name,Date,High,Low,Open,Close,Volume,Marketcap
1066,XRP,2016-07-06 23:59:59,0.006743,0.006687,0.006716,0.006691,6.001950e+05,2.364875e+08
1067,XRP,2016-07-07 23:59:59,0.006699,0.006495,0.006689,0.006598,7.896930e+05,2.332269e+08
1068,XRP,2016-07-08 23:59:59,0.006715,0.006563,0.006609,0.006715,6.040760e+05,2.373638e+08
1069,XRP,2016-07-09 23:59:59,0.006718,0.006624,0.006717,0.006679,5.774470e+05,2.360786e+08
1070,XRP,2016-07-10 23:59:59,0.006676,0.006621,0.006672,0.006674,5.710470e+05,2.359033e+08
...,...,...,...,...,...,...,...,...
2887,XRP,2021-07-01 23:59:59,0.704785,0.646796,0.704785,0.661180,2.553971e+09,3.051144e+10
2888,XRP,2021-07-02 23:59:59,0.667287,0.634726,0.659890,0.656763,2.061607e+09,3.030759e+10
2889,XRP,2021-07-03 23:59:59,0.683677,0.644653,0.655639,0.672888,1.872820e+09,3.105172e+10
2890,XRP,2021-07-04 23:59:59,0.707783,0.665802,0.673218,0.694945,1.885242e+09,3.206960e+10


In [10]:
# write the filtered dataframe out as a csv file in the working directory
csv_name = 'XRP(fiveyears).csv'
XRP_df.to_csv(csv_name)

# Storing the data in MongoDB (database name: Finale)

In [13]:
# Read in mongodb server location as client
# The connection string carries the username and password, so it is read from the
# MONGODB_URI environment variable instead of being written into the notebook.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string before running this cell."
    )
# certifi supplies the CA bundle used to verify the server's TLS certificate
client = MongoClient(mongodb_uri, tlsCAFile=certifi.where())

In [14]:
# ask the server which databases exist and show their names
database_names = client.list_database_names()
database_names

['Bitcoin_db',
 'Cardano_DB',
 'Ethereum_db',
 'Finale',
 'Tether_DB',
 'XRP_DB',
 'admin',
 'local']

In [15]:
# pick the target database on the server, then the collection inside it
DB_NAME, COLLECTION_NAME = 'Finale', 'XRP'
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

In [16]:
# adding dataframe to mongoDb
# reset_index() returns a new frame, so XRP_df is left untouched and this cell can be
# re-run; an in-place reset adds another index column each time the cell runs.
# The original row labels are still carried into every record as the 'index' field.
XRP_df_dict = XRP_df.reset_index().to_dict("records")

In [17]:
# insert every record into the collection and show the result object
# NOTE(review): nothing here de-duplicates, so running the notebook again would send the
# same rows a second time — confirm whether the collection should be cleared first.
insert_result = collection.insert_many(XRP_df_dict)
insert_result

<pymongo.results.InsertManyResult at 0x1caaa27f0c0>