In [10]:
import pandas as pd
import time
import conn_creds as cc
import numpy as np
import requests, datetime, re
from bs4 import BeautifulSoup
import mysql.connector
from sqlalchemy import create_engine
import sqlalchemy
# Notebook-wide: turn off pandas' chained-assignment (SettingWithCopy) check.
pd.set_option('mode.chained_assignment', None)

# Create the SQLAlchemy engine for the MySQL database, using the values
# exposed by the local conn_creds module (imported as cc).
db_name = 'crypto_price_db'
engine = create_engine(
    ''.join([
        'mysql+mysqlconnector://',
        cc.db_username, ':', cc.db_pass,
        '@', cc.db_host, ':', cc.db_port,
        '/', db_name,
    ]),
    echo=False,
)

In [5]:
## Function for cleaning up df before MySQL push

def cmc_df_cleanup(df):
    """Select and tidy the scraped CoinMarketCap columns before the MySQL push.

    Steps, in order:
      1. Keep only coin_name, date_hist and the six value columns, in a
         fixed column order.
      2. Strip thousands separators (',') from the value columns.
      3. Turn placeholder cells -- a lone '-' or an empty/whitespace-only
         string -- into NaN.
      4. Parse date_hist with pd.to_datetime.

    The value columns are otherwise left as text; no numeric cast happens here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the eight columns listed above. The six value columns
        must hold strings (the ``.str`` accessor is used on them).

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.
    """
    value_cols = ['open_price', 'close_price', 'high_price', 'low_price', 'market_cap', 'volume']

    # .copy() so the assignments below write into an independent frame rather
    # than into a slice of the caller's data.
    df = df[['coin_name', 'date_hist'] + value_cols].copy()

    # remove thousands separators
    df[value_cols] = df[value_cols].apply(lambda x: x.str.replace(',', ''))

    # Convert placeholder cells into NaN. The pattern is anchored on purpose:
    # with a non-string replacement, pandas replaces the WHOLE cell whenever
    # the regex matches anywhere in it, so an unanchored '-' would also wipe
    # legitimate values that merely contain a hyphen (e.g. '1.2e-05').
    df[value_cols] = df[value_cols].apply(
        lambda x: x.replace(r'^\s*-?\s*$', np.nan, regex=True))

    # convert to datetime
    df['date_hist'] = pd.to_datetime(df['date_hist'])
    return df

## Function for updating missing historical data in CMC

def pull_cmc(coin, start_date, count=None):
    """Scrape the CoinMarketCap historical-data table for a single coin.

    Requests the coin's historical-data page for the range ``start_date``
    through today, reads the first ``<table>`` on the page and returns its
    body rows as a DataFrame of raw (stripped) cell text.

    Parameters
    ----------
    coin : str
        CoinMarketCap URL slug of the coin (e.g. 'bitcoin').
    start_date : str or date-like
        First day to request, as 'YYYY-MM-DD'. Any object whose ``str()``
        starts with that form (date, datetime, Timestamp) is also accepted.
    count : int, optional
        Progress number shown at the start of the printed status line.
        When omitted, a notebook-level ``count`` variable is used if one
        exists (older cells set it before calling); otherwise the status
        line carries no number.

    Returns
    -------
    pandas.DataFrame or None
        Columns: coin_name, date_hist, open_price, high_price, low_price,
        close_price, volume, market_cap. ``None`` when the page could not be
        fetched or parsed; a "missing" line is printed in that case.
    """
    columns = ['coin_name', 'date_hist', 'open_price', 'high_price',
               'low_price', 'close_price', 'volume', 'market_cap']

    # Backward compatibility: fall back to the notebook-level counter, but do
    # not blow up with a NameError when it has never been defined.
    if count is None:
        count = globals().get('count')
    prefix = '' if count is None else str(count) + '. '

    try:
        start_param = str(start_date)[:10].replace('-', '')
        end_param = datetime.date.today().strftime('%Y%m%d')
        url = ('https://coinmarketcap.com/currencies/' + coin
               + '/historical-data/?start=' + start_param + '&end=' + end_param)

        # A timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status turns HTTP errors into an explicit failure reason.
        r = requests.get(url, timeout=30)
        r.raise_for_status()

        soup = BeautifulSoup(r.text, 'lxml')
        table = soup.find('table')
        if table is None:
            raise ValueError('no table found on page')

        # Collect rows in a list and build the frame once; appending to a
        # DataFrame row by row is quadratic and DataFrame.append no longer
        # exists in pandas 2.x.
        records = []
        for row in table.find_all('tr')[1:]:  # [1:] skips the header row
            cells = [td.text.strip() for td in row.find_all('td')]
            if len(cells) < 7:
                raise ValueError('unexpected row layout: ' + str(len(cells)) + ' cells')
            records.append(dict(zip(columns, [coin] + cells[:7])))

        final_coin_data = pd.DataFrame(records, columns=columns)
    except Exception as exc:
        # Best-effort scrape: report the coin as missing and signal failure
        # with None, but let KeyboardInterrupt / SystemExit propagate.
        print(prefix + 'missing ' + str(coin)
              + ' (' + type(exc).__name__ + ': ' + str(exc) + ')')
        return None

    print(prefix + str(coin) + '...done')
    return final_coin_data

In [6]:
### Initial pull: use this to build a database in MySQL to pull all CoinMarketCap (CMC) data.
## Step 1 of 2: Build pandas dataframe of historical coin data.

# Build the list of coin slugs from the "all coins" page.
url = 'https://coinmarketcap.com/all/views/all/'
r = requests.get(url, timeout=30)
r.raise_for_status()  # fail loudly here instead of silently scraping nothing
soup = BeautifulSoup(r.content, 'html5lib')
names = soup.find_all("a", {"class": "currency-name-container"})
names_strings = [str(i) for i in names]

# Pull the slug out of each rendered <a> tag: the piece between
# 'currencies/' and the closing '/"' of the link target.
names_list = [re.split('/"|currencies/', x)[-2] for x in names_strings]

# Scrape each coin. The per-coin work is delegated to pull_cmc (defined in
# the cell above) instead of repeating its body here; it prints the progress
# line itself and returns None when a coin cannot be scraped.
coin_frames = []
missing_coins = []
count = 0

for item in names_list:
    count += 1  # notebook-level counter; pull_cmc uses it in its progress messages
    coin_data = pull_cmc(item, '2010-01-01')
    if coin_data is None:
        missing_coins.append(item)
    else:
        coin_frames.append(coin_data)

if not coin_frames:
    raise RuntimeError('No historical data could be scraped for any of the '
                       + str(len(names_list)) + ' coins found.')

# Concatenate once at the end rather than growing a DataFrame inside the loop.
final_coin_data = pd.concat(coin_frames, ignore_index=True)
final_coin_data = cmc_df_cleanup(final_coin_data).reset_index(drop=True)

1. bitcoin...done
2. ethereum...done
3. ripple...done
4. bitcoin-cash...done
5. eos...done
6. cardano...done
7. litecoin...done
8. stellar...done
9. tron...done
10. iota...done
11. neo...done
12. monero...done
13. dash...done
14. nem...done
15. vechain...done
16. tether...done
17. ethereum-classic...done
18. qtum...done
19. omisego...done
20. icon...done
21. binance-coin...done
22. lisk...done
23. verge...done
24. bitcoin-gold...done
25. zcash...done
26. aeternity...done
27. nano...done
28. steem...done
29. bytom...done
30. ontology...done
31. bitshares...done
32. bytecoin-bcn...done
33. siacoin...done
34. populous...done
35. wanchain...done
36. bitcoin-diamond...done
37. bitcoin-private...done
38. zilliqa...done
39. stratis...done
40. waves...done
41. maker...done
42. rchain...done
43. 0x...done
44. dogecoin...done
45. decred...done
46. status...done
47. digixdao...done
48. mixin...done
49. loopring...done
50. basic-attention-token...done
51. aelf...done
52. aion...done
53. iostoken..

392. revolutionvr...done
393. paccoin...done
394. pareto-network...done
395. datawallet...done
396. gridcoin...done
397. qbao...done
398. sureremit...done
399. prochain...done
400. attention-token-of-media...done
401. ormeus-coin...done
402. sibcoin...done
403. alqo...done
404. oneroot-network...done
405. flash...done
406. divi...done
407. storiqa...done
408. prizm...done
409. solarcoin...done
410. te-food...done
411. posw-coin...done
412. indahash...done
413. icos...done
414. covesting...done
415. latoken...done
416. bitclave...done
417. florincoin...done
418. databits...done
419. uquid-coin...done
420. guaranteed-ethurance-token-extra...done
421. bitcoin-atom...done
422. monetaryunit...done
423. banyan-network...done
424. swarm-fund...done
425. credo...done
426. e-dinar-coin...done
427. blackmoon...done
428. mediccoin...done
429. axpire...done
430. tao...done
431. swissborg...done
432. devery...done
433. hacken...done
434. expanse...done
435. smartlands...done
436. zap...done
437. lu

773. creativecoin...done
774. yocoin...done
775. tigereum...done
776. speed-mining-service...done
777. condensate...done
778. ethereum-movie-venture...done
779. graft...done
780. indorse-token...done
781. ignition...done
782. litedoge...done
783. flypme...done
784. innova...done
785. latiumx...done
786. blocklancer...done
787. ellaism...done
788. biblepay...done
789. wandx...done
790. martexcoin...done
791. canada-ecoin...done
792. unbreakablecoin...done
793. inflationcoin...done
794. kzcash...done
795. ergo...done
796. triaconta...done
797. digitalprice...done
798. inpay...done
799. coinlancer...done
800. the-cypherfunks...done
801. photon...done
802. draftcoin...done
803. escoro...done
804. unitus...done
805. sagacoin...done
806. deutsche-emark...done
807. ganjacoin...done
808. bitwhite...done
809. oceanlab...done
810. ethbits...done
811. numus...done
812. miners-reward-token...done
813. bowhead...done
814. neverdie...done
815. skincoin...done
816. bytecent...done
817. jesus-coin...d

1139. coin...done
1140. icoin...done
1141. tattoocoin...done
1142. sproutsextreme...done
1143. freicoin...done
1144. ratecoin...done
1145. ammo-reloaded...done
1146. gameunits...done
1147. emerald...done
1148. 300-token...done
1149. bitcurrency...done
1150. datacoin...done
1151. shadow-token...done
1152. marscoin...done
1153. globalboost-y...done
1154. golfcoin...done
1155. signatum...done
1156. eternity...done
1157. polcoin...done
1158. digicube...done
1159. litebar...done
1160. revolvercoin...done
1161. paycon...done
1162. ecocoin...done
1163. spacecoin...done
1164. abjcoin...done
1165. quebecoin...done
1166. octocoin...done
1167. jin-coin...done
1168. redcoin...done
1169. px...done
1170. nekonium...done
1171. catcoin...done
1172. satoshimadness...done
1173. zurcoin...done
1174. quatloo...done
1175. theresa-may-coin...done
1176. amsterdamcoin...done
1177. elacoin...done
1178. rubies...done
1179. virtualcoin...done
1180. prime-xi...done
1181. globalcoin...done
1182. bankcoin...done
11

1490. digital-bullion-gold...done
1491. wi-coin...done
1492. tattoocoin-limited...done
1493. bestchain...done
1494. sakuracoin...done
1495. ereal...done
1496. zengold...done
1497. tellurion...done
1498. avatarcoin...done
1499. universalroyalcoin...done
1500. landcoin...done
1501. btcmoon...done
1502. huncoin...done
1503. wink...done
1504. protean...done
1505. altcommunity-coin...done
1506. cubits...done
1507. grandcoin...done
1508. nitro...done
1509. zsecoin...done
1510. cyder...done
1511. storjcoin-x...done
1512. cloud...done
1513. operand...done
1514. fapcoin...done
1515. bitbase...done
1516. supernet-unity...done
1517. primulon...done
1518. topaz...done
1519. bitserial...done
1520. omicron...done
1521. encryptotel-eth...done
1522. stex...done
1523. blazercoin...done
1524. donationcoin...done
1525. rcoin...done
1526. akuya-coin...done
1527. neo-gold...done
1528. fonziecoin...done
1529. batcoin...done
1530. antimatter...done
1531. wearesatoshi...done
1532. smoke...done
1533. levocoin.

In [11]:
## Step 2 of 2: load the scraped data into MySQL

# SQL column types for the historical_crypto table, keyed by DataFrame column.
dtypes = {
    'coin_name': sqlalchemy.types.NVARCHAR(length=100),
    'date_hist': sqlalchemy.types.Date,
}
dtypes.update({
    price_col: sqlalchemy.types.Float(asdecimal=True)
    for price_col in ('open_price', 'close_price', 'high_price', 'low_price')
})
dtypes.update({
    'market_cap': sqlalchemy.types.BIGINT,
    'volume': sqlalchemy.types.BIGINT,
})

# Write the frame to MySQL in 10,000-row chunks, replacing any existing table.
final_coin_data.to_sql(
    name='historical_crypto',
    con=engine,
    if_exists='replace',
    index=False,
    chunksize=10000,
    dtype=dtypes,
)

In [12]:
### Generate volatility metrics and create new analytics table in MySQL
## Each metric is the rolling standard deviation of close_price over the
## trailing N rows (days) of the same coin.
## Nulls are expected wherever a coin has fewer than N observations up to that
## date (rolling() defaults min_periods to the window size), so coins with
## less than a year of history have no 365-day value at all.
## NOTE(review): this is the std of price LEVELS; volatility is more commonly
## computed on returns (e.g. pct_change of close) -- confirm which is wanted.

# .copy() so the new columns are written to an independent frame.
analysis_historical = final_coin_data[['coin_name', 'date_hist', 'open_price', 'close_price']].copy()

# close_price may still be text at this point; compute on a numeric version
# and leave the stored column untouched.
close_numeric = pd.to_numeric(analysis_historical['close_price'], errors='coerce')

# Order each coin's rows oldest-first so every window looks backwards in time,
# whatever order the rows were scraped in.
ordered_close = (
    analysis_historical[['coin_name', 'date_hist']]
    .assign(close_numeric=close_numeric)
    .sort_values(['coin_name', 'date_hist'])
)
close_by_coin = ordered_close.groupby('coin_name')['close_numeric']

# transform() keeps the original row index, so the assignment below aligns each
# value with the row it was computed for. The previous
# rolling().std().reset_index(drop=True) returned values in group-sorted order
# and then assigned them positionally, attaching them to the wrong rows.
for window in (30, 90, 365):
    analysis_historical['Volitility_' + str(window) + '_day'] = close_by_coin.transform(
        lambda prices, w=window: prices.rolling(window=w).std()
    )

## fyi, normally annual volitility is 255 instead of 365 since there are only 255 trading days in a year, but since crypto is all 365 then I leave at 365

## Push to MySQL
analysis_historical.to_sql(name='analysis_historical', con=engine, if_exists='replace', index=False, chunksize=10000)