In [33]:
import requests
import json
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import urllib
import datetime
import os
from pytz import timezone
from pathlib import Path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options

In [43]:
# Format for time
fmt = "%Y-%m-%d %H:%M:%S"

userBot = "gpmbot"
pwdBot = "9ALjKza5GmXqxMKNf"

In [44]:
# URL for chat
rootUrl = "https://darkroom.global-precious-metals.com"

In [4]:
def drkrmLogin(user=userBot,password=pwdBot, rootUrl = rootUrl):
    """
    login tryout
    """
    apiLogin = "/api/v1/login"
    data = {"user" : user, "password" : password}
    r = requests.post(rootUrl+apiLogin,data=json.dumps(
    data), headers={'Content-Type': 'application/json'})
    if r.json()['status'] == 'success':
        return r.json()['data']['userId'], r.json()['data']['authToken'] 
    else:
        print(r.json())
        raise Exception("Something went wrong")

In [5]:
def post_message(msg, channel="#general", user=userBot,password=pwdBot, rootUrl = rootUrl):
    """
    we authenticate, then post
    """
    apiMsg = "/api/v1/chat.postMessage"
    userId, authToken = drkrmLogin(user=user, password=password, rootUrl=rootUrl)
    payload = { "channel" : channel, "text" : msg}
    response = requests.post(rootUrl+apiMsg,data=json.dumps(
    payload), headers={'Content-Type': 'application/json', 'X-Auth-Token' : authToken, 'X-USer-Id' : userId})
    return response

In [6]:
DCurl = "https://www.dukascopy.com/swiss/english/home/?utm_source=freeserv"

In [7]:
def validatePath(path):
    """
    Checks if path refers to a valid directory. If not an exception is raised.
    """
    
    try:
        os.path.isfile(filename)
    except:
        raise NotADirectoryError("The specified path does not exist or is invalid. The data will be collected and stored in the current working directory in the file: ")

In [45]:
def gmtToUtc(time):
    utcTime = time.astimezone(timezone("UTC"))
    return utcTime.strftime(fmt)

def currUtcTime():
    return datetime.datetime.now(timezone('UTC')).strftime(fmt)

In [50]:
def getDukasTime(soup):
    """
    Returns the extracted time as given on the DukasCopy webpage from where the DukasCopy data is being extracted from.
    
    Parameters:
    
    soup (soup): The BeautifulSoup object containing the parsed HTML representation of the DukasCopy webpage.
    
    Returns:
    date_time_obj (Datetime): Datetime object representing the time on the page when the data was taken.
    """
    tdata = soup.find_all("span", {"id": "timeUpdate"})
    dtxt = tdata[0].text
    
    date_time_obj = datetime.datetime.strptime(dtxt, ' %a, %d %b %Y %H:%M:%S GMT')
    
    return date_time_obj

In [9]:
def updateData(NewFrame, filename):
    """
    Updates the pickle file specified bt "filepath" by appending new data from the "NewFrame" dataframe to it.
    
    Parameters:
    
    NewFrame (DataFrame): The dataframe which is to be appended to the end of the pickle file to update the pickle file with the new data.
    filepath (str): The filepath of the pickle file where new dataframe data is to be appended.
    
    """
    if (not filename.endswith(".pkl")):
        raise Exception("The filename specified must end with .pkl extension. The filename provided was: " + filename)
    
    if not os.path.isfile(filename):
        pd.to_pickle(NewFrame, filename)
        
    else:   
        df = pd.read_pickle(filename)
        df = df.append(NewFrame, sort=False)
        df.to_pickle(filename)

In [85]:
# Settings for headless chrome

options = Options()
options.headless = True
options.add_argument('--no-proxy-server')
options.add_argument("--proxy-server='direct://'")
options.add_argument("--proxy-bypass-list=*")

In [86]:
def getDukasCopyData(pathToChromeDriver):
    """
    Returns a DataFrame object containing the most recent Gold Data from the DukasCopy website.
    Scrapes the data from the DukasCopy website before formatting and returning the data as a pandas DataFrame.
    
    Parameters:
    pathToChromeDriver (str): Path to the chromedriver executable to be used to open web pages.
    
    Returns:
    DCDataFrame (DataFrame): A pandas DataFrame containing the most recent Gold Data from the DukasCopy website.
    dataTime (Datetime): Datetime object representing the time at which data was fetched.
    """
    browser = webdriver.Chrome(pathToChromeDriver, options=options)
    browser.get(DCurl)
    
    DChtml = browser.page_source
    soup = BeautifulSoup(DChtml, "lxml")
    
    # Finding the exact table with all the required data
    data = soup.find_all("table", {"id": "list"})
    
    # Creating and Modifying dataframe with the table 
    DCDataFrame = pd.read_html(str(data))[0]
    
    # Add a timestamp to the data
    dataTime = getDukasTime(soup)
    DCDataFrame["timestamp"] = dataTime
    DCDataFrameDCDataFrame.set_index("timestamp")
    
    # Filter dataframe only to Gold Data
    DCDataFrame = DCDataFrame[DCDataFrame["Live"] == "XAU/USD"]
    
    browser.quit()
    return DCDataFrame, dataTime

In [104]:
def checkBidSurge(dataframe, threshold=0.0002):
    pctChange = dataframe["Bid"].pct_change()[-1]
    if abs(pctChange) >= threshold:
        print("EXCEEDED: BID PRICE AT " + str(dataframe["Bid"][-1]) + " UP/DOWN " + str(pctChange) + "%")

In [52]:
def fetchAndSaveDukas(filename, chromedriverpath):
    """
    Fetches the most recent Gold Data from the DukasCopy website and updates the pickle file specified by "dukasFilepath" with the newest data.
    If no pickle file with the specified name exists, a new one will be created.
    If no filepath is specified, the current working directory will be used and data will be stored in the pickle file .
    
    Parameters:
    dukasFilepath(str): The filepath to the pickle file where the fetched data will be stored.
    chromedriverpath (str): Path to the chromedriver executable to be used to open web pages.
    """
    df = getDukasCopyData(chromedriverpath)
    updateData(df, filename)

In [75]:
def sendPrices(filepath):
    currData, timestamp = getDukasCopyData("/Users/abhimanyadav/Desktop/chromedriver")
    
    try:
        bidPrice = currData.iloc[0]["Bid"]
        askPrice = currData.iloc[0]["Ask"]
    except:
        print("Skipped here")
    
    updateData(currData, filepath)
    formattedMsg = "GOLD PRICE as of " + str(timestamp) + "\n \nBid Price: " + str(bidPrice) + "\nAsk Price: " + str(askPrice)

    post_message(formattedMsg)

In [15]:
def driver():
    while True:
        sendPrices()
        time.sleep(7200)

In [88]:
def testRun(filepath="DukasGoldData.pkl"):
    currData, timestamp = getDukasCopyData("/Users/abhimanyadav/Desktop/chromedriver")
    
    try:
        bidPrice = currData.iloc[0]["Bid"]
        askPrice = currData.iloc[0]["Ask"]
    except:
        print("Skipped here")
    
    updateData(currData, filepath)
    formattedMsg = "GOLD PRICE as of " + str(timestamp) + "\n \nBid Price: " + str(bidPrice) + "\nAsk Price: " + str(askPrice)
    
    print(formattedMsg)

In [87]:
while True:
    testRun()
    time.sleep(25)

NameError: name 'DCDataFrameDCDataFrame' is not defined

In [63]:
df = pd.read_pickle("DukasGoldData.pkl")

In [79]:
df["Bid"].pct_change()

timestamp
2020-01-14 16:06:02         NaN
2020-01-14 16:07:02   -0.000207
2020-01-14 16:08:01    0.000084
2020-01-14 16:09:00    0.000071
2020-01-14 16:10:01   -0.000175
Name: Bid, dtype: float64

In [80]:
df["Bid"].pct_change()[-1]

-0.00017487952743666657

In [89]:
df["Bid"][-1]

1543.65

In [105]:
checkBidSurge(df[0:2])

EXCEEDED: BID PRICE AT 1543.68 UP/DOWN -0.0002072538860102835%


In [100]:
df[0:2]

Unnamed: 0_level_0,Live,Bid,Ask,Spread
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-14 16:06:02,XAU/USD,1544.0,1544.29,28.7
2020-01-14 16:07:02,XAU/USD,1543.68,1543.93,24.7


In [98]:
df[0:2]["Bid"].pct_change()

timestamp
2020-01-14 16:06:02         NaN
2020-01-14 16:07:02   -0.000207
Name: Bid, dtype: float64