In [1]:
import os
import csv
import requests
import pandas as pd
import numpy as np
import configparser

In [2]:
config = configparser.ConfigParser()
config.read(".env")
APIKEY = config["RAPIDAPI"]["KEY"]

## Outline of Data Extraction Process

 ---
 
 Using the RapidApi, the data will be collected in the following order.
 
 * The symbols of market movers will be gotten using the endpoint "market/v2/get-movers" 
 * The market movers will be added as query to the endpoint "stock/v3/get-historical-data" to collect historical data of the tickers
 * The symbols of the market movers will also be used as query to get their summary which includes the financial earnings using the endpoint stock/v2/get-summary
 * Currency pairs selected for the project are used as query in the endpoint "market/get-spark" to collect data according to interval and range. 
 * The collected data will initially be saved in pandas dataframe. 
 

#### Extract 10 stock losers, gainers and most-active stocks in GB region, get their symbols 

In [3]:
url = "https://yh-finance.p.rapidapi.com/market/v2/get-movers"

querystring = {"region":"GB","lang":"en-GB","count":"10","start":"0"}

headers = {
	"X-RapidAPI-Key": APIKEY,
	"X-RapidAPI-Host": "yh-finance.p.rapidapi.com"
}

response = requests.request("GET", url, headers=headers, params=querystring).json()

In [4]:
# Get the gainers from quotes
gainer_quotes = response["finance"]["result"][0]["quotes"]
gainers = []

for quote in gainer_quotes:
    gainers.append(quote["symbol"]) 

In [5]:
# Get the losers
losers_quotes = response["finance"]["result"][1]["quotes"]
losers = []

for quote in losers_quotes:
    losers.append(quote["symbol"]) 

In [6]:
# Get the most actives
mostactives_quotes = response["finance"]["result"][2]["quotes"]
mostactives = []

for quote in mostactives_quotes:
    mostactives.append(quote["symbol"]) 

In [14]:
# Combine all into one DF
movers_df = pd.DataFrame(list(zip(gainers, losers, mostactives)), columns=['gainers', 'losers', 'mostactives'])

movers_df

Unnamed: 0,gainers,losers,mostactives
0,0BDR.IL,0JI3.L,7DIG.L
1,0MC5.IL,0E2B.IL,ICON.L
2,0RCP.IL,0AAT.IL,KOD.L
3,TUIB.L,MH65.L,SYME.L
4,0RCS.IL,0AAS.IL,0VRF.L
5,0RCR.IL,0V6Y.L,0MRI.IL
6,0XC6.IL,0OI0.L,PREM.L
7,0E4B.IL,0A5O.IL,0RQY.L
8,0I21.L,0MN3.IL,BOIL.L
9,0DP0.IL,0XWG.IL,GST.L


In [15]:
# Save movers in csv
movers_df.to_csv("data/movers.csv", index=False)

#### Extract historical data with the symbols 

In [3]:
#import movers csv
movers_df = pd.read_csv("data/movers.csv")

In [18]:
def history_data(path, movers, key):
    data = []
    url = "https://yh-finance.p.rapidapi.com/stock/v3/get-historical-data"
    for symbol in movers_df[movers]: 
        querystring = {"symbol": symbol,"region":"GB"}

        headers = {
            "X-RapidAPI-Key": key,
            "X-RapidAPI-Host": "yh-finance.p.rapidapi.com"
        }

        response = requests.request("GET", url, headers=headers, params=querystring).json()
        
        names = ['date', 'open', 'high', 'low', 'close', 'volume', 'adjclose', 'symbol', 'movers']
        for stock in response["prices"]:
            if len(stock) == 7:
                history_data = {'date': stock['date'], 
                                'open': stock['open'], 
                                'high': stock['high'], 
                                'low': stock['low'],
                                'close': stock['close'], 'volume': stock['volume'], 
                                'adjclose': stock['adjclose'], 'symbol': symbol,
                                'movers': movers}
                
                data.append(history_data)
        with open(path, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=names)
            writer.writeheader()
            writer.writerows(data)

In [19]:
# Collect historical data of each gainer stock
gainers_history = history_data("data/gainers_history.csv", "gainers", APIKEY)
gainers_history

In [20]:
# Collect historical data of each loser stock
losers_history = history_data("data/losers_history.csv", "losers", APIKEY)
losers_history

In [21]:
# Collect historical data of each mostactive stock
actives_history = history_data("data/actives_history.csv", "mostactives", APIKEY)
actives_history

#### Extract Summary and Financial information of each stock

In [4]:
url = "https://yh-finance.p.rapidapi.com/stock/v2/get-summary"

querystring = {"symbol":"AMRN","region":"US"}

headers = {
	"X-RapidAPI-Key": APIKEY,
	"X-RapidAPI-Host": "yh-finance.p.rapidapi.com"
}

response = requests.request("GET", url, headers=headers, params=querystring).json()

In [9]:
# ToDo
"""
  - Get the summaryProfile of stocks
  - Get the yearly earnings and revenue of all stocks
  - Get the dividend yield of all stock if any
"""
print(response["summaryProfile"])

{'zip': '112-114', 'sector': 'Healthcare', 'fullTimeEmployees': 365, 'longBusinessSummary': 'Amarin Corporation plc, a pharmaceutical company, engages in the development and commercialization of therapeutics for the treatment of cardiovascular diseases in the United States, European countries, Canada, Lebanon, and the United Arab Emirates. It offers VASCEPA, a prescription-only omega-3 fatty acid product, used as an adjunct to diet for reducing triglyceride levels in adult patients with severe hypertriglyceridemia. The company sells its products principally to wholesalers and specialty pharmacy providers. It has a collaboration with Mochida Pharmaceutical Co., Ltd. to develop and commercialize drug products and indications based on the active pharmaceutical ingredient in Vascepa. The company was formerly known as Ethical Holdings plc and changed its name to Amarin Corporation plc in 1999. Amarin Corporation plc was incorporated in 1989 and is headquartered in Dublin, Ireland.', 'city':

In [17]:
print(response["quoteType"])

{'exchange': 'NGM', 'shortName': 'Amarin Corporation plc', 'longName': 'Amarin Corporation plc', 'exchangeTimezoneName': 'America/New_York', 'exchangeTimezoneShortName': 'EDT', 'isEsgPopulated': False, 'gmtOffSetMilliseconds': '-14400000', 'quoteType': 'EQUITY', 'symbol': 'AMRN', 'messageBoardId': 'finmb_407863', 'market': 'us_market'}


In [11]:
print(response["financialData"])

{'ebitdaMargins': {'raw': -0.19277, 'fmt': '-19.28%'}, 'profitMargins': {'raw': -0.28658, 'fmt': '-28.66%'}, 'grossMargins': {'raw': 0.70575994, 'fmt': '70.58%'}, 'operatingCashflow': {'raw': -180092000, 'fmt': '-180.09M', 'longFmt': '-180,092,000'}, 'revenueGrowth': {'raw': -0.375, 'fmt': '-37.50%'}, 'operatingMargins': {'raw': -0.20115, 'fmt': '-20.12%'}, 'ebitda': {'raw': -71169000, 'fmt': '-71.17M', 'longFmt': '-71,169,000'}, 'targetLowPrice': {'raw': 1.5, 'fmt': '1.50'}, 'recommendationKey': 'hold', 'grossProfits': {'raw': 260562000, 'fmt': '260.56M', 'longFmt': '260,562,000'}, 'freeCashflow': {'raw': -85622000, 'fmt': '-85.62M', 'longFmt': '-85,622,000'}, 'targetMedianPrice': {'raw': 2.5, 'fmt': '2.50'}, 'currentPrice': {'raw': 1.35, 'fmt': '1.35'}, 'earningsGrowth': {'raw': -0.942, 'fmt': '-94.20%'}, 'currentRatio': {'raw': 2.656, 'fmt': '2.66'}, 'returnOnAssets': {'raw': -0.0475, 'fmt': '-4.75%'}, 'numberOfAnalystOpinions': {'raw': 6, 'fmt': '6', 'longFmt': '6'}, 'targetMeanPri

In [14]:
print(response["summaryDetail"])

{'previousClose': {'raw': 1.41, 'fmt': '1.4100'}, 'regularMarketOpen': {'raw': 1.39, 'fmt': '1.3900'}, 'twoHundredDayAverage': {'raw': 1.4304, 'fmt': '1.4304'}, 'trailingAnnualDividendYield': {'raw': 0, 'fmt': '0.00%'}, 'payoutRatio': {'raw': 0, 'fmt': '0.00%'}, 'volume24Hr': {}, 'regularMarketDayHigh': {'raw': 1.4, 'fmt': '1.4000'}, 'navPrice': {}, 'averageDailyVolume10Day': {'raw': 2945240, 'fmt': '2.95M', 'longFmt': '2,945,240'}, 'totalAssets': {}, 'regularMarketPreviousClose': {'raw': 1.41, 'fmt': '1.4100'}, 'fiftyDayAverage': {'raw': 1.7018, 'fmt': '1.7018'}, 'trailingAnnualDividendRate': {'raw': 0, 'fmt': '0.00'}, 'open': {'raw': 1.39, 'fmt': '1.3900'}, 'toCurrency': None, 'averageVolume10days': {'raw': 2945240, 'fmt': '2.95M', 'longFmt': '2,945,240'}, 'expireDate': {}, 'yield': {}, 'algorithm': None, 'dividendRate': {}, 'exDividendDate': {}, 'beta': {'raw': 1.791349, 'fmt': '1.79'}, 'circulatingSupply': {}, 'startDate': {}, 'regularMarketDayLow': {'raw': 1.35, 'fmt': '1.3500'}, 

In [19]:
# response["summaryProfile"]
# response["quoteType"]
# response["financialData"]
# response["summaryDetail"]
profile_earnings = {
   "symbol": response["quoteType"]["symbol"],
   "summary": response["summaryProfile"],
   "quoteType": response["quoteType"]["quoteType"],
   "beta": response["summaryDetail"]["beta"]["raw"],
   "dividendRate": response["summaryDetail"]["dividendRate"]["raw"],
   "marketCap": response["summaryDetail"]["marketCap"]["raw"],
   "dividendYield": response["summaryDetail"]["dividendYield"]["raw"],
   "exDividendDate": response["summaryDetail"]["exDividendDate"]["raw"],
   "dayHigh": response["summaryDetail"]["dayHigh"]["raw"],
   "dayLow": response["summaryDetail"]["dayLow"]["raw"],
   "ask": response["summaryDetail"]["ask"]["raw"],
   "previousClose": response["summaryDetail"]["previousClose"]["raw"],
   "marketOpen": response["summaryDetail"]["open"]["raw"],
   "bid": response["summaryDetail"]["bid"]["raw"],
   "askSize": response["summaryDetail"]["askSize"]["raw"],
   "bidSize": response["summaryDetail"]["bidSize"]["raw"],
   "volume": response["summaryDetail"]["volume"]["raw"],
   "fiftyTwoWeekHigh": response["summaryDetail"]["fiftyTwoWeekHigh"]["raw"],
   "fiftyTwoWeekLow": response["summaryDetail"]["fiftyTwoWeekLow"]["raw"],
   "earnings": response["earnings"]["financialsChart"]["yearly"]
}

print(profile_earnings)

AttributeError: 'dict' object has no attribute 'raw'

## Outline of Data Cleaning Process

---

  The collected data will here be cleaned and processed
  
  

## Data Wrangling and Transformation

## Data Storage and Loading