Skip to content

Commit 69f072a

Browse files
author
Debra Ray
committed
added scripts
1 parent 3239857 commit 69f072a

File tree

5 files changed

+159
-0
lines changed

5 files changed

+159
-0
lines changed

Diff for: scrape_github_repos.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 11:15:38 2019

@author: rayde

Scrape the list of forked repositories from a GitHub profile page and
print the href of every matching link.

NOTE(review): `scraper` and `symbol` are not defined or imported in this
file -- presumably provided by a sibling module; confirm before running.
"""

# Profile page filtered to forked repositories.
url = 'https://github.com/dray89?utf8=%E2%9C%93&tab=repositories&q=&type=fork&language='
soup_page = scraper(symbol).__general__(url)

# GitHub renders the repo links we want with this CSS class.
a = soup_page.find_all('a', class_="muted-link mr-3")

# Fix: removed the bare `a[0]['href']` expression -- it had no effect and
# raised IndexError whenever no links were found.
for each in a:
    print(each['href'])

Diff for: scrape_tmx.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 11:15:38 2019

@author: rayde

Scrape the stock screener table from TMX Money and print the href of
every matching table row.

NOTE(review): `scraper` and `symbol` are not defined or imported in this
file -- presumably provided by a sibling module; confirm before running.
"""

url = 'https://web.tmxmoney.com/screener.php?qm_page=46608'
soup_page = scraper(symbol).__general__(url)

# NOTE(review): <tr> elements do not normally carry an href attribute --
# verify the page actually stores the link on the row element.
a = soup_page.find_all('tr', class_="menuItem")

# Fix: removed the bare `a[0]['href']` expression -- it had no effect and
# raised IndexError whenever no rows were found.
for each in a:
    print(each['href'])

Diff for: sectors.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Sep 15 13:03:31 2019
4+
5+
@author: rayde
6+
"""
7+
import os.path
8+
from os import path
9+
import re
10+
11+
class sector:
    """Look up symbol/industry pairs stored in a sector text file.

    The data files live at ``finance_python/<sector>.txt`` and are laid
    out as alternating lines: a line containing a symbol is followed by
    a line naming its industry (inferred from ``lookup_industry``'s
    ``index + 1`` access -- confirm against the data files).
    """

    def __init__(self, sector):
        '''sector choices = ["Basic_Materials", "Communication_Services",
        "Consumer_Cyclical", "Consumer_Defensive", "Energy",
        "Financial_Services", "Healthcare", "Industrials", "Real_Estate",
        "Technology", "Utilities"]'''
        self.sector = sector
        file = "finance_python/{0}.txt".format(sector.lower())
        self.file = open(file, 'r')
        self.contents = self.clean_contents()
        # Fix: close the handle once the contents are cached -- the
        # original leaked an open file for the life of the instance.
        self.file.close()

    def clean_contents(self):
        """Read every line of the sector file, stripping trailing newlines."""
        contents = self.file.readlines()
        new_list = []
        for each in contents:
            new_list.append(each.strip('\n'))
        return new_list

    def print_contents(self):
        """Print the cached file contents (returns None)."""
        return print(self.contents)

    def lookup_industry(self, symbol):
        """Print the line following each line that contains ``symbol``."""
        for index, item in enumerate(self.contents):
            # Fix: bounds check -- a match on the final line used to
            # raise IndexError.
            if symbol in item and index + 1 < len(self.contents):
                print(self.contents[index + 1])
36+
37+

Diff for: td_scrape.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Fri Sep 20 18:36:51 2019
4+
5+
@author: rayde
6+
"""
7+
import requests, pandas, lxml
8+
from bs4 import BeautifulSoup as soup
9+
from urllib.request import urlopen
10+
from lxml import html
11+
import re
12+
13+
def all_methods():
    """For every letter A-Z: read the saved TD Ameritrade margin page,
    extract the table cells, and append the cleaned rows to a matching
    .txt file.

    NOTE(review): the Windows-specific Downloads path is hard-coded.
    """
    url = urls()
    for item1, item2 in url:
        filename = 'C:\\Users\\rayde\\Downloads\\{0}'.format(item1)
        con = content(filename)
        clean = clean_html(con)
        # Fix: write_list returns None, so the original
        # `write = write_list(...)` was a useless binding.
        write_list(filename, clean)
20+
21+
def urls():
    """Build (letter, url) pairs for every capital letter A-Z.

    Returns a list of 26 tuples pairing each letter with the
    TD Ameritrade margin-requirements page filtered to that letter.
    """
    base = "https://invest.ameritrade.com/cgi-bin/apps/u/MarginReq?pagehandler=PHMarginRequirements&filter={0}"
    letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    return [(letter, base.format(letter)) for letter in letters]
33+
34+
def xpath(number = 27):
    """Return XPath strings for table rows tr[0] .. tr[number-1].

    Each entry keeps the trailing " \\n" so the list can be written
    straight to a file.
    """
    template = "/html/body/div[3]/p[2]/table[1]/tbody/tr[{0}] \n"
    return [template.format(row) for row in range(number)]
41+
42+
def readurl(url, html_filename):
    """Fetch ``url`` and save the response body to ``html_filename``.

    Returns the ``requests`` response object.

    Fix: the original called ``requests.get(url, html_filename)`` --
    the second positional argument of ``requests.get`` is ``params``,
    so the filename was being appended to the query string instead of
    being used as a save target.
    """
    response = requests.get(url)
    with open(html_filename, 'w') as out:
        out.write(response.text)
    return response
45+
46+
def content(html_file = "C:\\Users\\rayde\\Downloads\\TDAmeritrade.html"):
    """Return the full text of the given HTML file.

    ``html_file`` may be given with or without the ``.html`` extension.

    Fixes: the original unconditionally appended '.html', which turned
    the default argument into 'TDAmeritrade.html.html'; it also leaked
    the open file handle (no close, no ``with``).
    """
    if not html_file.endswith('.html'):
        html_file = html_file + '.html'
    with open(html_file, 'r') as f:
        return f.read()
51+
52+
def clean_html(contents, rex = r"<td>.*</td>"):
    """Return every substring of ``contents`` matching ``rex``.

    Fix: the original default pattern "[<td>].*[</td>]" misused
    character classes -- it matched any span merely starting and ending
    with one of the characters '<', 't', 'd', '>', '/', not literal
    <td>...</td> cells. Callers passing their own ``rex`` are
    unaffected.

    NOTE: '.*' is greedy, so multiple cells on one line collapse into a
    single match -- same as the original pattern.
    """
    clean_list = re.findall(rex, contents)
    return clean_list
55+
56+
def write_list(filename, clean_list, letter = '[A-Z]+'):
    """Append to ``filename``.txt one line per item of ``clean_list``
    that contains at least one match of the ``letter`` pattern; matches
    found on an item are separated by single spaces.
    """
    out_path = filename + '.txt'
    with open(out_path, 'a+') as handle:
        for entry in clean_list:
            matches = re.findall(letter, entry)
            # Skip entries with no uppercase runs at all.
            if not matches:
                continue
            matches.append('\n')
            handle.writelines(" ".join(matches))

Diff for: xpath.txt

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/html/body/div[3]/p[2]/table[1]/tbody/tr[0]
2+
/html/body/div[3]/p[2]/table[1]/tbody/tr[1]
3+
/html/body/div[3]/p[2]/table[1]/tbody/tr[2]
4+
/html/body/div[3]/p[2]/table[1]/tbody/tr[3]
5+
/html/body/div[3]/p[2]/table[1]/tbody/tr[4]
6+
/html/body/div[3]/p[2]/table[1]/tbody/tr[5]
7+
/html/body/div[3]/p[2]/table[1]/tbody/tr[6]
8+
/html/body/div[3]/p[2]/table[1]/tbody/tr[7]
9+
/html/body/div[3]/p[2]/table[1]/tbody/tr[8]
10+
/html/body/div[3]/p[2]/table[1]/tbody/tr[9]
11+
/html/body/div[3]/p[2]/table[1]/tbody/tr[10]
12+
/html/body/div[3]/p[2]/table[1]/tbody/tr[11]
13+
/html/body/div[3]/p[2]/table[1]/tbody/tr[12]
14+
/html/body/div[3]/p[2]/table[1]/tbody/tr[13]
15+
/html/body/div[3]/p[2]/table[1]/tbody/tr[14]
16+
/html/body/div[3]/p[2]/table[1]/tbody/tr[15]
17+
/html/body/div[3]/p[2]/table[1]/tbody/tr[16]
18+
/html/body/div[3]/p[2]/table[1]/tbody/tr[17]
19+
/html/body/div[3]/p[2]/table[1]/tbody/tr[18]
20+
/html/body/div[3]/p[2]/table[1]/tbody/tr[19]
21+
/html/body/div[3]/p[2]/table[1]/tbody/tr[20]
22+
/html/body/div[3]/p[2]/table[1]/tbody/tr[21]
23+
/html/body/div[3]/p[2]/table[1]/tbody/tr[22]
24+
/html/body/div[3]/p[2]/table[1]/tbody/tr[23]
25+
/html/body/div[3]/p[2]/table[1]/tbody/tr[24]
26+
/html/body/div[3]/p[2]/table[1]/tbody/tr[25]
27+
/html/body/div[3]/p[2]/table[1]/tbody/tr[26]

0 commit comments

Comments
 (0)