Skip to content

Commit 69f072a

Browse files
author
Debra Ray
committed
added scripts
1 parent 3239857 commit 69f072a

File tree

5 files changed

+159
-0
lines changed

5 files changed

+159
-0
lines changed

Diff for: scrape_github_repos.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 11:15:38 2019

@author: rayde

Scrape the list of forked repositories from a GitHub profile page and
print the href of every matching link.

NOTE(review): `scraper` and `symbol` are not defined or imported in this
file -- presumably provided by a sibling module; confirm before running.
"""

# Profile page filtered to forked repositories.
url = 'https://github.com/dray89?utf8=%E2%9C%93&tab=repositories&q=&type=fork&language='
soup_page = scraper(symbol).__general__(url)

# GitHub renders the repo links we want with this CSS class.
a = soup_page.find_all('a', class_="muted-link mr-3")

# Fix: removed the bare `a[0]['href']` expression -- it had no effect and
# raised IndexError whenever no links were found.
for each in a:
    print(each['href'])

Diff for: scrape_tmx.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 11:15:38 2019

@author: rayde

Scrape the stock screener table from TMX Money and print the href of
every matching table row.

NOTE(review): `scraper` and `symbol` are not defined or imported in this
file -- presumably provided by a sibling module; confirm before running.
"""

url = 'https://web.tmxmoney.com/screener.php?qm_page=46608'
soup_page = scraper(symbol).__general__(url)

# NOTE(review): <tr> elements do not normally carry an href attribute --
# verify the page actually stores the link on the row element.
a = soup_page.find_all('tr', class_="menuItem")

# Fix: removed the bare `a[0]['href']` expression -- it had no effect and
# raised IndexError whenever no rows were found.
for each in a:
    print(each['href'])

Diff for: sectors.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Sep 15 13:03:31 2019
4+
5+
@author: rayde
6+
"""
7+
import os.path
8+
from os import path
9+
import re
10+
11+
class sector:
    """Look up symbol/industry pairs stored in a sector text file.

    The data files live at ``finance_python/<sector>.txt`` and are laid
    out as alternating lines: a line containing a symbol is followed by
    a line naming its industry (inferred from ``lookup_industry``'s
    ``index + 1`` access -- confirm against the data files).
    """

    def __init__(self, sector):
        '''sector choices = ["Basic_Materials", "Communication_Services",
        "Consumer_Cyclical", "Consumer_Defensive", "Energy",
        "Financial_Services", "Healthcare", "Industrials", "Real_Estate",
        "Technology", "Utilities"]'''
        self.sector = sector
        file = "finance_python/{0}.txt".format(sector.lower())
        self.file = open(file, 'r')
        self.contents = self.clean_contents()
        # Fix: close the handle once the contents are cached -- the
        # original leaked an open file for the life of the instance.
        self.file.close()

    def clean_contents(self):
        """Read every line of the sector file, stripping trailing newlines."""
        contents = self.file.readlines()
        new_list = []
        for each in contents:
            new_list.append(each.strip('\n'))
        return new_list

    def print_contents(self):
        """Print the cached file contents (returns None)."""
        return print(self.contents)

    def lookup_industry(self, symbol):
        """Print the line following each line that contains ``symbol``."""
        for index, item in enumerate(self.contents):
            # Fix: bounds check -- a match on the final line used to
            # raise IndexError.
            if symbol in item and index + 1 < len(self.contents):
                print(self.contents[index + 1])
36+
37+

Diff for: td_scrape.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Fri Sep 20 18:36:51 2019
4+
5+
@author: rayde
6+
"""
7+
import requests, pandas, lxml
8+
from bs4 import BeautifulSoup as soup
9+
from urllib.request import urlopen
10+
from lxml import html
11+
import re
12+
13+
def all_methods():
    """For every letter A-Z: read the saved TD Ameritrade margin page,
    extract the table cells, and append the cleaned rows to a matching
    .txt file.

    NOTE(review): the Windows-specific Downloads path is hard-coded.
    """
    url = urls()
    for item1, item2 in url:
        filename = 'C:\\Users\\rayde\\Downloads\\{0}'.format(item1)
        con = content(filename)
        clean = clean_html(con)
        # Fix: write_list returns None, so the original
        # `write = write_list(...)` was a useless binding.
        write_list(filename, clean)
20+
21+
def urls():
    """Build (letter, url) pairs for every capital letter A-Z.

    Returns a list of 26 tuples pairing each letter with the
    TD Ameritrade margin-requirements page filtered to that letter.
    """
    base = "https://invest.ameritrade.com/cgi-bin/apps/u/MarginReq?pagehandler=PHMarginRequirements&filter={0}"
    letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    return [(letter, base.format(letter)) for letter in letters]
33+
34+
def xpath(number = 27):
    """Return XPath strings for table rows tr[0] .. tr[number-1].

    Each entry keeps the trailing " \\n" so the list can be written
    straight to a file.
    """
    template = "/html/body/div[3]/p[2]/table[1]/tbody/tr[{0}] \n"
    return [template.format(row) for row in range(number)]
41+
42+
def readurl(url, html_filename):
    """Fetch ``url`` and save the response body to ``html_filename``.

    Returns the ``requests`` response object.

    Fix: the original called ``requests.get(url, html_filename)`` --
    the second positional argument of ``requests.get`` is ``params``,
    so the filename was being appended to the query string instead of
    being used as a save target.
    """
    response = requests.get(url)
    with open(html_filename, 'w') as out:
        out.write(response.text)
    return response
45+
46+
def content(html_file = "C:\\Users\\rayde\\Downloads\\TDAmeritrade.html"):
    """Return the full text of the given HTML file.

    ``html_file`` may be given with or without the ``.html`` extension.

    Fixes: the original unconditionally appended '.html', which turned
    the default argument into 'TDAmeritrade.html.html'; it also leaked
    the open file handle (no close, no ``with``).
    """
    if not html_file.endswith('.html'):
        html_file = html_file + '.html'
    with open(html_file, 'r') as f:
        return f.read()
51+
52+
def clean_html(contents, rex = r"<td>.*</td>"):
    """Return every substring of ``contents`` matching ``rex``.

    Fix: the original default pattern "[<td>].*[</td>]" misused
    character classes -- it matched any span merely starting and ending
    with one of the characters '<', 't', 'd', '>', '/', not literal
    <td>...</td> cells. Callers passing their own ``rex`` are
    unaffected.

    NOTE: '.*' is greedy, so multiple cells on one line collapse into a
    single match -- same as the original pattern.
    """
    clean_list = re.findall(rex, contents)
    return clean_list
55+
56+
def write_list(filename, clean_list, letter = '[A-Z]+'):
    """Append to ``filename``.txt one line per item of ``clean_list``
    that contains at least one match of the ``letter`` pattern; matches
    found on an item are separated by single spaces.
    """
    out_path = filename + '.txt'
    with open(out_path, 'a+') as handle:
        for entry in clean_list:
            matches = re.findall(letter, entry)
            # Skip entries with no uppercase runs at all.
            if not matches:
                continue
            matches.append('\n')
            handle.writelines(" ".join(matches))

Diff for: xpath.txt

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/html/body/div[3]/p[2]/table[1]/tbody/tr[0]
2+
/html/body/div[3]/p[2]/table[1]/tbody/tr[1]
3+
/html/body/div[3]/p[2]/table[1]/tbody/tr[2]
4+
/html/body/div[3]/p[2]/table[1]/tbody/tr[3]
5+
/html/body/div[3]/p[2]/table[1]/tbody/tr[4]
6+
/html/body/div[3]/p[2]/table[1]/tbody/tr[5]
7+
/html/body/div[3]/p[2]/table[1]/tbody/tr[6]
8+
/html/body/div[3]/p[2]/table[1]/tbody/tr[7]
9+
/html/body/div[3]/p[2]/table[1]/tbody/tr[8]
10+
/html/body/div[3]/p[2]/table[1]/tbody/tr[9]
11+
/html/body/div[3]/p[2]/table[1]/tbody/tr[10]
12+
/html/body/div[3]/p[2]/table[1]/tbody/tr[11]
13+
/html/body/div[3]/p[2]/table[1]/tbody/tr[12]
14+
/html/body/div[3]/p[2]/table[1]/tbody/tr[13]
15+
/html/body/div[3]/p[2]/table[1]/tbody/tr[14]
16+
/html/body/div[3]/p[2]/table[1]/tbody/tr[15]
17+
/html/body/div[3]/p[2]/table[1]/tbody/tr[16]
18+
/html/body/div[3]/p[2]/table[1]/tbody/tr[17]
19+
/html/body/div[3]/p[2]/table[1]/tbody/tr[18]
20+
/html/body/div[3]/p[2]/table[1]/tbody/tr[19]
21+
/html/body/div[3]/p[2]/table[1]/tbody/tr[20]
22+
/html/body/div[3]/p[2]/table[1]/tbody/tr[21]
23+
/html/body/div[3]/p[2]/table[1]/tbody/tr[22]
24+
/html/body/div[3]/p[2]/table[1]/tbody/tr[23]
25+
/html/body/div[3]/p[2]/table[1]/tbody/tr[24]
26+
/html/body/div[3]/p[2]/table[1]/tbody/tr[25]
27+
/html/body/div[3]/p[2]/table[1]/tbody/tr[26]

0 commit comments

Comments
 (0)