Skip to content

Commit

Permalink
crawler updated:
Browse files Browse the repository at this point in the history
  • Loading branch information
Aren Hovsepyan authored and Aren Hovsepyan committed Mar 19, 2017
1 parent c35831d commit 2c2ae69
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions main.py
Expand Up @@ -10,21 +10,23 @@

# Parse the command-line parameters.
args = parser.parse_args()
# Strip any trailing "/" once, so the crawler and the sitemap output below
# both work from the same normalised base URL.
url = args.url.rstrip("/")

# NOTE(review): not referenced in the visible part of the script -- confirm
# it is still needed.
found_links = []

# Initialise the crawler on the normalised base URL. The earlier call that
# passed the raw args.url is dropped: it was immediately overwritten by this
# one and only built a second, unused Crawler.
crawler = Crawler(url, exclude=args.exclude, no_verbose=args.no_verbose)

# Fetch the links.
links = crawler.start()


# Write the crawled links to the output file as an XML sitemap.
from xml.sax.saxutils import escape

with open(args.output, "w") as sitemap:
    sitemap.write('<?xml version="1.0" encoding="UTF-8"?>\n\t<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')

    for link in links:
        # Each link is appended to the base URL, followed by a trailing "/".
        # Escape XML special characters (e.g. "&" in a query string) so the
        # generated document stays well-formed; URLs without such characters
        # are written exactly as before.
        location = escape("{0}{1}/".format(url, link))
        # One <url> entry per link -- the superseded write that rebuilt the
        # base URL from args.url is dropped so entries are not duplicated.
        sitemap.write("\n\t\t<url>\n\t\t\t<loc>\n\t\t\t\t{0}\n\t\t\t</loc>\n\t\t</url>".format(location))

    sitemap.write('</urlset>')

Expand Down

0 comments on commit 2c2ae69

Please sign in to comment.