diff --git a/app/services/importer/xml/workable_parser.rb b/app/services/importer/xml/workable_parser.rb
index 778fc21f..7efb0afd 100644
--- a/app/services/importer/xml/workable_parser.rb
+++ b/app/services/importer/xml/workable_parser.rb
@@ -3,11 +3,10 @@ module Xml
     class WorkableParser < ApplicationService
       WORKABLE_URL = 'https://www.workable.com/boards/workable.xml'
       LOCAL_XML_PATH = 'workable.xml'
-      S3_BUCKET = 'your bucket name' # please enter your bucket name
-      S3_REGION = 'your regoin' # please enter your bucket region
-      S3_KEY = 'workable.xml'
+      S3_BUCKET = 'S3_BUCKET_NAME' # please enter your bucket name
+      S3_REGION = 'S3_REGION' # please enter your bucket region
       REDIRECTED_URLS_PATH = 'redirected_urls.json'
-      MAX_RETRIES = 5
+      MAX_RETRIES = 5 # workable allow 5 tries in certain time frame
       RETRY_DELAY = 5
 
       def initialize
@@ -16,7 +15,7 @@ def initialize
       end
 
       def import_jobs
-        # stream_and_save_xml
+        stream_and_save_xml
         parse_xml
         save_and_upload
         create_jobs
@@ -25,38 +24,43 @@ def import_jobs
       private
 
       def stream_and_save_xml
-        puts("Started stream_and_save_xml")
-        response = retry_request do
-          Faraday.get(WORKABLE_URL) do |req|
-            req.options.timeout = 600
-            req.options.open_timeout = 600
-          end
-        end
-
-        if response
-          File.open(LOCAL_XML_PATH, 'wb') do |file|
-            response.body.each do |chunk|
-              file.write(chunk)
+        puts "Started stream_and_save_xml"
+        uri = URI.parse(WORKABLE_URL)
+        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
+          request = Net::HTTP::Get.new(uri.request_uri)
+          begin
+            http.request(request) do |response|
+              if response.is_a?(Net::HTTPSuccess)
+                File.open(LOCAL_XML_PATH, 'wb') do |file|
+                  total_size = response['content-length'].to_i
+                  downloaded_size = 0
+                  response.read_body do |chunk|
+                    file.write(chunk)
+                    downloaded_size += chunk.size
+                    if total_size > 0
+                      puts "Chunk download progress: #{((downloaded_size.to_f / total_size) * 100).round(2)}%"
+                    else
+                      puts "Chunk download progress: #{(downloaded_size.to_f / (downloaded_size + 1) * 100).round(2)}%"
+                    end
+                  end
+                end
+                puts "File saved: #{LOCAL_XML_PATH}"
+              else
+                puts "Failed to retrieve file: #{response.code} #{response.message}"
+              end
             end
+          rescue StandardError => e
+            puts "Request failed: #{e.message}. Retrying..."
+            retry_request { stream_and_save_xml } # Retry the whole download in case of any error
           end
-          puts "File saved: #{LOCAL_XML_PATH}"
-          upload_to_s3(LOCAL_XML_PATH, S3_KEY)
-        else
-          puts "Failed to save file."
         end
       end
 
       def retry_request
         retries = 0
         begin
-          response = yield
-          puts "Response status: #{response.status}, Response body length: #{response.body.length}"
-          if response.status == 429 # Too Many Requests
-            raise Faraday::Error, "Too Many Requests"
-          end
-          response
-        rescue Faraday::Error => e
-          puts "Request failed: #{e.message}"
+          yield
+        rescue StandardError => e
           retries += 1
           if retries <= MAX_RETRIES
             delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff
@@ -65,7 +69,6 @@ def retry_request
             retry
           else
             puts "Failed after #{MAX_RETRIES} retries: #{e.message}"
-            nil
           end
         end
       end
@@ -95,7 +98,7 @@ def upload_to_s3(local_path)
       end
 
       def create_jobs
-        print("Total number of URLs are #{@urls.count}")
+        print("Total number of URLs are #{@urls.count}. Please create jobs as per need. \n")
       end
     end
   end
diff --git a/lib/tasks/import_jobs.rake b/lib/tasks/import_jobs.rake
index 55a49f8d..9c32a760 100644
--- a/lib/tasks/import_jobs.rake
+++ b/lib/tasks/import_jobs.rake
@@ -3,7 +3,7 @@ namespace :importer do
     desc "Import jobs from Workable"
     task import_jobs: :environment do
       Importer::Xml::WorkableParserJob.perform_now
-      puts "Jobs imported successfully."
+      puts "\nJobs imported successfully."
     end
   end
 end
\ No newline at end of file