
Commit 6624631

1. Updated code with S3 uploading comments.
2. Updated logger when rake task runs successfully.
Bilal-Abbas-Gigalabs committed Jul 8, 2024
1 parent 09467fb commit 6624631
Showing 2 changed files with 35 additions and 32 deletions.
65 changes: 34 additions & 31 deletions app/services/importer/xml/workable_parser.rb
@@ -3,11 +3,10 @@ module Xml
     class WorkableParser < ApplicationService
       WORKABLE_URL = 'https://www.workable.com/boards/workable.xml'
       LOCAL_XML_PATH = 'workable.xml'
-      S3_BUCKET = 'your bucket name' # please enter your bucket name
-      S3_REGION = 'your regoin' # please enter your bucket region
-      S3_KEY = 'workable.xml'
+      S3_BUCKET = 'S3_BUCKET_NAME' # please enter your bucket name
+      S3_REGION = 'S3_REGION' # please enter your bucket region
       REDIRECTED_URLS_PATH = 'redirected_urls.json'
-      MAX_RETRIES = 5
+      MAX_RETRIES = 5 # workable allow 5 tries in certain time frame
       RETRY_DELAY = 5

       def initialize
@@ -16,7 +15,7 @@ def initialize
       end

       def import_jobs
-        # stream_and_save_xml
+        stream_and_save_xml
         parse_xml
         save_and_upload
         create_jobs
@@ -25,38 +24,43 @@
       private

       def stream_and_save_xml
-        puts("Started stream_and_save_xml")
-        response = retry_request do
-          Faraday.get(WORKABLE_URL) do |req|
-            req.options.timeout = 600
-            req.options.open_timeout = 600
-          end
-        end
-        if response
-          File.open(LOCAL_XML_PATH, 'wb') do |file|
-            response.body.each do |chunk|
-              file.write(chunk)
-            end
-          end
-          puts "File saved: #{LOCAL_XML_PATH}"
-          upload_to_s3(LOCAL_XML_PATH, S3_KEY)
-        else
-          puts "Failed to save file."
-        end
+        puts "Started stream_and_save_xml"
+        uri = URI.parse(WORKABLE_URL)
+        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
+          request = Net::HTTP::Get.new(uri.request_uri)
+          begin
+            http.request(request) do |response|
+              if response.is_a?(Net::HTTPSuccess)
+                File.open(LOCAL_XML_PATH, 'wb') do |file|
+                  total_size = response['content-length'].to_i
+                  downloaded_size = 0
+                  response.read_body do |chunk|
+                    file.write(chunk)
+                    downloaded_size += chunk.size
+                    if total_size > 0
+                      puts "Chunk download progress: #{((downloaded_size.to_f / total_size) * 100).round(2)}%"
+                    else
+                      puts "Chunk download progress: #{(downloaded_size.to_f / (downloaded_size + 1) * 100).round(2)}%"
+                    end
+                  end
+                end
+                puts "File saved: #{LOCAL_XML_PATH}"
+              else
+                puts "Failed to retrieve file: #{response.code} #{response.message}"
+              end
+            end
+          rescue StandardError => e
+            puts "Request failed: #{e.message}. Retrying..."
+            retry_request { stream_and_save_xml } # Retry the whole download in case of any error
+          end
+        end
       end

       def retry_request
         retries = 0
         begin
-          response = yield
-          puts "Response status: #{response.status}, Response body length: #{response.body.length}"
-          if response.status == 429 # Too Many Requests
-            raise Faraday::Error, "Too Many Requests"
-          end
-          response
-        rescue Faraday::Error => e
-          puts "Request failed: #{e.message}"
+          yield
         rescue StandardError => e
           retries += 1
           if retries <= MAX_RETRIES
             delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff
@@ -65,7 +69,6 @@ def retry_request
             retry
           else
             puts "Failed after #{MAX_RETRIES} retries: #{e.message}"
-            nil
           end
         end
       end
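
For reference, the backoff in retry_request grows as RETRY_DELAY * 2^(retries - 1), so with RETRY_DELAY = 5 and MAX_RETRIES = 5 the waits between attempts are 5, 10, 20, 40, and 80 seconds. A quick way to check the schedule using the constants from this file:

    RETRY_DELAY = 5
    MAX_RETRIES = 5
    (1..MAX_RETRIES).map { |attempt| RETRY_DELAY * (2 ** (attempt - 1)) }
    # => [5, 10, 20, 40, 80]
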
@@ -95,7 +98,7 @@ def upload_to_s3(local_path)
       end

       def create_jobs
-        print("Total number of URLs are #{@urls.count}")
+        print("Total number of URLs are #{@urls.count}. Please create jobs as per need. \n")
       end
     end
   end
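
The body of upload_to_s3 lies in a collapsed region of this diff; only its one-argument signature is visible in the hunk header above. A minimal sketch of such a helper (not the commit's actual implementation), assuming the aws-sdk-s3 gem and the S3_BUCKET and S3_REGION constants defined at the top of the class:

    require 'aws-sdk-s3'

    # Hypothetical sketch only; the committed upload_to_s3 body is not shown in this diff.
    # Assumes AWS credentials come from the standard environment variables or instance profile.
    def upload_to_s3(local_path)
      s3 = Aws::S3::Resource.new(region: S3_REGION)
      object = s3.bucket(S3_BUCKET).object(File.basename(local_path))
      object.upload_file(local_path) # upload_file switches to multipart uploads for large files
      puts "Uploaded #{local_path} to s3://#{S3_BUCKET}/#{object.key}"
    end
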
2 changes: 1 addition & 1 deletion lib/tasks/import_jobs.rake
@@ -3,7 +3,7 @@ namespace :importer do
     desc "Import jobs from Workable"
     task import_jobs: :environment do
       Importer::Xml::WorkableParserJob.perform_now
-      puts "Jobs imported successfully."
+      puts "\nJobs imported successfully."
     end
   end
 end
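
With stream_and_save_xml re-enabled in import_jobs, this task runs the full download, parse, and upload pipeline. The namespace lines above the hunk are collapsed, so the exact task path is not visible; if the task nests directly under the importer namespace, the invocation would be along these lines (hypothetical path, adjust to the actual nesting):

    bundle exec rake importer:import_jobs
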
