From 09467fb85ebd01329c34dd5682496d8473547e13 Mon Sep 17 00:00:00 2001
From: Bilal-Abbas-Gigalabs
Date: Wed, 3 Jul 2024 22:44:49 +0500
Subject: [PATCH] initial files for workable parser and uploading

---
 app/jobs/importer/xml/workable_parser_job.rb  |  17 +++
 app/services/importer/xml/workable_parser.rb  | 136 ++++++++++++++++++
 lib/tasks/import_jobs.rake                    |  10 ++
 .../importer/xml/workable_parser_job_spec.rb  |  13 ++
 spec/tasks/import_jobs_rake_spec.rb           |  14 ++
 5 files changed, 190 insertions(+)
 create mode 100644 app/jobs/importer/xml/workable_parser_job.rb
 create mode 100644 app/services/importer/xml/workable_parser.rb
 create mode 100644 lib/tasks/import_jobs.rake
 create mode 100644 spec/jobs/importer/xml/workable_parser_job_spec.rb
 create mode 100644 spec/tasks/import_jobs_rake_spec.rb

diff --git a/app/jobs/importer/xml/workable_parser_job.rb b/app/jobs/importer/xml/workable_parser_job.rb
new file mode 100644
--- /dev/null
+++ b/app/jobs/importer/xml/workable_parser_job.rb
@@ -0,0 +1,17 @@
+module Importer
+  module Xml
+    # Background job that runs the Workable feed import by delegating to
+    # Importer::Xml::WorkableParser#import_jobs.
+    class WorkableParserJob < ApplicationJob
+      queue_as :default
+      # With attempts: 0 a failed run is never re-enqueued; ActiveJob re-raises
+      # the error instead of scheduling a retry.
+      retry_on StandardError, attempts: 0
+
+      # Builds a parser and runs the import pipeline.
+      def perform
+        Importer::Xml::WorkableParser.new.import_jobs
+      end
+    end
+  end
+end
diff --git a/app/services/importer/xml/workable_parser.rb b/app/services/importer/xml/workable_parser.rb
new file mode 100644
--- /dev/null
+++ b/app/services/importer/xml/workable_parser.rb
@@ -0,0 +1,136 @@
+module Importer
+  module Xml
+    # Imports the Workable XML job feed.
+    #
+    # The pipeline reads the feed from LOCAL_XML_PATH, extracts the text of
+    # every `url` element, writes the list to REDIRECTED_URLS_PATH as JSON and
+    # uploads that file to S3.
+    class WorkableParser < ApplicationService
+      WORKABLE_URL = 'https://www.workable.com/boards/workable.xml'
+      LOCAL_XML_PATH = 'workable.xml'
+      # Bucket and region can be supplied through the environment; the
+      # placeholder fallbacks must be replaced before a real upload can work.
+      S3_BUCKET = ENV.fetch('WORKABLE_S3_BUCKET', 'your bucket name')
+      S3_REGION = ENV.fetch('WORKABLE_S3_REGION', 'your region')
+      S3_KEY = 'workable.xml'
+      REDIRECTED_URLS_PATH = 'redirected_urls.json'
+      MAX_RETRIES = 5
+      RETRY_DELAY = 5 # seconds; doubled after every failed attempt
+
+      def initialize
+        puts("Started parser initializer")
+        @s3_client = Aws::S3::Client.new(region: S3_REGION)
+        @urls = [] # never nil, so later steps can always call #count / #to_json
+      end
+
+      # Runs the import pipeline: parse the local feed, then persist and
+      # upload the extracted URLs. Stops early when the feed file is missing.
+      #
+      # NOTE(review): the download step is disabled, so LOCAL_XML_PATH must
+      # already exist on disk; confirm whether that is intentional.
+      def import_jobs
+        # stream_and_save_xml
+        return unless parse_xml
+
+        save_and_upload
+        create_jobs
+      end
+
+      private
+
+      # Downloads the feed to LOCAL_XML_PATH and uploads a copy to S3 under
+      # S3_KEY. The whole response body is buffered in memory before it is
+      # written to disk.
+      def stream_and_save_xml
+        puts("Started stream_and_save_xml")
+        response = retry_request do
+          Faraday.get(WORKABLE_URL) do |req|
+            req.options.timeout = 600
+            req.options.open_timeout = 600
+          end
+        end
+
+        # retry_request returns nil once the retries are exhausted; a non-2xx
+        # response must not overwrite the local feed with an error page.
+        unless response&.success?
+          puts "Failed to save file."
+          return
+        end
+
+        # The body is written in one call: String does not respond to #each.
+        File.binwrite(LOCAL_XML_PATH, response.body)
+        puts "File saved: #{LOCAL_XML_PATH}"
+        upload_to_s3(LOCAL_XML_PATH, S3_KEY)
+      end
+
+      # Yields to the given block to perform an HTTP request, retrying with
+      # exponential backoff on Faraday errors and HTTP 429 responses.
+      #
+      # @return [Faraday::Response, nil] the response, or nil once
+      #   MAX_RETRIES retries have failed
+      def retry_request
+        retries = 0
+        begin
+          response = yield
+          puts "Response status: #{response.status}, Response body length: #{response.body.to_s.length}"
+          raise Faraday::Error, "Too Many Requests" if response.status == 429
+
+          response
+        rescue Faraday::Error => e
+          puts "Request failed: #{e.message}"
+          retries += 1
+          if retries <= MAX_RETRIES
+            delay = RETRY_DELAY * (2**(retries - 1)) # exponential backoff
+            puts "Retrying in #{delay} seconds..."
+            sleep delay
+            retry
+          else
+            puts "Failed after #{MAX_RETRIES} retries: #{e.message}"
+            nil
+          end
+        end
+      end
+
+      # Parses LOCAL_XML_PATH and stores the stripped text of every `url`
+      # element in @urls.
+      #
+      # @return [Boolean] false when the file does not exist, true otherwise
+      def parse_xml
+        puts("Started parse_xml")
+        unless File.exist?(LOCAL_XML_PATH)
+          puts "File not found: #{LOCAL_XML_PATH}"
+          return false
+        end
+
+        # Block form closes the file handle once Nokogiri has read it.
+        @doc = File.open(LOCAL_XML_PATH) { |file| Nokogiri::XML(file) }
+        @urls = @doc.xpath('//url').map { |url| url.text.strip }
+        true
+      end
+
+      # Writes @urls to REDIRECTED_URLS_PATH as JSON and uploads the file.
+      def save_and_upload
+        puts("Started save_and_upload")
+        File.write(REDIRECTED_URLS_PATH, @urls.to_json)
+        upload_to_s3(REDIRECTED_URLS_PATH)
+      end
+
+      # Uploads a local file to S3_BUCKET.
+      #
+      # @param local_path [String] path of the file to upload
+      # @param key [String] object key in the bucket; defaults to local_path
+      def upload_to_s3(local_path, key = local_path)
+        puts("Started upload_to_s3")
+        File.open(local_path, 'rb') do |file|
+          @s3_client.put_object(bucket: S3_BUCKET, key: key, body: file)
+        end
+        puts "Uploaded #{local_path} to S3 bucket #{S3_BUCKET} as #{key}"
+      end
+
+      # Currently only reports how many URLs were parsed.
+      def create_jobs
+        puts("Total number of URLs are #{@urls.count}")
+      end
+    end
+  end
+end
diff --git a/lib/tasks/import_jobs.rake b/lib/tasks/import_jobs.rake
new file mode 100644
--- /dev/null
+++ b/lib/tasks/import_jobs.rake
@@ -0,0 +1,10 @@
+namespace :importer do
+  namespace :xml do
+    # Runs Importer::Xml::WorkableParserJob inline via perform_now.
+    desc "Import jobs from Workable"
+    task import_jobs: :environment do
+      Importer::Xml::WorkableParserJob.perform_now
+      puts "Jobs imported successfully."
+    end
+  end
+end
diff --git a/spec/jobs/importer/xml/workable_parser_job_spec.rb b/spec/jobs/importer/xml/workable_parser_job_spec.rb
new file mode 100644
--- /dev/null
+++ b/spec/jobs/importer/xml/workable_parser_job_spec.rb
@@ -0,0 +1,13 @@
+require 'rails_helper'
+
+RSpec.describe Importer::Xml::WorkableParserJob, type: :job do
+  describe '#perform' do
+    it 'calls import_jobs on WorkableParser' do
+      parser = instance_double("Importer::Xml::WorkableParser")
+      allow(Importer::Xml::WorkableParser).to receive(:new).and_return(parser)
+      expect(parser).to receive(:import_jobs)
+
+      described_class.perform_now
+    end
+  end
+end
diff --git a/spec/tasks/import_jobs_rake_spec.rb b/spec/tasks/import_jobs_rake_spec.rb
new file mode 100644
--- /dev/null
+++ b/spec/tasks/import_jobs_rake_spec.rb
@@ -0,0 +1,14 @@
+require 'rails_helper'
+require 'rake'
+
+RSpec.describe 'importer:xml:import_jobs', type: :task do
+  before :all do
+    Rake.application.rake_require 'tasks/import_jobs'
+    Rake::Task.define_task(:environment)
+  end
+
+  it 'executes the import_jobs task successfully' do
+    expect(Importer::Xml::WorkableParserJob).to receive(:perform_now)
+    Rake::Task['importer:xml:import_jobs'].invoke
+  end
+end