Skip to content

Commit

Permalink
Add bulk export functionality to DefraRuby::Exporters (#105)
Browse files Browse the repository at this point in the history
https://eaflood.atlassian.net/browse/RUBY-68

The second half of the functionality for the DefraRuby::Exporters library is to add bulk exports. This is the feature to generate CSV files containing all of the registration exemptions data in the database.
  • Loading branch information
Edward Minnett committed Mar 29, 2019
1 parent c788230 commit 48813fc
Show file tree
Hide file tree
Showing 35 changed files with 1,424 additions and 102 deletions.
10 changes: 7 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ EMAIL_SERVICE_EMAIL=""
EXPORT_SERVICE_BATCH_SIZE=10
EXPORT_SERVICE_EPR_EXPORT_TIME="1:05"
EXPORT_SERVICE_CRON_LOG_OUTPUT_PATH='/home/rails/waste-exemptions-back-office/shared/log/'
EXPORT_SERVICE_BULK_NUMBER_OF_MONTHS=1
# This can be any day of the week or 'day' for daily.
EXPORT_SERVICE_BULK_EXPORT_FREQUENCY="sunday"
EXPORT_SERVICE_BULK_EXPORT_TIME="20:05"

# AWS config
AWS_MANUAL_EXPORT_ACCESS_KEY_ID=<key_id>
AWS_MANUAL_EXPORT_SECRET_ACCESS_KEY=<secret_key>
AWS_MANUAL_EXPORT_BUCKET=<bucket_name>
AWS_BULK_EXPORT_ACCESS_KEY_ID=<key_id>
AWS_BULK_EXPORT_SECRET_ACCESS_KEY=<secret_key>
AWS_BULK_EXPORT_BUCKET=<bucket_name>

AWS_DAILY_EXPORT_ACCESS_KEY_ID=<key_id>
AWS_DAILY_EXPORT_SECRET_ACCESS_KEY=<secret_key>
Expand Down
6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@ env:
- AWS_DAILY_EXPORT_ACCESS_KEY_ID=keyid
- AWS_DAILY_EXPORT_SECRET_ACCESS_KEY=secretkey
- AWS_DAILY_EXPORT_BUCKET=daily-exports
- AWS_BULK_EXPORT_ACCESS_KEY_ID=keyid
- AWS_BULK_EXPORT_SECRET_ACCESS_KEY=secretkey
- AWS_BULK_EXPORT_BUCKET=bulk-exports
- EXPORT_SERVICE_EPR_EXPORT_TIME="2:05"
- EXPORT_SERVICE_CRON_LOG_OUTPUT_PATH="/home/rails/waste-exemptions-back-office/shared/log/"
- EXPORT_SERVICE_BULK_NUMBER_OF_MONTHS=1
- EXPORT_SERVICE_BULK_EXPORT_FREQUENCY="friday"
- EXPORT_SERVICE_BULK_EXPORT_TIME="16:05"

language: ruby
rvm: 2.4.2
Expand Down
9 changes: 9 additions & 0 deletions app/controllers/bulk_exports_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# frozen_string_literal: true

# Serves the back office "data exports" page (routed as GET /data-exports).
class BulkExportsController < ApplicationController
  # Checks that the current user may read the bulk export report, then
  # exposes a presenter to the view as @bulk_exports.
  def show
    authorize!(:read, DefraRuby::Exporters::RegistrationBulkExportReport)

    @bulk_exports = BulkExportsPresenter.new
  end
end
2 changes: 1 addition & 1 deletion app/models/ability.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ def permissions_for_data_agent
can :use_back_office, :all
can :read, WasteExemptionsEngine::Registration
can :read, WasteExemptionsEngine::TransientRegistration
can :export, WasteExemptionsEngine::Registration
can :read, DefraRuby::Exporters::RegistrationBulkExportReport
end
end
44 changes: 44 additions & 0 deletions app/presenters/bulk_exports_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# frozen_string_literal: true

# Prepares the data rendered by the bulk exports page: a status message saying
# when the export files were created, and one download link per export file.
class BulkExportsPresenter
  # links               - Array of Hashes with :start_date, :url and :text keys,
  #                       ordered by :start_date, most recent first.
  # exported_at_message - translated String describing when the files were
  #                       created, or that they have not been generated yet.
  attr_reader :links, :exported_at_message

  def initialize
    init_exported_at_message
    init_links
  end

  private

  # Builds the status message from the creation time of the first recorded
  # export file.
  #
  # The translation keys are fully qualified on purpose: the leading-dot
  # "lazy" lookup is only expanded by the Rails view/controller `t` helpers,
  # not by I18n.t, so relative keys called from a presenter never resolve.
  def init_exported_at_message
    export_executed_at = DefraRuby::Exporters::BulkExportFile.first&.created_at

    @exported_at_message =
      if export_executed_at
        I18n.t("bulk_exports.show.exported_at", export_executed_at: export_executed_at)
      else
        I18n.t("bulk_exports.show.not_yet_exported")
      end
  end

  # Builds link data for every recorded export file, newest date range first.
  def init_links
    link_data = DefraRuby::Exporters::BulkExportFile.all.map do |bulk_export_file|
      construct_link_data(bulk_export_file.file_name)
    end

    @links = link_data.sort_by { |link| link[:start_date] }.reverse
  end

  # Returns the link Hash for one file.
  #
  # The date range is read from the last underscore-separated segment of the
  # file name, e.g. "..._20190101-20190131.csv" => "20190101-20190131".
  def construct_link_data(file_name)
    date_range_description = file_name.split("_").last.sub(".csv", "")
    date_range = DefraRuby::Exporters::Helpers::DateRange.parse_date_range_description(date_range_description)
    {
      start_date: date_range.first,
      url: DefraRuby::Exporters::RegistrationExportService.presigned_url(:bulk, file_name),
      text: link_text(date_range)
    }
  end

  # Human readable label for a date range: a single "Month Year" when the
  # range starts and ends in the same month, otherwise "Month Year through
  # Month Year".
  def link_text(date_range)
    start_month = date_range.first.strftime("%B %Y")
    end_month = date_range.last.strftime("%B %Y")
    start_month == end_month ? start_month : "#{start_month} through #{end_month}"
  end
end
26 changes: 26 additions & 0 deletions app/views/bulk_exports/show.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<div class="grid-row">
<div class="column-full">
<%= render("waste_exemptions_engine/shared/back", back_path: root_path) %>

<h1 class="heading-large">
<%= t(".heading") %>
</h1>
<h4 class="heading-small">
<%= @bulk_exports.exported_at_message %>
</h4>
</div>
</div>

<% if @bulk_exports.links.any? %>
<div class="grid-row">
<div class="column-full">
<ul>
<% @bulk_exports.links.each do |link_data| %>
<li>
<%= link_to link_data[:text], link_data[:url] %>
</li>
<% end %>
</ul>
</div>
</div>
<% end %>
6 changes: 6 additions & 0 deletions app/views/layouts/application.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
main_app.users_path %>
</li>
<% end %>
<% if can?(:read, DefraRuby::Exporters::RegistrationBulkExportReport) %>
<li>
<%= link_to t("layouts.application.menu.exports"),
main_app.bulk_exports_path %>
</li>
<% end %>
</ul>
</nav>
<% else %>
Expand Down
35 changes: 21 additions & 14 deletions config/initializers/defra_ruby_exporters.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
# frozen_string_literal: true

module DefraRuby
module Exporters
def self.raise_missing_env_var(variable)
raise("Environment variable #{variable} has not been set")
end
require_relative "../../lib/defra_ruby/exporters/configuration"

BATCH_SIZE = (ENV["EXPORT_SERVICE_BATCH_SIZE"] || 3000).to_i

AWS_REGION = (ENV["AWS_REGION"] || "eu-west-1")
DefraRuby::Exporters.configure do |c|
def raise_missing_env_var(variable)
raise("Environment variable #{variable} has not been set")
end

EPR_EXPORT_AWS_CREDENTIALS = Aws::Credentials.new(
(ENV["AWS_DAILY_EXPORT_ACCESS_KEY_ID"] || raise_missing_env_var("AWS_DAILY_EXPORT_ACCESS_KEY_ID")),
(ENV["AWS_DAILY_EXPORT_SECRET_ACCESS_KEY"] || raise_missing_env_var("AWS_DAILY_EXPORT_SECRET_ACCESS_KEY"))
)
c.batch_size = ENV["EXPORT_SERVICE_BATCH_SIZE"].to_i if ENV["EXPORT_SERVICE_BATCH_SIZE"].present?
c.aws_region = ENV["AWS_REGION"] if ENV["AWS_REGION"].present?

EPR_EXPORT_S3_BUCKET = (ENV["AWS_DAILY_EXPORT_BUCKET"] || raise_missing_env_var("AWS_DAILY_EXPORT_BUCKET"))
c.epr_export_aws_credentials = Aws::Credentials.new(
(ENV["AWS_DAILY_EXPORT_ACCESS_KEY_ID"] || raise_missing_env_var("AWS_DAILY_EXPORT_ACCESS_KEY_ID")),
(ENV["AWS_DAILY_EXPORT_SECRET_ACCESS_KEY"] || raise_missing_env_var("AWS_DAILY_EXPORT_SECRET_ACCESS_KEY"))
)
c.epr_export_s3_bucket = (ENV["AWS_DAILY_EXPORT_BUCKET"] || raise_missing_env_var("AWS_DAILY_EXPORT_BUCKET"))
c.epr_export_filename = "waste_exemptions_epr_daily_full"

EPR_EXPORT_FILENAME = "waste_exemptions_epr_daily_full.csv"
c.bulk_export_aws_credentials = Aws::Credentials.new(
(ENV["AWS_BULK_EXPORT_ACCESS_KEY_ID"] || raise_missing_env_var("AWS_BULK_EXPORT_ACCESS_KEY_ID")),
(ENV["AWS_BULK_EXPORT_SECRET_ACCESS_KEY"] || raise_missing_env_var("AWS_BULK_EXPORT_SECRET_ACCESS_KEY"))
)
c.bulk_export_s3_bucket = (ENV["AWS_BULK_EXPORT_BUCKET"] || raise_missing_env_var("AWS_BULK_EXPORT_BUCKET"))
c.bulk_export_filename_base = "waste_exemptions_bulk_export"
if ENV["EXPORT_SERVICE_BULK_NUMBER_OF_MONTHS"].present?
c.bulk_export_number_of_months = ENV["EXPORT_SERVICE_BULK_NUMBER_OF_MONTHS"].to_i
end
end
7 changes: 7 additions & 0 deletions config/locales/bulk_exports.en.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
en:
bulk_exports:
show:
title: "Data Exports"
heading: "Data Exports"
not_yet_exported: "The files have not yet been generated"
exported_at: "These files were created at %{export_executed_at}"
1 change: 1 addition & 0 deletions config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ en:
menu:
dashboard: "Dashboard"
users: "Manage users"
exports: "Data exports"
shared:
select_role:
roles:
Expand Down
4 changes: 4 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
post "/users/activate/:id", to: "user_activations#activate", as: :activate_user
post "/users/deactivate/:id", to: "user_activations#deactivate", as: :deactivate_user

# Bulk Exports

get "/data-exports", to: "bulk_exports#show", as: :bulk_exports

# Registration management

resources :registrations, only: :show, param: :reference
Expand Down
8 changes: 8 additions & 0 deletions config/schedule.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@
every :day, at: (ENV["EXPORT_SERVICE_EPR_EXPORT_TIME"] || "1:05"), roles: [:db] do
rake "defra_ruby_exporters:epr"
end

# Bulk export job. When run, it creates batched CSV exports of all records and
# puts the files into an AWS S3 bucket. How often and at what time it runs are
# read from the environment, falling back to Sundays at 20:05.
bulk_export_frequency = ENV.fetch("EXPORT_SERVICE_BULK_EXPORT_FREQUENCY", "sunday").to_sym
bulk_export_time = ENV.fetch("EXPORT_SERVICE_BULK_EXPORT_TIME", "20:05")

every bulk_export_frequency, at: bulk_export_time, roles: [:db] do
  rake "defra_ruby_exporters:bulk"
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Adds the defra_ruby_exporters_bulk_export_files table: a file_name string
# column plus non-null created_at / updated_at timestamps.
class CreateDefraRubyExportersBulkExportFiles < ActiveRecord::Migration
  def change
    create_table(:defra_ruby_exporters_bulk_export_files) do |table|
      table.string(:file_name)

      table.timestamps(null: false)
    end
  end
end
8 changes: 7 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20190326133420) do
ActiveRecord::Schema.define(version: 20190327134526) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -43,6 +43,12 @@

add_index "addresses", ["registration_id"], name: "index_addresses_on_registration_id", using: :btree

create_table "defra_ruby_exporters_bulk_export_files", force: :cascade do |t|
t.string "file_name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "exemptions", force: :cascade do |t|
t.integer "category"
t.string "code"
Expand Down
9 changes: 9 additions & 0 deletions lib/defra_ruby/exporters/bulk_export_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# frozen_string_literal: true

module DefraRuby
module Exporters
# ActiveRecord model for a bulk export file record. Rows live in the
# defra_ruby_exporters_bulk_export_files table, which holds a file_name
# string and created_at / updated_at timestamps.
class BulkExportFile < ActiveRecord::Base
# The table name is set explicitly instead of being derived from the
# class name.
self.table_name = :defra_ruby_exporters_bulk_export_files
end
end
end
52 changes: 52 additions & 0 deletions lib/defra_ruby/exporters/configuration.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

module DefraRuby
  module Exporters
    class << self
      # The shared Configuration instance; nil until .configure is first called.
      attr_accessor :configuration

      # Yields the shared Configuration to the given block, creating it on
      # first use. Repeated calls reuse the same instance, so settings
      # accumulate across calls.
      def configure
        self.configuration ||= Configuration.new
        yield(configuration)
      end
    end

    # Settings for the exporters: batch size, AWS region, and the
    # credentials / bucket / file name for the EPR and bulk exports.
    class Configuration
      ATTRIBUTES = %i[
        batch_size
        aws_region
        epr_export_aws_credentials
        epr_export_s3_bucket
        epr_export_filename
        bulk_export_aws_credentials
        bulk_export_s3_bucket
        bulk_export_filename_base
        bulk_export_number_of_months
      ].freeze

      attr_accessor(*ATTRIBUTES)

      # Only these three attributes have defaults; every other attribute
      # starts out nil and must be assigned by the host application.
      def initialize
        @batch_size = 3000
        @aws_region = "eu-west-1"
        @bulk_export_number_of_months = 1
      end

      # Returns true when every attribute has a non-nil value; otherwise
      # raises a RuntimeError listing the attributes that are still nil.
      def ensure_valid
        unset = ATTRIBUTES.select { |name| public_send(name).nil? }
        unless unset.empty?
          raise "The following DefraRuby::Exporters configuration attributes are missing: #{unset}"
        end

        true
      end

      # Returns the AWS settings Hash (:credentials, :bucket, :region) for the
      # given export type, :epr or :bulk. Any other value returns nil.
      def aws_config(export_type)
        return unless %i[epr bulk].include?(export_type)

        {
          credentials: public_send(:"#{export_type}_export_aws_credentials"),
          bucket: public_send(:"#{export_type}_export_s3_bucket"),
          region: aws_region
        }
      end
    end
  end
end
64 changes: 64 additions & 0 deletions lib/defra_ruby/exporters/helpers/date_range.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# frozen_string_literal: true

require "date"

module DefraRuby
  module Exporters
    module Helpers
      # Generates, describes and parses fixed-length, month-aligned date ranges.
      module DateRange
        # Use January 1st as the starting point for generating the date ranges.
        BASE_MONTH = 1
        BASE_DAY = 1

        # Returns the consecutive date ranges, each range_months long and
        # anchored on the base day (January 1st) of the open date's year,
        # needed to encompass both open_date and close_date.
        #
        # open_date / close_date - anything responding to #to_date (Date,
        #                          DateTime, Time); any time-of-day part is
        #                          ignored. They may be given in either order.
        # range_months           - positive Integer length of each range.
        #
        # Returns an Array of Range objects whose endpoints are Dates.
        # Raises ArgumentError if range_months is not a positive Integer
        # (a zero-length range could never advance past the close date).
        #
        # For example:
        # > ranges = generate_date_ranges(DateTime.new(2019, 02, 05), DateTime.new(2019, 03, 17), 2)
        # The following are true
        # > ranges.count == 2
        # > ranges.first == (Date.new(2019, 01, 01)..Date.new(2019, 02, 28))
        # > ranges.last == (Date.new(2019, 03, 01)..Date.new(2019, 04, 30))
        def self.generate_date_ranges(open_date, close_date, range_months)
          unless range_months.is_a?(Integer) && range_months >= 1
            raise ArgumentError, "range_months must be a positive integer, got #{range_months.inspect}"
          end

          # Work in whole days and make sure the open_date is before the close_date.
          open_date, close_date = [open_date.to_date, close_date.to_date].sort
          ranges = []
          date_range = create_date_range(initial_date(open_date), range_months)
          loop do
            # Ranges that end before the open date are stepped over, not returned.
            ranges << date_range if date_in_or_before_range?(open_date, date_range)
            return ranges if final_range?(close_date, date_range)

            date_range = create_date_range(date_range.last + 1, range_months)
          end
        end

        # Formats a range as "YYYYMMDD-YYYYMMDD".
        def self.describe_date_range(range)
          "#{range.first.strftime('%Y%m%d')}-#{range.last.strftime('%Y%m%d')}"
        end

        # Inverse of describe_date_range: turns "YYYYMMDD-YYYYMMDD" back into
        # a Range of Dates.
        def self.parse_date_range_description(description)
          first_date, second_date = description.split("-")
          Date.parse(first_date)..Date.parse(second_date)
        end

        # The base day of the year on or before open_date.
        private_class_method def self.initial_date(open_date)
          date = Date.new(open_date.year, BASE_MONTH, BASE_DAY)
          # This is necessary if the base date changes to anything other than January 1st:
          date = date.prev_year if open_date < date
          date
        end

        # A range of num_months whole months starting on start_date.
        private_class_method def self.create_date_range(start_date, num_months)
          start_date..((start_date >> num_months) - 1)
        end

        # cover? compares against the endpoints only, instead of enumerating
        # every day in the range as include? does for Date ranges.
        private_class_method def self.date_in_or_before_range?(date, range)
          range.cover?(date) || date < range.first
        end

        private_class_method def self.final_range?(end_date, range)
          range.cover?(end_date)
        end
      end
    end
  end
end
Loading

0 comments on commit 48813fc

Please sign in to comment.