From 6213f17971feaf1df9d18ff55798f5c8ad078734 Mon Sep 17 00:00:00 2001 From: Jesse McGinnis Date: Thu, 21 Mar 2024 14:11:30 -0400 Subject: [PATCH 1/4] Revert "Revert "Add minitest hooks and make seeding fast"" --- Gemfile | 1 + Gemfile.lock | 3 + app/models/categories_property.rb | 6 ++ app/models/category.rb | 9 ++- app/models/properties_property_value.rb | 6 ++ app/models/property.rb | 9 ++- app/models/property_value.rb | 3 +- app/serializers/data/category_serializer.rb | 35 +++++++-- app/serializers/data/property_serializer.rb | 25 +++++++ .../data/property_value_serializer.rb | 18 ++++- app/serializers/object_serializer.rb | 2 +- application.rb | 2 + db/schema.rb | 6 +- db/seed.rb | 40 +++++----- .../integration/all_data_files_import_test.rb | 73 +++++++++++++++---- 15 files changed, 186 insertions(+), 52 deletions(-) create mode 100644 app/models/categories_property.rb create mode 100644 app/models/properties_property_value.rb diff --git a/Gemfile b/Gemfile index 9d1b538d..fd63586a 100644 --- a/Gemfile +++ b/Gemfile @@ -12,4 +12,5 @@ gem "rubocop-shopify", require: false group :test do gem "minitest", "~> 5.21" + gem "minitest-hooks", "~> 1.5" end diff --git a/Gemfile.lock b/Gemfile.lock index 2f76acea..15a51fc4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -70,6 +70,8 @@ GEM rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.4.0) minitest (5.21.2) + minitest-hooks (1.5.1) + minitest (> 5.3) mutex_m (0.2.0) parallel (1.24.0) parser (3.3.0.5) @@ -128,6 +130,7 @@ DEPENDENCIES byebug jekyll (~> 4.3) minitest (~> 5.21) + minitest-hooks (~> 1.5) rake (~> 13.1) rubocop-shopify sqlite3 (~> 1.7) diff --git a/app/models/categories_property.rb b/app/models/categories_property.rb new file mode 100644 index 00000000..f8f98817 --- /dev/null +++ b/app/models/categories_property.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +class CategoriesProperty < ApplicationRecord + belongs_to :category + belongs_to :property, foreign_key: :property_friendly_id, primary_key: :friendly_id +end diff --git a/app/models/category.rb b/app/models/category.rb index 29751b38..2a7ebdc3 100644 --- a/app/models/category.rb +++ b/app/models/category.rb @@ -7,7 +7,14 @@ class Category < ApplicationRecord has_many :children, class_name: "Category", inverse_of: :parent belongs_to :parent, class_name: "Category", optional: true - has_and_belongs_to_many :properties + + has_and_belongs_to_many :properties, + join_table: :categories_properties, + foreign_key: :category_id, + association_foreign_key: :property_friendly_id + def property_friendly_ids=(ids) + self.properties = Property.where(friendly_id: ids) + end validates :id, presence: { strict: true }, diff --git a/app/models/properties_property_value.rb b/app/models/properties_property_value.rb new file mode 100644 index 00000000..491c44e1 --- /dev/null +++ b/app/models/properties_property_value.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +class PropertiesPropertyValue < ApplicationRecord + belongs_to :property + belongs_to :property_value +end diff --git a/app/models/property.rb b/app/models/property.rb index 433dbf62..de4c7df4 100644 --- a/app/models/property.rb +++ b/app/models/property.rb @@ -3,8 +3,13 @@ class Property < ApplicationRecord default_scope { order(:name) } - has_and_belongs_to_many :categories - has_and_belongs_to_many :property_values + has_and_belongs_to_many :categories, + join_table: :categories_properties, + foreign_key: :property_friendly_id, + association_foreign_key: :category_id + + has_and_belongs_to_many :property_values, + join_table: :properties_property_values validates :name, presence: true diff --git a/app/models/property_value.rb b/app/models/property_value.rb index ee2937e1..483543bc 100644 --- a/app/models/property_value.rb +++ b/app/models/property_value.rb @@ -3,7 +3,8 @@ class PropertyValue < ApplicationRecord default_scope { order(Arel.sql("CASE WHEN name = 'Other' THEN 1 ELSE 0 END, name")) } - has_and_belongs_to_many :properties + has_and_belongs_to_many :properties, + join_table: :properties_property_values validates :name, presence: true diff --git a/app/serializers/data/category_serializer.rb b/app/serializers/data/category_serializer.rb index 58426bd1..d24b121b 100644 --- a/app/serializers/data/category_serializer.rb +++ b/app/serializers/data/category_serializer.rb @@ -3,6 +3,10 @@ module Serializers module Data class CategorySerializer < ObjectSerializer + class << self + delegate(:deserialize_for_insert_all, :deserialize_for_join_insert_all, to: :instance) + end + def serialize(category) { "id" => category.id, @@ -13,15 +17,34 @@ def serialize(category) end def deserialize(hash) - id = hash["id"].downcase - parent_id = id.split("-")[0...-1].join("-").presence - name = hash["name"] - - Category.new(id:, parent_id:, name:).tap do |category| + Category.new(**attributes_from(hash)).tap do |category| category.child_ids = hash["children"] if hash["children"] - category.property_ids = Property.where(friendly_id: hash["attributes"]).pluck(:id) if hash["attributes"] + category.property_friendly_ids = hash["attributes"] if hash["attributes"] end end + + def deserialize_for_insert_all(array) + array.map { attributes_from(_1) } + end + + def deserialize_for_join_insert_all(array) + array.flat_map do |hash| + category_id = hash["id"].downcase + hash["attributes"].map do |property_friendly_id| + { category_id:, property_friendly_id: } + end + end + end + + private + + def attributes_from(hash) + { + id: hash["id"].downcase, + parent_id: hash["id"].split("-")[0...-1].join("-").presence, + name: hash["name"], + } + end end end end diff --git a/app/serializers/data/property_serializer.rb b/app/serializers/data/property_serializer.rb index ed057aea..cb21ebc8 100644 --- a/app/serializers/data/property_serializer.rb +++ b/app/serializers/data/property_serializer.rb @@ -3,6 +3,10 @@ module Serializers module Data class PropertySerializer < ObjectSerializer + class << self + delegate(:deserialize_for_insert_all, :deserialize_for_join_insert_all, to: :instance) + end + def serialize(property) { "id" => property.id, @@ -20,6 +24,27 @@ def deserialize(hash) property_value_ids: hash["values"].map { _1["id"] }, ) end + + def deserialize_for_insert_all(array) + array.map do |hash| + { + id: hash["id"], + friendly_id: hash["friendly_id"], + name: hash["name"], + } + end + end + + def deserialize_for_join_insert_all(array) + array.flat_map do |hash| + hash["values"].map do |value_hash| + { + property_id: hash["id"], + property_value_id: value_hash["id"], + } + end + end + end end end end diff --git a/app/serializers/data/property_value_serializer.rb b/app/serializers/data/property_value_serializer.rb index 73b27295..bff19b32 100644 --- a/app/serializers/data/property_value_serializer.rb +++ b/app/serializers/data/property_value_serializer.rb @@ -3,6 +3,10 @@ module Serializers module Data class PropertyValueSerializer < ObjectSerializer + class << self + delegate(:deserialize_for_insert_all, to: :instance) + end + def serialize(property_value) { "id" => property_value.id, @@ -11,10 +15,20 @@ def serialize(property_value) end def deserialize(hash) - PropertyValue.new( + PropertyValue.new(**attributes_from(hash)) + end + + def deserialize_for_insert_all(array) + array.map { attributes_from(_1) } + end + + private + + def attributes_from(hash) + { id: hash["id"], name: hash["name"], - ) + } end end end diff --git a/app/serializers/object_serializer.rb b/app/serializers/object_serializer.rb index ac51b68f..494f2320 100644 --- a/app/serializers/object_serializer.rb +++ b/app/serializers/object_serializer.rb @@ -5,7 +5,7 @@ class ObjectSerializer include Singleton class << self - delegate :serialize, :deserialize, to: :instance + delegate(:serialize, :deserialize, to: :instance) end def serialize(object) diff --git a/application.rb b/application.rb index ba547ea0..3e56f001 100644 --- a/application.rb +++ b/application.rb @@ -12,6 +12,8 @@ require_relative "app/models/category" require_relative "app/models/property" require_relative "app/models/property_value" +require_relative "app/models/categories_property" +require_relative "app/models/properties_property_value" require_relative "app/serializers/object_serializer" require_relative "app/serializers/data/category_serializer" diff --git a/db/schema.rb b/db/schema.rb index b14c5521..e85d0b72 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -19,10 +19,10 @@ create_table :categories_properties, id: false, force: :cascade do |t| t.string(:category_id, null: false) - t.integer(:property_id, null: false) + t.string(:property_friendly_id, null: false) - t.index([:category_id, :property_id], unique: true) - t.index(:property_id) + t.index([:category_id, :property_friendly_id], unique: true) + t.index(:property_friendly_id) end create_table :properties_property_values, id: false, force: :cascade do |t| t.integer(:property_id, null: false) diff --git a/db/seed.rb b/db/seed.rb index e159e1ae..8726e184 100644 --- a/db/seed.rb +++ b/db/seed.rb @@ -7,39 +7,37 @@ class Seed class << self def attributes_from(data) puts "Importing values" - data.each do |property_json| - property_json["values"].each do |property_json| - property_value = Serializers::Data::PropertyValueSerializer.deserialize(property_json) - by_id = PropertyValue.find_by(id: property_value.id) - - if by_id.nil? - property_value.save! - elsif by_id.name != property_value.name - puts " ⨯ Failed to import value: #{property_value.name} <#{property_value.id}> already exists as " \ - "#{by_id.name} <#{by_id.id}>" - end - end - end + raw_values = data.flat_map { _1.fetch("values") } + values = Serializers::Data::PropertyValueSerializer.deserialize_for_insert_all(raw_values) + PropertyValue.insert_all(values) puts "✓ Imported #{PropertyValue.count} values" puts "Importing properties" - data.each do |json| - Serializers::Data::PropertySerializer.deserialize(json).save! - end + properties = Serializers::Data::PropertySerializer.deserialize_for_insert_all(data) + Property.insert_all(properties) puts "✓ Imported #{Property.count} properties" + + puts "Importing property ↔ value relationships" + joins = Serializers::Data::PropertySerializer.deserialize_for_join_insert_all(data) + PropertiesPropertyValue.insert_all(joins) + puts "✓ Imported #{PropertiesPropertyValue.count} property ↔ value relationships" end def categories_from(data) puts "Importing #{data.count} category verticals" data.each do |vertical_json| puts " → #{vertical_json.first.fetch("name")}" - vertical_json.each do |category_json| - unless Serializers::Data::CategorySerializer.deserialize(category_json.except("children")).save - puts " ⨯ Failed to import category: #{category_json["name"]} <#{category_json["id"]}>" - end - end + categories = Serializers::Data::CategorySerializer.deserialize_for_insert_all(vertical_json) + Category.insert_all(categories) end puts "✓ Imported #{Category.count} categories" + + puts "Importing category relationships" + data.each do |vertical_json| + joins = Serializers::Data::CategorySerializer.deserialize_for_join_insert_all(vertical_json) + CategoriesProperty.insert_all(joins) + end + puts "✓ Imported #{CategoriesProperty.count} category ↔ property relationships" end end end diff --git a/test/integration/all_data_files_import_test.rb b/test/integration/all_data_files_import_test.rb index 39b280d9..d7dfcc0e 100644 --- a/test/integration/all_data_files_import_test.rb +++ b/test/integration/all_data_files_import_test.rb @@ -3,31 +3,68 @@ require_relative "../test_helper" class AllDataFilesImportTest < ActiveSupport::TestCase - def teardown - Category.destroy_all - Property.destroy_all - PropertyValue.destroy_all + include Minitest::Hooks + + def before_all + @raw_attributes_data = YAML.load_file("#{Application.root}/data/attributes/attributes.yml") + DB::Seed.attributes_from(@raw_attributes_data) + + # this will be replaced by values.yml + @unique_raw_values_data = @raw_attributes_data + .flat_map { _1.fetch("values") } + .uniq { _1.fetch("id") } + + # Categories are only successfully parseable if attributes are already present + category_files = Dir.glob("#{Application.root}/data/categories/*.yml") + @raw_verticals_data = category_files.map { YAML.load_file(_1) } + DB::Seed.categories_from(@raw_verticals_data) + end + + test "AttributeValues are correctly imported from attributes.yml" do + assert_equal @unique_raw_values_data.size, PropertyValue.count end - test "DB::Seed imports everything from all data/ files" do - raw_attributes_data = YAML.load_file("#{Application.root}/data/attributes/attributes.yml") - DB::Seed.attributes_from(raw_attributes_data) + test "AttributeValues are consistent with attributes.yml" do + @unique_raw_values_data.each do |raw_value| + deserialized_value = Serializers::Data::PropertyValueSerializer.deserialize(raw_value) + real_value = PropertyValue.find(raw_value.fetch("id")) + + assert_equal deserialized_value, real_value + end + end + + test "AttributeValues are all valid" do + PropertyValue.all.each do |value| + assert_predicate value, :valid? + end + end - assert_equal raw_attributes_data.size, Property.count - raw_attributes_data.each do |raw_attribute| + test "Attributes are correctly imported from attributes.yml" do + assert_equal @raw_attributes_data.size, Property.count + end + + test "Attributes are consistent with attributes.yml" do + @raw_attributes_data.each do |raw_attribute| deserialized_attribute = Serializers::Data::PropertySerializer.deserialize(raw_attribute) real_attribute = Property.find(raw_attribute.fetch("id")) assert_equal deserialized_attribute, real_attribute end + end - category_files = Dir.glob("#{Application.root}/data/categories/*.yml") - raw_verticals_data = category_files.map { YAML.load_file(_1) } - DB::Seed.categories_from(raw_verticals_data) + test "Attributes are all valid" do + Property.all.each do |attribute| + assert_predicate attribute, :valid? + end + end - assert_equal raw_verticals_data.size, Category.verticals.count - assert_equal raw_verticals_data.map(&:size).sum, Category.count - raw_verticals_data.flatten.each do |raw_category| + test "Categories are fully imported from categories/*.yml" do + assert_equal @raw_verticals_data.size, Category.verticals.count + assert_equal @raw_verticals_data.map(&:size).sum, Category.count + end + + test "Categories are consistent with categories/*.yml" do + @raw_verticals_data.flatten.each do |raw_category| deserialized_category = Serializers::Data::CategorySerializer.deserialize(raw_category) real_category = Category.find(raw_category.fetch("id")) @@ -36,4 +73,10 @@ def teardown assert_equal deserialized_category.children, real_category.children end end + + test "Categories are all valid" do + Category.all.each do |category| + assert_predicate category, :valid? + end + end end From c3622d06c07f47b3ae60c359f9c1dc5d2455209d Mon Sep 17 00:00:00 2001 From: Jesse McGinnis Date: Thu, 21 Mar 2024 14:24:33 -0400 Subject: [PATCH 2/4] Back to has_many :through, as it should be --- app/models/category.rb | 6 ++---- app/models/property.rb | 10 ++++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/app/models/category.rb b/app/models/category.rb index 2a7ebdc3..c04aaa63 100644 --- a/app/models/category.rb +++ b/app/models/category.rb @@ -8,10 +8,8 @@ class Category < ApplicationRecord has_many :children, class_name: "Category", inverse_of: :parent belongs_to :parent, class_name: "Category", optional: true - has_and_belongs_to_many :properties, - join_table: :categories_properties, - foreign_key: :category_id, - association_foreign_key: :property_friendly_id + has_many :categories_properties, dependent: :destroy + has_many :properties, through: :categories_properties, foreign_key: :property_friendly_id def property_friendly_ids=(ids) self.properties = Property.where(friendly_id: ids) end diff --git a/app/models/property.rb b/app/models/property.rb index de4c7df4..2092f32f 100644 --- a/app/models/property.rb +++ b/app/models/property.rb @@ -3,13 +3,11 @@ class Property < ApplicationRecord default_scope { order(:name) } - has_and_belongs_to_many :categories, - join_table: :categories_properties, - foreign_key: :property_friendly_id, - association_foreign_key: :category_id + has_many :categories_properties, dependent: :destroy, foreign_key: :property_friendly_id, primary_key: :friendly_id + has_many :categories, through: :categories_properties - has_and_belongs_to_many :property_values, - join_table: :properties_property_values + has_many :properties_property_values, dependent: :destroy + has_many :property_values, through: :properties_property_values validates :name, presence: true From 9be6c549c820a068679111803d08f7c555877ff9 Mon Sep 17 00:00:00 2001 From: Jesse McGinnis Date: Thu, 21 Mar 2024 15:37:42 -0400 Subject: [PATCH 3/4] Add tests to ensure associations are setup correctly --- .../integration/all_data_files_import_test.rb | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/test/integration/all_data_files_import_test.rb b/test/integration/all_data_files_import_test.rb index d7dfcc0e..c9c2cf11 100644 --- a/test/integration/all_data_files_import_test.rb +++ b/test/integration/all_data_files_import_test.rb @@ -79,4 +79,64 @@ def before_all assert_predicate category, :valid? end end + + test "Category ↔ Attribute relationships are consistent with categories/*.yml" do + @raw_verticals_data.flatten.each do |raw_category| + real_category = Category.find(raw_category.fetch("id")) + raw_category.fetch("attributes").each do |friendly_id| + real_attribute = Property.find_by(friendly_id:) + assert_includes real_category.properties, real_attribute + end + end + end + + test "Attribute ↔ Value relationships are consistent with attributes.yml" do + Property.all.each do |attribute| + raw_attribute = @raw_attributes_data.find { _1.fetch("id") == attribute.id } + raw_values = raw_attribute.fetch("values") + real_values = attribute.property_values + + assert_equal raw_values.size, real_values.size + raw_values.each do |raw_value| + real_value = real_values.find { _1.id == raw_value.fetch("id") } + assert_equal Serializers::Data::PropertyValueSerializer.deserialize(raw_value), real_value + end + end + end + + # more fragile, but easier sanity check + test "Snowboards category is fully imported and modeled correctly" do + snowboard_id = "sg-4-17-2-17" + raw_snowboard_category = @raw_verticals_data.flatten.find { _1.fetch("id") == snowboard_id } + snowboard_category = Category.find(snowboard_id) + + assert_equal raw_snowboard_category.fetch("name"), snowboard_category.name + assert_empty snowboard_category.children + + raw_snowboard_attributes = raw_snowboard_category.fetch("attributes").sort + assert_equal raw_snowboard_attributes.size, snowboard_category.properties.size + raw_snowboard_attributes + .zip(snowboard_category.properties.reorder(:friendly_id)) + .each do |friendly_id, property| + assert_equal friendly_id, property.friendly_id + end + end + + # more fragile, but easier sanity check + test "Color attribute <1> is fully imported and modeled correctly" do + color_id = 1 + raw_color_attribute = @raw_attributes_data.find { _1.fetch("id") == color_id } + color_attribute = Property.find(color_id) + + assert_equal raw_color_attribute.fetch("name"), color_attribute.name + + raw_color_values = raw_color_attribute.fetch("values").sort_by { _1.fetch("id") } + assert_equal raw_color_values.size, color_attribute.property_values.size + raw_color_values + .zip(color_attribute.property_values.reorder(:id)) + .each do |raw_value, value| + assert_equal raw_value.fetch("id"), value.id + assert_equal raw_value.fetch("name"), value.name + end + end end From 1b250d0a2cf2e0d94b8b9ff66fef78badb897651 Mon Sep 17 00:00:00 2001 From: Jesse McGinnis Date: Thu, 21 Mar 2024 15:55:57 -0400 Subject: [PATCH 4/4] Now let's make the tests simpler --- .../integration/all_data_files_import_test.rb | 74 +++++++++---------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/test/integration/all_data_files_import_test.rb b/test/integration/all_data_files_import_test.rb index c9c2cf11..e3bd8713 100644 --- a/test/integration/all_data_files_import_test.rb +++ b/test/integration/all_data_files_import_test.rb @@ -84,59 +84,53 @@ def before_all @raw_verticals_data.flatten.each do |raw_category| real_category = Category.find(raw_category.fetch("id")) raw_category.fetch("attributes").each do |friendly_id| - real_attribute = Property.find_by(friendly_id:) - assert_includes real_category.properties, real_attribute + property = Property.find_by(friendly_id:) + assert_includes real_category.properties, property end end end test "Attribute ↔ Value relationships are consistent with attributes.yml" do - Property.all.each do |attribute| - raw_attribute = @raw_attributes_data.find { _1.fetch("id") == attribute.id } - raw_values = raw_attribute.fetch("values") - real_values = attribute.property_values - - assert_equal raw_values.size, real_values.size - raw_values.each do |raw_value| - real_value = real_values.find { _1.id == raw_value.fetch("id") } - assert_equal Serializers::Data::PropertyValueSerializer.deserialize(raw_value), real_value + @raw_attributes_data.each do |raw_attribute| + property = Property.find(raw_attribute.fetch("id")) + raw_attribute.fetch("values").each do |raw_property_value| + property_value = PropertyValue.find(raw_property_value.fetch("id")) + assert_includes property.property_values, property_value end end end # more fragile, but easier sanity check test "Snowboards category is fully imported and modeled correctly" do - snowboard_id = "sg-4-17-2-17" - raw_snowboard_category = @raw_verticals_data.flatten.find { _1.fetch("id") == snowboard_id } - snowboard_category = Category.find(snowboard_id) - - assert_equal raw_snowboard_category.fetch("name"), snowboard_category.name - assert_empty snowboard_category.children - - raw_snowboard_attributes = raw_snowboard_category.fetch("attributes").sort - assert_equal raw_snowboard_attributes.size, snowboard_category.properties.size - raw_snowboard_attributes - .zip(snowboard_category.properties.reorder(:friendly_id)) - .each do |friendly_id, property| - assert_equal friendly_id, property.friendly_id - end + snowboard = Category.find("sg-4-17-2-17") + + assert_equal "Snowboards", snowboard.name + assert_empty snowboard.children + + real_property_friendly_ids = snowboard.properties.pluck(:friendly_id) + assert_equal 8, real_property_friendly_ids.size + assert_includes real_property_friendly_ids, "age_group" + assert_includes real_property_friendly_ids, "color" + assert_includes real_property_friendly_ids, "pattern" + assert_includes real_property_friendly_ids, "recommended_skill_level" + assert_includes real_property_friendly_ids, "snowboard_design" + assert_includes real_property_friendly_ids, "snowboarding_style" + assert_includes real_property_friendly_ids, "target_gender" + assert_includes real_property_friendly_ids, "snowboard_construction" end # more fragile, but easier sanity check - test "Color attribute <1> is fully imported and modeled correctly" do - color_id = 1 - raw_color_attribute = @raw_attributes_data.find { _1.fetch("id") == color_id } - color_attribute = Property.find(color_id) - - assert_equal raw_color_attribute.fetch("name"), color_attribute.name - - raw_color_values = raw_color_attribute.fetch("values").sort_by { _1.fetch("id") } - assert_equal raw_color_values.size, color_attribute.property_values.size - raw_color_values - .zip(color_attribute.property_values.reorder(:id)) - .each do |raw_value, value| - assert_equal raw_value.fetch("id"), value.id - assert_equal raw_value.fetch("name"), value.name - end + test "Snowboard construction attribute <2894> is fully imported and modeled correctly" do + snowboard_construction = Property.find(2894) + + assert_equal "Snowboard construction", snowboard_construction.name + assert_equal "snowboard_construction", snowboard_construction.friendly_id + + real_value_ids = snowboard_construction.property_value_ids + assert_equal 4, real_value_ids.size + assert_includes real_value_ids, 1363 # Flat + assert_includes real_value_ids, 7083 # Hybrid + assert_includes real_value_ids, 7236 # Camber + assert_includes real_value_ids, 7237 # Rocker end end