Skip to content
This repository has been archived by the owner on May 8, 2024. It is now read-only.

Commit

Permalink
Merge pull request datadryad#281 from CDL-Dryad/populate-stats
Browse files Browse the repository at this point in the history
This is for processing stats manually from our own files.
  • Loading branch information
sfisher committed Sep 23, 2019
2 parents b8d9f71 + 7a828f2 commit 14eea2d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 2 deletions.
2 changes: 1 addition & 1 deletion stash_engine/app/models/stash_engine/counter_stat.rb
Expand Up @@ -29,7 +29,7 @@ def update_if_necessary
# we should have a counter stat already if it got to this class
# only update stats if the current UTC date is later than the date the record was last updated
return unless new_record? || updated_at.nil? || Time.new.utc.to_date > updated_at.to_date
update_usage!
# update_usage!
update_citation_count!
self.updated_at = Time.new.utc # seem to need this for some reason

Expand Down
14 changes: 13 additions & 1 deletion stash_engine/lib/tasks/counter.rake
@@ -1,6 +1,7 @@
require 'net/scp'
require_relative 'counter/validate_file'
require_relative 'counter/log_combiner'
require_relative 'counter/json_stats'

# rubocop:disable Metrics/BlockLength
namespace :counter do
Expand Down Expand Up @@ -34,7 +35,18 @@ namespace :counter do
exit # makes the arguments not be interpreted as other rake tasks
end # end of task

desc 'test environment is passed in'
desc 'manually populate CoP stats from json files'
task cop_manual: :environment do
  # JSON_DIRECTORY names the directory that holds the *.json stats files.
  # Stop with a readable message when it is missing: File.join would
  # otherwise raise a bare TypeError on nil.
  json_directory = ENV['JSON_DIRECTORY']
  abort 'JSON_DIRECTORY must be set to the directory containing the json files' if json_directory.to_s.strip.empty?

  puts "JSON_DIRECTORY is #{json_directory}"

  # files are processed one at a time, in sorted path order
  Dir.glob(File.join(json_directory, '*.json')).sort.each do |f|
    puts f
    js = JsonStats.new(f)
    js.update_stats
  end
end

desc 'test that environment is passed in'
task :test_env do
puts "LOG_DIRECTORY is set as #{ENV['LOG_DIRECTORY']}" if ENV['LOG_DIRECTORY']
puts "SCP_HOSTS are set as #{ENV['SCP_HOSTS'].split(' ')}" if ENV['SCP_HOSTS']
Expand Down
59 changes: 59 additions & 0 deletions stash_engine/lib/tasks/counter/json_stats.rb
@@ -0,0 +1,59 @@
require 'json'
require 'byebug'

# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
# Reads one JSON stats report from disk and adds the unique investigation and
# unique request counts it contains to the counter stats of the identifiers
# named in the report.
class JsonStats
  # access methods whose counts are included in the totals
  ACCESS_METHODS = %w[machine regular].freeze
  # the two metric types that are tallied; every other metric type is ignored
  INVESTIGATION_METRIC = 'unique-dataset-investigations'.freeze
  REQUEST_METRIC = 'unique-dataset-requests'.freeze
  METRIC_TYPES = [INVESTIGATION_METRIC, REQUEST_METRIC].freeze

  # @param filename [String] path of the JSON report file to read and parse
  def initialize(filename)
    @stats = JSON.parse(File.read(filename))
  end

  # Walks every entry under 'report_datasets', totals its valid counts and
  # passes the totals to #update_database. Entries without a dataset id value
  # or without performance data are skipped, as is a report that has no
  # dataset list at all.
  def update_stats
    datasets = @stats.is_a?(Hash) ? @stats['report_datasets'] : nil
    return unless datasets.is_a?(Array)

    datasets.each_with_index do |ds, idx|
      puts " #{idx}/#{datasets.length} processed" if (idx % 100).zero?

      next unless ds.is_a?(Hash)

      doi = dataset_doi(ds)
      next if doi.blank? || ds['performance'].blank?

      unique_request, unique_invest = tally_counts(ds['performance'])
      update_database(doi: doi, request: unique_request, invest: unique_invest)
    end
  end

  # Adds the given totals to the counter stat of the identifier matching +doi+.
  # Does nothing when no identifier is found.
  #
  # @param doi [String] identifier to look up; surrounding whitespace is ignored
  # @param request [Integer] amount added to unique_request_count
  # @param invest [Integer] amount added to unique_investigation_count
  # @return [Object, nil] the result of saving the stat, or nil when no identifier matched
  def update_database(doi:, request:, invest:)
    # look up a stripped copy: strip! would modify the caller's string and
    # raises on a frozen one
    doi_obj = StashEngine::Identifier.find_by_identifier(doi.to_s.strip)
    return if doi_obj.nil?

    stat = doi_obj.counter_stat
    stat.unique_investigation_count += invest
    stat.unique_request_count += request
    # these are needed to keep the citations rolling
    # NOTE(review): presumably the 48 hour backdating makes the record look
    # stale to the counter stat's own update check -- confirm
    backdated = Time.new - 48.hours
    stat.created_at = backdated
    stat.updated_at = backdated
    saved = stat.save
    # report a failed save instead of dropping it silently
    warn "unable to save counter stats for #{doi}" unless saved
    saved
  end

  private

  # @return [Object, nil] the 'value' of the first 'dataset-id' entry, or nil
  #   when the entry is missing or not shaped as a list of hashes
  def dataset_doi(dataset)
    ids = dataset['dataset-id']
    return nil unless ids.is_a?(Array) && ids.first.is_a?(Hash)

    ids.first['value']
  end

  # Sums the counts of all valid instances found in the performance list.
  #
  # @return [Array(Integer, Integer)] unique request total, unique investigation total
  def tally_counts(performance)
    totals = { REQUEST_METRIC => 0, INVESTIGATION_METRIC => 0 }
    return totals.values_at(REQUEST_METRIC, INVESTIGATION_METRIC) unless performance.is_a?(Array)

    performance.each do |perf|
      next unless perf.is_a?(Hash) && perf['instance'].is_a?(Array)

      perf['instance'].each do |instance|
        next unless countable?(instance)

        totals[instance['metric-type']] += instance['count']
      end
    end
    totals.values_at(REQUEST_METRIC, INVESTIGATION_METRIC)
  end

  # An instance is counted only when its access method and metric type are
  # ones we track and its count is an Integer. Checking the class (rather than
  # calling integer?) keeps non-numeric values such as "7" from raising.
  def countable?(instance)
    instance.is_a?(Hash) &&
      ACCESS_METHODS.include?(instance['access-method']) &&
      METRIC_TYPES.include?(instance['metric-type']) &&
      instance['count'].is_a?(Integer)
  end
end

# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

0 comments on commit 14eea2d

Please sign in to comment.