public
Description: Make git mirrors of darcs repositories
Homepage: http://www.sanityinc.com/articles/converting-darcs-repositories-to-git
Clone URL: git://github.com/purcell/darcs-to-git.git
Click here to lend your support to: darcs-to-git and make a donation at www.pledgie.com !
darcs-to-git / darcs-to-git
100755 489 lines (411 sloc) 16.188 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
#!/usr/bin/env ruby
##
## Author: Steve Purcell, http://www.sanityinc.com/
## Obtain the latest version of this software here: http://git.sanityinc.com/
##
 
# XXX: make backwards compatible
# TODO: import parallel darcs repos as git branches, identifying branch points
# TODO: use default repo if none was supplied
# TODO: handle *-darcs-backupN files?
 
require 'ostruct'
require 'rexml/document'
require 'optparse'
require 'yaml'
 
# Explicitly setting a time zone would cause darcs to only output in
# that timezone hence we couldn't get the actual patch TZ
# ENV['TZ'] = 'GMT0'
 
# Path (relative to the new git working directory) of the YAML file that is
# handed to CommitHistory to remember which darcs patch became which git commit.
GIT_PATCHES = ".git/darcs_patches"
# Author map file that is loaded when it exists and --author-map was not given.
DEFAULT_AUTHOR_MAP_FILE = ".git/darcs_author_substitutions"
 
# -------------------------------------------------------------------------------
# Usage info and argument parsing
# -------------------------------------------------------------------------------
 
# Settings gathered from the command line. Every key starts at its default
# and is overwritten by the matching option handler below.
OPTIONS = { :default_email => nil,
            :list_authors => false,
            :author_map => nil,
            :clean_commit_messages => false,
            :num_patches => nil }

# `opts` stays available after parsing so later code can print the usage text.
opts = OptionParser.new do |parser|
  parser.banner = <<-end_usage
Creates git repositories from darcs repositories
 
usage: darcs-to-git DARCSREPODIR [options]
 
 
1. Create an *empty* directory that will become the new git repository
2. From inside that directory, run this program, passing the location
of the local source darcs repo as a parameter
 
The program will git-init the empty directory, and migrate all patches
in the source darcs repo into commits in that repository.
 
Thereafter, incremental patch conversion from the same source repo is
possible by repeating step 2.
 
NOTE: In case of multiple tags, only the first one will be applied.
If you really need to, you can manually identify the patch and use
\"git tag -f <tagname> <sha1-of-commit-before-tagging>\".
 
OPTIONS
 
end_usage
  parser.on('--default-email ADDRESS',
            "Set the email address used when no explicit address is given") do |m|
    OPTIONS[:default_email] = m
  end
  parser.on('--list-authors',
            "List all unique authors in source repo and quit.") do |m|
    OPTIONS[:list_authors] = m
  end
  parser.on('--author-map FILE',
            "Supply a YAML file that maps committer names to canonical author names") do |f|
    OPTIONS[:author_map] = f
  end
  parser.on('--patches [N]', OptionParser::DecimalInteger,
            "Only pull N patches.") do |n|
    # N is declared optional, so a bare `--patches` hands us nil. Treat a
    # missing count like a negative one: print the usage and stop, rather
    # than crashing with NoMethodError on nil.
    abort parser.to_s if n.nil? || n < 0
    OPTIONS[:num_patches] = n
  end
  parser.on('--clean-commit-messages',
            "Don't note darcs hashes in git commit messages (not recommended)") do
    OPTIONS[:clean_commit_messages] = true
  end
  parser.on('--version', "Output version information and exit") do
    puts <<-EOF
darcs-to-git 0.1
 
Copyright (c) 2009 Steve Purcell, http://www.sanityinc.com/
 
License MIT: <http://www.opensource.org/licenses/mit-license.php>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
EOF
    exit
  end

  parser.on('-h', '--help', "Show this message") do
    puts parser.to_s
    exit
  end
end
# Consumes the recognised options from ARGV, leaving positional arguments.
opts.parse!
 
# The first positional argument names the source darcs repository. It is
# accepted only when it contains a `_darcs` entry; otherwise the script stops
# with the usage text (no argument) or an explanatory message (bad argument).
SRCREPO = ARGV[0]
if SRCREPO.nil?
  abort opts.to_s
# `exist?` (not the removed `exists?` alias) works on old and new Rubies alike.
elsif !FileTest.exist?(SRCREPO + '/_darcs')
  abort "#{SRCREPO} is not a valid local darcs repository"
end
 
 
# -------------------------------------------------------------------------------
# Utilities
# -------------------------------------------------------------------------------
# Announces the command on stdout, executes it through Kernel#system and
# returns true when it succeeds. Raises a RuntimeError naming the command
# when it does not.
def run(*args)
  puts "Running: #{args.inspect}"
  succeeded = system(*args)
  raise("Failed to run: #{args.inspect}") unless succeeded
  succeeded
end
 
# Announces the command on stdout, runs it through the shell and returns
# everything it wrote to its standard output.
#
# Each argument is passed to the shell as one single-quoted word. Embedded
# single quotes are escaped (' becomes '\''), so arguments such as commit
# messages or paths containing quotes can neither break nor extend the
# command line.
#
# Raises a RuntimeError naming the command when it does not exit with
# status 0.
def output_of(*args)
  puts "Running: #{args.inspect}"
  command = args.map { |a| "'" + a.to_s.gsub("'") { "'\\''" } + "'" }.join(' ')
  output = IO.popen(command, 'r') { |p| p.read }
  if $?.exitstatus == 0
    return output
  else
    raise "Failed to run: #{args.inspect}"
  end
end
 
# Compatibility shim so `&:method_name` blocks work on Rubies whose Symbol has
# no to_proc. It is installed only when the method is missing: the fallback
# lambda accepts exactly one argument, so overriding a native implementation
# would break multi-argument uses such as `inject(&:+)` everywhere in the
# process.
class Symbol
  unless method_defined?(:to_proc)
    def to_proc() lambda { |o| o.send(self) } end
  end
end
 
class String
  # Returns a copy in which every darcs escape of the form '[_\hh_]' (two
  # hexadecimal digits) is replaced by the byte it stands for.
  #
  # The escapes are decoded directly instead of going through
  # quoted-printable decoding, so literal '=' characters in patch names,
  # comments and author strings are left exactly as they are. On Rubies with
  # string encodings the result is binary (ASCII-8BIT) encoded, as the bytes
  # are not known to form valid text in any particular encoding.
  def darcs_unescape
    raw = respond_to?(:force_encoding) ? dup.force_encoding('BINARY') : self
    raw.gsub(/\[_\\([0-9a-fA-F]{2})_\]/) { $1.hex.chr }
  end
end
 
 
# -------------------------------------------------------------------------------
# Map darcs authors to git authors
# -------------------------------------------------------------------------------
# A Hash from darcs author strings to canonical author strings whose lookup
# always answers with a [name, email] pair.
class AuthorMap < Hash
  # Email address reported for authors whose string contains no address.
  attr_accessor :default_email

  # Builds an AuthorMap from the YAML mapping stored in +filename+.
  #
  # An empty file gives an empty map. Raises a RuntimeError when the file
  # holds something other than a mapping, instead of failing later with an
  # unrelated TypeError.
  def self.load(filename)
    substitutions = YAML.load_file(filename) || {}
    unless substitutions.is_a?(Hash)
      raise "yaml hash not found in #{filename}"
    end
    map = new
    map.update(substitutions)
    map
  end

  # Returns [name, email] for +author+: the mapped string is used when the
  # author is present in the map, otherwise the author string itself.
  def [](author)
    name_and_email(super || author)
  end

  private

  # Splits an author string into [name, email].
  #   "Name <user@host>"        => ["Name", "user@host"]
  #   "user@host", "<user@host>" => ["user", "user@host"]
  #   anything else             => [author, default_email]
  def name_and_email(author)
    case author
    when /^\s*(\S.*?)\s*\<(\S+@\S+?)\>\s*$/
      [$1, $2]
    when /^\s*\<?(\S+@\S+?)\>?\s*$/
      email = $1
      # no separate name given: fall back to the local part of the address
      [email.split('@').first, email]
    else
      [author, default_email]
    end
  end
end
 
# -------------------------------------------------------------------------------
# Storing a history of related darcs and git commits
# -------------------------------------------------------------------------------
 
# Remembers which darcs patch (by hash) was imported as which git commit.
# The mapping is kept in memory and mirrored to a YAML file.
class CommitHistory
  # Loads the mapping from +patch_file_name+ when that file exists; otherwise
  # rebuilds it from the "darcs-hash:" lines found in the git log.
  #
  # Raises a RuntimeError when the file exists but does not contain a YAML
  # mapping.
  def initialize(patch_file_name)
    @patch_file_name = patch_file_name
    @darcs_patches_in_git = {}
    if File.exist?(patch_file_name)
      @darcs_patches_in_git = YAML.load_file(patch_file_name)
      unless @darcs_patches_in_git.is_a?(Hash)
        raise "yaml hash not found in #{patch_file_name}"
      end
    else
      # TODO: consider doing this unconditionally, since that
      # might allow merging between repositories created with darcs-to-git
      fill_from_darcs_hash_comments
    end
  end

  # Associates the darcs patch +identifier+ with the git +commit_id+ and
  # writes the whole mapping back to disk.
  def record_git_commit(commit_id, identifier)
    # using one file per darcs patch would be an incredible waste of space
    # on my system one file takes up 4K even if only a few bytes are in it
    # hence we just use a simple YAML hash
    @darcs_patches_in_git[identifier] = commit_id
    save
  end

  # Returns the git commit id for a patch, or nil/false when it is unknown.
  # Tags are looked up by +git_tag_name+ among the existing git tags; other
  # patches are looked up by +identifier+ in the recorded mapping.
  def find_git_commit(is_tag, git_tag_name, identifier)
    return nil if empty_repo?
    if is_tag
      (output_of("git", "tag", "-l") rescue "").split(/\r?\n/).include?(git_tag_name) &&
        output_of("git", "rev-list", "--max-count=1", "tags/#{git_tag_name}").strip
    else
      @darcs_patches_in_git[identifier]
    end
  end

  private

  # Writes the current mapping to the YAML file, replacing its content.
  def save
    File.open(@patch_file_name, 'w') do |f|
      YAML.dump(@darcs_patches_in_git, f)
    end
  end

  # True when the git repository has no HEAD commit yet (or git fails).
  def empty_repo?
    !system("git rev-parse --verify HEAD >/dev/null 2>&1")
  end

  # Rebuilds the mapping from commits whose message mentions "darcs-hash:".
  # Log entries that do not yield both a commit id and a hash are skipped,
  # and the file is written once at the end rather than once per entry.
  def fill_from_darcs_hash_comments
    return if empty_repo?
    entries = Array(output_of("git", "log", "--grep=darcs-hash:").split(/^commit /m)[1..-1])
    found_any = false
    entries.each do |entry|
      # Only the `m` flag is wanted here (it lets `.` span newlines). An `s`
      # flag would set the pattern's encoding to Windows-31J and make the
      # match fail on log text containing non-ASCII characters.
      commit_id, identifier = entry.scan(/^([a-z0-9]+$).*darcs-hash:(.*?)$/m).flatten
      next unless commit_id && identifier
      @darcs_patches_in_git[identifier] = commit_id
      found_any = true
    end
    save if found_any
  end
end
 
# -------------------------------------------------------------------------------
# Reading darcs patches and applying them to a git repo
# -------------------------------------------------------------------------------
 
# One patch of the source darcs repository, built from a <patch> element of
# `darcs changes --xml`, together with the operations that pull it into the
# working directory and commit it to git.
class DarcsPatch
  attr_accessor :source_repo, :author, :date, :inverted, :identifier, :name, :is_tag, :git_tag_name, :comment
  attr_reader :git_author_name, :git_author_email

  # source_repo:: location of the darcs repository the patch is pulled from
  # patch_xml::   the <patch> XML element describing the patch
  def initialize(source_repo, patch_xml)
    self.source_repo = source_repo
    self.author = patch_xml.attribute('author').value.darcs_unescape
    self.date = darcs_date_to_git_date(patch_xml.attribute('date').value,
                                       patch_xml.attribute('local_date').value)
    self.inverted = (patch_xml.attribute('inverted').to_s == 'True')
    self.identifier = patch_xml.attribute('hash').to_s
    # a missing <name> or <comment> element makes the chain raise; the
    # rescue modifiers turn that into the fallback values
    self.name = patch_xml.get_elements('name').first.get_text.value.darcs_unescape rescue 'Unnamed patch'
    self.comment = patch_xml.get_elements('comment').first.get_text.value.darcs_unescape rescue nil
    # patches named "TAG <something>" are tags; whitespace and colons in the
    # tag name are replaced by underscores for the git tag name
    if (self.is_tag = (self.name =~ /^TAG (.*)/))
      self.git_tag_name = $1.gsub(/[\s:]+/, '_')
    end
    @git_author_name, @git_author_email = AUTHOR_MAP[author]
  end

  # Patches are ordered by their darcs hash.
  def <=>(other)
    self.identifier <=> other.identifier
  end

  # Builds the git message from up to three parts separated by blank lines:
  # the patch name (prefixed with "UNDO: " for inverted patches, and left out
  # when it looks like "[word @ digits]"), the long comment, and a
  # "darcs-hash:" line unless --clean-commit-messages was given.
  def git_commit_message
    [ ((inverted ? "UNDO: #{name}" : name) unless name =~ /^\[\w+ @ \d+\]/),
      comment,
      ("darcs-hash:#{identifier}" unless OPTIONS[:clean_commit_messages])
    ].compact.join("\n\n")
  end

  # Returns one DarcsPatch per changelog/patch element reported by
  # `darcs changes --reverse --xml --summary` for +repo+.
  def self.read_from_repo(repo)
    REXML::Document.new(output_of("darcs", "changes", "--reverse",
                                  "--repodir=#{repo}", "--xml",
                                  "--summary")).
      get_elements('changelog/patch').map do |p|
      DarcsPatch.new(repo, p)
    end
  end

  # Return committish for corresponding patch in current git repo, or false/nil
  # A found commit is cached; a nil/false answer is looked up again next time.
  def id_in_git_repo
    @git_commit ||= COMMIT_HISTORY.find_git_commit(is_tag, git_tag_name, identifier)
  end

  # Prints a banner for the patch, then pulls it from darcs and commits it to
  # git. Does nothing more than report when the patch is already in git.
  def pull_and_apply
    puts "\n" + ("=" * 80)
    puts "PATCH : #{name}"
    puts "DATE : #{date}"
    puts "AUTHOR: #{author} => #{git_author_name} <#{git_author_email}>"
    puts "=" * 80

    if id_in_git_repo
      puts "Already imported to git as #{id_in_git_repo}"
      return
    end

    pull
    system("git", "status")
    commit_to_git_repo
  end

  private

  # Pulls exactly this patch (matched by hash) from the source repository.
  # Raises when the working directory is dirty beforehand, or when it is
  # still dirty after reverting and removing darcs backup files.
  def pull
    unless darcs_reports_clean_repo?
      raise "Darcs reports dirty repo before pulling #{identifier}; confused, so aborting"
    end
    run("darcs", "pull", "--all", "--quiet",
        "--match", "hash #{identifier}",
        "--set-scripts-executable", source_repo)
    unless darcs_reports_clean_repo?
      puts "Darcs reports dirty directory: assuming conflict that is fixed by a later patch... reverting"
      run("darcs revert --all")
      run("find . -name '*-darcs-backup0'|xargs rm -f") # darcs2 creates these
    end
    unless darcs_reports_clean_repo?
        system("darcs whatsnew -sl")
        raise "Failed to clean repo, see above"
    end
  end

  # Runs `darcs whatsnew -sl`, drops the lines reporting added paths under
  # .git, and matches what is left against an empty line or a line reading
  # "No changes!". Returns the match position (truthy) or nil.
  def darcs_reports_clean_repo?
    `darcs whatsnew -sl | egrep -v '^a (\./)?\.git(/|$)'` =~ /^(No changes!)?$/
  end

  # Records the patch in git using the patch's author and date for both the
  # author and the committer. Tags become annotated git tags. For other
  # patches, new files are added and a commit is made when anything changed;
  # the id of the latest commit is then recorded against the darcs hash,
  # whether or not a new commit was made.
  def commit_to_git_repo
    ENV['GIT_AUTHOR_NAME'] = ENV['GIT_COMMITTER_NAME'] = git_author_name
    ENV['GIT_AUTHOR_EMAIL'] = ENV['GIT_COMMITTER_EMAIL'] = git_author_email
    ENV['GIT_AUTHOR_DATE'] = ENV['GIT_COMMITTER_DATE'] = date
    if is_tag
      run("git", "tag", "-a", "-m", git_commit_message, git_tag_name)
    else
      if (new_files = git_new_files).any?
        run(*(["git", "add"] + new_files))
      end
      if git_changed_files.any? || new_files.any?
        run("git", "commit", "-a", "-m", git_commit_message)
      end
      # get full id of last commit and associate it with the patch id
      commit_id = output_of("git", "log", "-n1").scan(/^commit ([a-z0-9]+$)/).flatten.first
      COMMIT_HISTORY.record_git_commit(commit_id, identifier)
    end
  end

  # utc::   darcs 'date' attribute, 14 digits (YYYYMMDDhhmmss)
  # local:: darcs 'local_date' attribute in human-readable form
  # Returns a string such as "2006-10-02 14:23:28 +0200".
  # Raises a RuntimeError when +utc+ is not 14 digits.
  def darcs_date_to_git_date(utc,local)
    # Calculates a git-friendly date (e.g., timezone CET described as
    # +0100) by using the two date fields that darcs gives us: a list
    # of numbers describing the UTC time and a local time formatted in
    # a human-readable format. We could parse the local time and
    # derive the timezone offset from the timezone name. but timezones
    # aren't well-defined, so we ignore the timezone name and instead
    # calculate the timezone offset ourselves by calculating the
    # difference between local time and UTC time.
    if not utc =~ /^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/
      raise "Wrong darcs date format"
    end
    utc_time = Time.utc($1,$2,$3,$4,$5,$6)
    # example: Mon Oct 2 14:23:28 CEST 2006
    # everything except timezone name is fixed-length, if parsing
    # fails we just use UTC
    pat = /^\w\w\w (\w\w\w) ([ 1-9]\d) ([ 0-9]\d)\:(\d\d)\:(\d\d) \w* (\d\d\d\d)/
    # the local wall-clock fields are deliberately read as if they were UTC,
    # so that subtracting the real UTC time yields the zone offset
    local_time = if pat =~ local
                   Time.utc($6,$1,$2,$3,$4,$5)
                 else
                   utc_time
                 end
    offs = local_time - utc_time # time offset in seconds
    t = local_time
    # formats the above example as: 2006-10-02 14:23:28 +0200
    s = sprintf("%4d-%02d-%02d %02d:%02d:%02d %s%02d%02d",
                t.year, t.month, t.day,
                t.hour, t.min, t.sec,
                offs < 0 ? "-" : "+", offs.abs/3600, offs.abs.modulo(3600)/60 )
  end

  # Lists files via `git ls-files -t ...`, whose NUL-terminated entries have
  # the form "<code> <name>", and returns the names whose one-character
  # status code is included in +wanted+.
  def git_ls_files(wanted)
    output_of(*["git", "ls-files", "-t", "-o", "-m", "-d", "-z", "-X", ".git/info/exclude"]).scan(/(.?) (.*?)\0/m).map do |code, name|
      name if wanted.include?(code)
    end.compact
  end
  # Names listed with status code "?" (used as the files to `git add`).
  def git_new_files() git_ls_files(["?"]) end
  # Names listed with status code "?", "R" or "C" (used to decide whether a
  # commit is needed).
  def git_changed_files() git_ls_files(%w(? R C)) end
end
 
# Prints a YAML mapping, preceded by three explanatory comment lines, from
# every distinct darcs author string in +patches+ to the "Name <email>" form
# it currently resolves to (just "Name" when no email address is known).
def extract_authors(patches)
  author_table = patches.inject({}) do |table, patch|
    email_part = patch.git_author_email.nil? ? "" : " <#{patch.git_author_email}>"
    table[patch.author] = "#{patch.git_author_name}" + email_part
    table
  end

  [ "# You can use the following output as a starting point for an author_map",
    "# Just fill in the proper text after the colon; put email addresses in",
    "# angle brackets. You can remove any lines that look OK to you."
  ].each { |hint| puts hint }
  # TODO: Can we make the output sorted?
  puts YAML::dump(author_table)
end
 
 
# -------------------------------------------------------------------------------
# Pre-flight checks
# -------------------------------------------------------------------------------
 
# Integers of every "x.y.z" sequence found in the output of `darcs -v`,
# flattened into one array (presumably a single [major, minor, patch] triple;
# empty when the output contains no such sequence).
DARCS_VERSION = output_of(*%w(darcs -v)).scan(/(\d+)\.(\d+)\.(\d+)/).flatten.map {|v| v.to_i}
 
# Tells whether `darcs show repo` reports a darcs-2 format for +repo+.
# Returns the match position (truthy) when the "Format:" line mentions
# darcs-2, nil when it does not, and false when the command fails.
def darcs2_repo?(repo)
  repo_info = output_of("darcs", "show", "repo", "--repodir=#{repo}")
  repo_info =~ /Format:.*darcs-2/
rescue # darcs1 does not have a "show" command, so we get an exception
  false
end
 
# Mixing Comparable into Array provides <, <=, > and >= on top of Array#<=>,
# which is how version arrays are compared in this script.
class Array
  include Comparable
end

if !(DARCS_VERSION > [1, 0, 7])
  STDERR.write("WARNING: your darcs appears to be old, and may not work with this script\n")
end
 
 
# -------------------------------------------------------------------------------
# Initialise the working area
# -------------------------------------------------------------------------------
# so that pager of git-log doesn't halt conversion
%w(PAGER GIT_PAGER).each { |pager_var| ENV[pager_var] = "cat" }

# First run in this directory: create the darcs and git repositories side by
# side and make each tool ignore the other's metadata.
if !File.directory?("_darcs")
  puts "Initialising the working area."

  # Pick the repository format flag, if any, to pass to `darcs init`.
  format_flag = if darcs2_repo?(SRCREPO)
                  "--darcs-2"
                elsif DARCS_VERSION >= [2, 0, 0]
                  "--old-fashioned-inventory"
                end
  run(*["darcs", "init", format_flag].compact)

  run("git", "init")
  [ [".git/info/exclude", "_darcs\n.DS_Store\n"],
    ["_darcs/prefs/boring", "\\.git$\n\\.DS_Store$\n"]
  ].each do |ignore_file, patterns|
    File.open(ignore_file, "a") { |f| f.write(patterns) }
  end
  # TODO: migrate darcs borings into git excludes?
end
 
 
# Mapping between already-imported darcs patches and their git commits.
COMMIT_HISTORY = CommitHistory.new(GIT_PATCHES)


# Author substitutions: the file named by --author-map wins, then the default
# map file if one exists, otherwise an empty map. `File.exist?` is used
# because the `exists?` alias is no longer available on current Rubies.
AUTHOR_MAP = if OPTIONS[:author_map]
               AuthorMap.load(OPTIONS[:author_map])
             elsif File.exist?(DEFAULT_AUTHOR_MAP_FILE)
               AuthorMap.load(DEFAULT_AUTHOR_MAP_FILE)
             else
               AuthorMap.new
             end
AUTHOR_MAP.default_email = OPTIONS[:default_email]
 
 
patches = DarcsPatch.read_from_repo(SRCREPO)
if OPTIONS[:list_authors]
  extract_authors(patches)
  exit(0)
end

# Query the patches newest-first, drop those already present in git, and
# restore the original oldest-first order for pulling.
patches_available = patches.reverse.reject { |patch| patch.id_in_git_repo }.reverse

# Honour --patches N by taking only the first N outstanding patches.
patch_limit = OPTIONS[:num_patches]
patches_to_pull = patch_limit ? patches_available.first(patch_limit) : patches_available

patches_to_pull.each { |patch| patch.pull_and_apply }

pulled = patches_to_pull.size
if pulled != 0
  puts "\nPulled #{pulled} patch#{"es" unless pulled == 1}."
  puts "\nDarcs import successful! You may now want to run `git gc' to
improve space usage the git repo"
else
  puts "\nNothing to pull."
end
 
 
# -------------------------------------------------------------------------------
# Post-flight checks
# -------------------------------------------------------------------------------
 
# if we didn't pull all patches, then the consistency check would
# fail, so we simply skip it
if patches_to_pull.size == patches_available.size
  puts "Comparing final state with source repo..."
  # diff succeeds only when the two trees are identical (metadata excluded)
  trees_identical = system("diff", "-ur", "-x", "_darcs", "-x", ".git", ".", SRCREPO)
  unless trees_identical
    abort <<-end_msg
!!! There were differences! See diff above for details.
!!! It may be that the source repository was dirty.
!!! Run "cd #{SRCREPO} && darcs whatsnew -sl" to check.
end_msg
  end
end