#!/usr/bin/env python
# -*- coding: utf-8 -*-
# git-restore-mtime - Change mtime of files based on commit date of last change
# Copyright (C) 2012 Rodrigo Silva (MestreLion) <>
# Change the modification time (mtime) of all files in work tree, based on the
# date of the most recent commit that modified the file.
# Useful prior to generating release tarballs, so each file is archived with a
# date that resembles the date when the file was actually last modified.
# (assuming the actual modification date and its commit date are close)
# By default ignores all ignored and untracked files, and also refuses to work
# on trees with uncommitted changes.
# This file is a command-line script: its top level parses sys.argv and runs
# git, so refuse to be loaded as a module instead of doing that on import.
if __name__ != "__main__":
    raise ImportError("%s should not be used as a module." % __name__)
import subprocess, shlex
import sys, os.path
import logging as logger
import argparse
import time
# Define lutime(path, times): set a path's (atime, mtime) without following
# symlinks. Which implementation is used depends on whether this Python's
# os.utime() advertises support for follow_symlinks.
if os.utime in getattr(os, 'supports_follow_symlinks', []):
    def lutime(path, times):
        """Set (atime, mtime) of `path` itself, even if it is a symlink."""
        os.utime(path, times, follow_symlinks=False)
else:
    def lutime(path, times):
        """Set (atime, mtime) of `path`; raise Warning if it is a symlink.

        Without follow_symlinks support os.utime() would touch the link's
        target, so symlinks are rejected rather than mis-updated.
        """
        if os.path.islink(path):
            raise Warning('Unable to update symlink: ' + path)
        os.utime(path, times)
# Command-line interface. Every switch is a plain boolean flag unless noted;
# the destination names (args.missing, args.dirs, args.timeformat,
# args.first_parent, args.pathspec, args.workdir, args.gitdir) are the ones
# the rest of the script reads.
parser = argparse.ArgumentParser(
    description='Restore original modification time of files based on '
                'the date of the most recent commit that modified them. '
                'Useful when generating release tarballs.')

parser.add_argument('--quiet', '-q',
    action="store_true",
    help='suppress informative messages and summary statistics.')

parser.add_argument('--verbose', '-v',
    action="store_true",
    help='print additional information for each processed file. '
         'Overwrites --quiet.')

parser.add_argument('--force', '-f',
    action="store_true",
    help='force execution on trees with uncommitted changes.')

parser.add_argument('--merge', '-m',
    action="store_true",
    help='include merge commits. Leads to more recent mtimes '
         'and more files per commit, thus with the same mtime '
         '(which may or may not be what you want). Including '
         'merge commits may lead to less commits being evaluated '
         '(all files are found sooner), which improves performance, '
         'sometimes substantially. But since merge commits are '
         'usually huge, processing them may also take longer, '
         'sometimes substantially. By default merge logs are only '
         'used for files missing from regular commit logs.')

parser.add_argument('--first-parent',
    action="store_true",
    help='pass --first-parent to git whatchanged to hide the '
         'second parent from the merge commit logs. Only has any '
         'effect if --merge is also specified or --skip-missing '
         'is not specified and there were files not found in regular '
         'commit logs.')

# Inverted flag: args.missing stays True unless the user opts out
parser.add_argument('--skip-missing', '-s',
    action="store_false", default=True, dest='missing',
    help='do not try to find missing files. If some files were '
         'not found in regular commit logs, by default it re-tries '
         'using merge commit logs for these files (if --merge was '
         'not used already). This option disables this behavior, '
         'which may slightly improve performance, but files '
         'found only in merge commits will not be updated.')

# Inverted flag: args.dirs stays True unless the user opts out
parser.add_argument('--no-directories', '-D',
    action="store_false", default=True, dest='dirs',
    help='do not update directory mtime for files created, '
         'renamed or deleted in it. Note: just modifying a file '
         'will not update its directory mtime.')

parser.add_argument('--test', '-t',
    action="store_true", default=False, dest='test',
    help='test run: do not actually update any file')

# args.timeformat is the git pretty-format placeholder used for the log:
# '%at' (author time) by default, '%ct' (commit time) with this switch
parser.add_argument('--commit-time', '-c',
    action='store_const', const='%ct', default='%at', dest='timeformat',
    help='use commit time instead of author time')

parser.add_argument('pathspec',
    nargs='*', default=[os.path.curdir], metavar='PATHSPEC',
    help='only modify paths (dirs or files) matching PATHSPEC, '
         'relative to current directory. '
         'Default is to modify all non-ignored, tracked files.')

parser.add_argument('--work-tree',
    dest='workdir',
    help='specify where the work tree is. '
         'Default for most repositories is current directory.')

parser.add_argument('--git-dir',
    dest='gitdir',
    help='specify where the git repository is. '
         'Default for most repositories <work-tree>/.git')
args = parser.parse_args()

# Base git command line; the work-tree / git-dir overrides are global git
# options, so they are placed right after 'git', before any subcommand.
gitcmd = ['git']
if args.workdir:
    gitcmd.append("--work-tree=%s" % args.workdir)
if args.gitdir:
    gitcmd.append("--git-dir=%s" % args.gitdir)

# Verbosity: --verbose takes precedence over --quiet
level = (logger.DEBUG if args.verbose
         else logger.WARN if args.quiet
         else logger.INFO)
logger.basicConfig(level=level, format='%(message)s')
# UI done, it's show time!
start = time.time()  # yes, Wall time. CPU time is not realistic for users.

# Run-wide counters, all starting at zero
lines = loglines = commits = totalfiles = \
    ignoredfiles = files = touches = errors = 0

# Number of still-missing paths handed to git per merge-log retry
stepmissing = 100

# First things first: Where and Who are we?
try:
    # rev-parse prints one answer per line, in the order requested:
    # the work tree top-level first, then the git dir
    workdir, gitdir = subprocess.check_output(
        gitcmd + shlex.split('rev-parse --show-toplevel --git-dir'),
        universal_newlines=True).splitlines()[:2]
    workdir = os.path.abspath(workdir)
    gitdir = os.path.abspath(gitdir)
except subprocess.CalledProcessError as e:
    # rev-parse couldn't find git repo, and already informed user.
    # So we just...
    sys.exit(e.returncode)
# Get the files managed by git
lsfileslist = set()
gitobj = subprocess.Popen(gitcmd + shlex.split('ls-files --full-name') +
                          ['--'] + args.pathspec,
                          stdout=subprocess.PIPE, universal_newlines=True)
for line in gitobj.stdout:
    # Store each path with the os' own separator (same normalisation the
    # script applies to every other path printed by git), so the set can be
    # intersected with paths built through os.path
    lsfileslist.add(os.path.normpath(line.rstrip('\n')))
gitobj.wait()  # output fully consumed: reap the child process
# List files matching user pathspec, relative to current directory
# git commands always print paths relative to work tree root
filelist = set()
dirlist = set()
for path in args.pathspec:

    # Normalize user input so ./doc = doc/ = doc/../doc/. = doc
    path = os.path.normpath(path)

    # Is path inside the work tree? Compare whole path components: a plain
    # character prefix test would accept '/repo-old' for a '/repo' work tree
    abspath = os.path.abspath(path)
    if abspath != workdir and not abspath.startswith(os.path.join(workdir, '')):
        logger.warning("WARNING: Skipping pathspec outside work tree: %s", path)
        continue

    # git does not care if it's a broken symlink, hence lexists
    if not os.path.lexists(path):
        logger.warning("WARNING: Skipping non-existing pathspec: %s", path)
        continue

    # file or symlink (to file, to dir or broken - git handles the same way)
    if os.path.isfile(path) or os.path.islink(path):
        # Always add them relative to worktree root
        filelist.add(os.path.relpath(path, workdir))
        continue

    # dir
    # (loop names chosen so the global `files` counter is not clobbered)
    for root, subdirs, names in os.walk(path):

        # Never descend into the repository itself: editing `subdirs` in
        # place prunes os.walk()
        subdirs[:] = [subdir for subdir in subdirs
                      if os.path.abspath(os.path.join(root, subdir)) != gitdir]

        # '.git' may also be a regular file at the work tree root
        if os.path.abspath(root) == workdir and '.git' in names:
            names.remove('.git')

        if args.dirs:
            reldir = os.path.relpath(root, workdir)
            # The work tree root is stored as '', which is what
            # os.path.dirname() yields for a top-level file
            if reldir == '.':
                reldir = ''
            dirlist.add(reldir)

        for name in names:
            # Always add them relative to worktree root
            filelist.add(os.path.relpath(os.path.join(root, name), workdir))

# Keep only what git actually tracks
filelist &= lsfileslist
totaldirs = dirs = len(dirlist)
totalfiles = files = len(filelist)
logger.info("{:,} files to be processed in work dir".format(totalfiles))
# Discard untracked and ignored files
ignoredlist = set()
gitobj = subprocess.Popen(gitcmd + shlex.split('status --porcelain --ignored') +
                          ['--'] + args.pathspec,
                          stdout=subprocess.PIPE, universal_newlines=True)
for line in gitobj.stdout:
    # Only drop the newline: a leading blank is part of the 2-char status
    line = line.rstrip('\n')
    status = line[:2]
    filespec = line[3:]
    # Remember whether filespec had a trailing '/' before normalising,
    # because normpath() removes it
    isdir = filespec.endswith('/')
    # Make sure the slash matches the os; for Windows we need a backslash
    filespec = os.path.normpath(filespec)
    if status in ['??', '!!']:  # also safe to ignore: 'A ', ' M'
        # filespec can be a dir, so we must iterate on filelist
        dirprefix = os.path.join(filespec, '')
        for file in filelist:
            if (isdir and file.startswith(dirprefix)) or file == filespec:
                logger.debug("Ignoring: %s", file)
                # Collected now, removed after the loop: filelist must not
                # change size while it is being iterated
                ignoredlist.add(file)
                files -= 1
                ignoredfiles += 1
    elif not args.force:
        logger.error(
            "ERROR: There are local changes in the working directory.\n"
            "This could lead to undesirable results for modified files.\n"
            "Please, commit your changes (or use --force) and try again.\n")
        sys.exit(1)
gitobj.wait()  # output fully consumed: reap the child process

if ignoredfiles:
    filelist -= ignoredlist
    logger.info("{:,} files to process after ignoring {:,}"
                "".format(files, ignoredfiles))
# Process the log until all files are 'touched'
logger.debug("Line #\tLog #\tFiles\tmtime\tFile")


def _unquote_git_path(quoted):
    """Return the path held in a double-quoted, backslash-escaped git path."""
    inner = quoted[1:-1]
    if isinstance(inner, bytes):
        # Python 2: str is a byte string and has the string-escape codec
        return inner.decode("string-escape")
    # Python 3: str has no decode(). Undo the escapes at byte level instead:
    # unicode_escape turns each byte / escape into the code point of the same
    # value, so encoding the result as latin-1 yields the raw path bytes.
    raw = os.fsencode(inner).decode('unicode_escape').encode('latin-1')
    return os.fsdecode(raw)


def parselog(merge=False, filterlist=None):
    """Walk `git whatchanged` output, setting mtimes of files still pending.

    For every file line whose path is still in the global `filelist`, the
    file is removed from the list and (unless args.test is set) its mtime is
    set to the timestamp of the commit being read. When args.dirs is set,
    the same is done once per directory in `dirlist` on the first add/delete
    seen in it. Reading stops as soon as no files are pending.

    merge      -- pass -m to git.
    filterlist -- paths appended after '--' to limit the log; None or empty
                  means no path limit.

    Updates the global counters loglines, commits, files, touches, errors
    and dirs. Failures to update a path are logged, not raised.
    """
    global loglines, dirs, files, touches, errors, commits

    gitobj = subprocess.Popen(
        gitcmd +
        shlex.split('whatchanged --pretty={}'.format(args.timeformat)) +
        (['-m'] if merge else []) +
        (['--first-parent'] if args.first_parent else []) +
        ['--'] + list(filterlist or []),
        stdout=subprocess.PIPE, universal_newlines=True)

    for line in gitobj.stdout:
        loglines += 1
        line = line.strip()

        # Blank line between Date and list of files
        if not line:
            continue

        # File line
        if line.startswith(':'):
            # If line describes a rename, linetok has three tokens, otherwise
            # two.
            linetok = line.split('\t')
            status = linetok[0]
            file = linetok[-1]
            # Unquote BEFORE normalising: on Windows normpath() would turn
            # '/' into the very backslashes the unquoting interprets
            if file.startswith('"'):
                file = _unquote_git_path(file)
            # Make sure the slash matches the os; for Windows we need a backslash
            file = os.path.normpath(file)

            if file in filelist:
                logger.debug("%d\t%d\t%d\t%s\t%s",
                             loglines, commits, files,
                             time.ctime(mtime), file)
                # Only the first (most recent) commit listing a file counts
                filelist.remove(file)
                files -= 1
                try:
                    if not args.test:
                        lutime(os.path.join(workdir, file), (mtime, mtime))
                    touches += 1
                except Exception as e:
                    logger.error("ERROR: %s\n", e)
                    errors += 1

            if args.dirs:
                dir = os.path.dirname(file)
                # status ends with the change letter; only additions and
                # deletions alter a directory's contents
                if status[-1] in ['A', 'D'] and dir in dirlist:
                    logger.debug("%d\t%d\t-\t%s\t%s",
                                 loglines, commits,
                                 time.ctime(mtime), dir)
                    dirlist.remove(dir)
                    try:
                        if not args.test:
                            lutime(os.path.join(workdir, dir), (mtime, mtime))
                        dirs -= 1
                    except Exception as e:
                        logger.error("ERROR: %s\n", e)

        # Date line
        else:
            commits += 1
            mtime = int(line)

        # All files done?
        if not files:
            break

    # The loop may have stopped early: do not leave git running
    try:
        gitobj.terminate()
    except OSError:
        pass
    gitobj.wait()
# Main pass: regular log (with merge commits if --merge), limited to PATHSPEC
parselog(args.merge, args.pathspec)

# Missing files
if filelist:
    # Try to find them in merge logs, if not done already
    # (usually HUGE, thus MUCH slower!)
    if args.missing and not args.merge:
        filterlist = list(filelist)
        # Hand the paths to git in batches of `stepmissing`
        for i in range(0, len(filterlist), stepmissing):
            parselog(merge=True, filterlist=filterlist[i:i+stepmissing])

    # Still missing some?
    for file in filelist:
        logger.warning("WARNING: not found in log: %s", file)
# Final statistics
# TODO: use git-log --before=mtime to brag about skipped log entries
# All of these are INFO messages, so --quiet suppresses the whole summary.
logger.info(
    "{:13,.2f} seconds\n"
    "{:13,} log lines processed\n"
    "{:13,} commits evaluated"
    "".format(time.time()-start, loglines, commits))

# Optional lines are printed only when they add information
if touches != totalfiles:
    logger.info("{:13,} total files".format(totalfiles))
if ignoredfiles:
    logger.info("{:13,} ignored files".format(ignoredfiles))
if files:
    # `files` is what is left of the pending-files counter
    logger.info("{:13,} missing files".format(files))
if errors:
    logger.info("{:13,} update errors".format(errors))

logger.info("{:13,} updated files".format(touches))

if args.dirs:
    # `dirs` counts directories still pending, hence the subtraction
    logger.info("{:13,} updated directories".format(totaldirs - dirs))

if args.test:
    logger.info("TEST RUN - No files modified!")
