Skip to content
AndyCrowd edited this page Sep 27, 2014 · 6 revisions

Check the integrity of images with a Python 3 script. Still to do: the command-line handling, and then the part that moves files.

#!/bin/python3

import magic
from PIL import Image
import glob, os
import sys
import shutil
import hashlib
import time
from stat import * 
from collections import defaultdict
import argparse

#######
#
# CMD line check
# 
# NOTE: this parser is still the placeholder example from the argparse
# documentation; nothing else in this script reads `args`.
parser = argparse.ArgumentParser(description='Process some integers.')
# nargs='*' (instead of '+') lets the image check start without having to
# pass dummy integers on the command line.
parser.add_argument('integers', metavar='N', type=int, nargs='*',
                   help='an integer for the accumulator')
parser.add_argument('--sum', dest='accumulate', action='store_const',
                   const=sum, default=max,
                   help='sum the integers (default: find the max)')

args = parser.parse_args()
# max() raises ValueError on an empty sequence, so only report a result when
# integers were actually supplied.
if args.integers:
    print(args.accumulate(args.integers))

#
#
######
#UserInfoEnabled = "False"
# The `pwd` module is needed for the "UserPerm" report column. When it cannot
# be imported the script reports the failure and stops; otherwise the
# UserInfoEnabled flag is set to the string "True".
try:
    import pwd
    UserInfoEnabled = "True"
except ImportError:
    UserInfoEnabled = "False"
    print("Failed to import 'pwd'")
    exit()
 

##########
#from optparse import OptionParser
#parser = OptionParser()
#(options, args) = parser.parse_args()
#PathToFile = "".join(args)
##########

## ARRAYS !!!!


# Lists filled while the directory tree is scanned.
ArrayOfFiles = list()          # paths of images that decoded without error
ArrayOfRes = list()            # "WIDTHxHEIGHT" strings
ArrayOfFSubTypes = list()      # MIME subtype of each image
FileHashArray = list()         # checksum strings

# Lookup tables.
count_FileHash_dupes = dict()  # checksum -> how many files produced it
Width = dict()                 # image index -> width
Height = dict()                # image index -> height
SkipClone = dict()             # image index -> 0 (print the row) or 1 (hide it)
DupeRes = defaultdict(int)     # (width, height) -> image count; missing keys read as 0

####
## Start Variables !!!!
####

# Emit a single dot (no newline) on stdout before the scan begins.
print('.', end='')
TMPstr = ""    # scratch buffer for the report line being assembled
CountAll = 0   # number of images accepted so far

###
## ModifAble variables !!!!
###

# Settings are compared as strings throughout the script, so the flags below
# must stay the literal text "True" to be switched on.

# Columns of the report, printed in this order. Recognised names:
# "Path", "Res", "Hash", "Clones", "Size", "UserPerm", "DupeRes".
OrderYouNeed = ["Path","DupeRes","Clones","Size"]

# Digest used for clone detection: md5, sha1, sha224, sha256, sha384 or sha512.
HashType = "md5"

MchkEnable = "True"        # "True" -> compute checksums; anything else skips them
ShowClones = "True"        # "True" -> also print rows whose checksum was already seen
YouNeedFileSize = "True"

#############

def size4human(this_size):
    """Format a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 and is then
    rendered with one decimal place and the matching unit, e.g. ``"1.5 KB"``.

    Parameters:
        this_size: size in bytes (int or float).

    Returns:
        A string such as ``"12.0 bytes"`` or ``"3.2 MB"``. Sizes of 1024 TB
        and above are expressed in PB; previously the function fell off the
        end of the loop and returned None for them.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB']
    for SizeForMe in units[:-1]:
        if this_size < 1024.0:
            return "%3.1f %s" % (this_size, SizeForMe)
        this_size /= 1024.0
    # Anything left over is reported in the largest unit instead of None.
    return "%3.1f %s" % (this_size, units[-1])

##########
#
# The FileHashsum function
#
##########


def GetFileHashofAfile(MDTR,var):
    """Return the hexadecimal checksum of the file at path ``MDTR``.

    The digest algorithm is selected by the module-level ``HashType`` setting.
    When the module-level ``MchkEnable`` setting is not the string "True",
    no file is read and the string "Skipped" is returned.

    Parameters:
        MDTR: path of the file to hash.
        var:  position of the file in the caller's bookkeeping. It is no
              longer used for the lookup and is kept only so that existing
              calls remain valid.

    Returns:
        The hex digest string, or "Skipped" when checksums are disabled.

    Prints "Error in HashType" and exits the interpreter when ``HashType`` is
    not one of md5, sha1, sha224, sha256, sha384 or sha512.
    """
    if MchkEnable != "True":
        return "Skipped"

    SupportedHashes = ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")
    if HashType not in SupportedHashes:
        print("Error in HashType")
        exit()
    FileHash = hashlib.new(HashType)

    # Hash the path that was handed in. The earlier ArrayOfFiles[var] lookup
    # picked a different file whenever that list and the caller's counter
    # were out of step.
    # Reading in chunks keeps large files out of memory, and the with-block
    # closes the handle even when a read fails.
    with open(MDTR, 'rb') as FileObj:
        for Chunk in iter(lambda: FileObj.read(1 << 20), b''):
            FileHash.update(Chunk)
    return FileHash.hexdigest()

  
###########
# Going through folders
###########
rootdir = "."

# Build and load the libmagic detectors once; they do not depend on the file
# being inspected, so recreating them for every file was wasted work.
mimeTypeIs = magic.open(magic.MAGIC_MIME_TYPE)
mimeE = magic.open(magic.MAGIC_MIME_ENCODING)
mimeTypeIs.load()
mimeE.load()

# Walk the tree below rootdir. Every file whose MIME type is an image is
# opened and fully decoded; files that fail are reported and skipped, files
# that pass are recorded (path, resolution, subtype, checksum, clone count).
for root, subFolders, files in os.walk(rootdir):
    for FLS in files:
        PathToFile = "{0}/{1}".format(root,FLS)
        mtype = mimeTypeIs.file(PathToFile)
        # The encoding is not used by the report; the name is kept available.
        mimeEnc = mimeE.file(PathToFile)

        # Guard against an empty result or one without a '/' so that
        # splitting the MIME type cannot raise.
        if not mtype:
            continue
        FType, Sep, FSubTypes = mtype.partition('/')
        if "image" not in FType:
            continue

        # Open the image exactly once, inside the error handling. The earlier
        # extra Image.open() outside any try block crashed the whole scan on
        # the first unreadable image.
        try:
            myimg = Image.open(PathToFile,"r")
        except IOError:
            print("Cannot open the «",PathToFile,"» file!")
            continue
        try:
            # Width and height are stored as strings, as before.
            W, H = (str(Side) for Side in myimg.size)
            # load() decodes the pixel data, which is what exposes truncated
            # or corrupt files, without building a list of every pixel.
            myimg.load()
        except IOError:
            print("An error was found in a «",PathToFile,"» file!")
            continue
        finally:
            myimg.close()

        # Record the path before hashing, and only once, so that index
        # CountAll refers to this file in every per-image container.
        ArrayOfFiles.append(PathToFile)
        Mfilename = GetFileHashofAfile(PathToFile,CountAll)
        if Mfilename == "Skipped":
            # Checksums are disabled: store the "0" placeholder and an empty
            # clone count under that same key so the report can look it up.
            Mfilename = "0"
            count_FileHash_dupes[Mfilename] = ""
            IsClone = False
        else:
            count_FileHash_dupes[Mfilename] = count_FileHash_dupes.get(Mfilename, 0) + 1
            IsClone = count_FileHash_dupes[Mfilename] > 1
        FileHashArray.append(Mfilename)

        # A row is hidden only for a repeated checksum when ShowClones is off.
        if IsClone and ShowClones != "True":
            SkipClone[CountAll] = 1
        else:
            SkipClone[CountAll] = 0

        ResAll = "{0}x{1}".format(W,H)
        Width[CountAll] = W
        Height[CountAll] = H
        DupeRes[W,H] += 1
        ArrayOfRes.append(ResAll)
        ArrayOfFSubTypes.append(FSubTypes)
        CountAll = CountAll + 1
#####
# Print one report line per accepted image. The columns and their order come
# from OrderYouNeed; recognised names are
# "Path", "Res", "Hash", "Clones", "Size", "UserPerm" and "DupeRes".
# An unknown name prints "ERROR in Order type" and stops the script.
I = 0
for I in range(CountAll):
    WforDup = Width[I]
    HforDup = Height[I]
    Columns = []
    for ToShowInOrder in OrderYouNeed:
        if ToShowInOrder == "Path":
            Columns.append("|Path:|"+ArrayOfFiles[I])
        elif ToShowInOrder == "Res":
            Columns.append("|Res:|"+ArrayOfRes[I])
        elif ToShowInOrder == "Hash":
            Columns.append("|Hash:| "+FileHashArray[I])
        elif ToShowInOrder == "Clones":
            GetHFA = FileHashArray[I]
            # .get() keeps the report going when no count was stored for
            # this checksum (e.g. the placeholder used for skipped hashes).
            Columns.append("|Clones:| "+str(count_FileHash_dupes.get(GetHFA, "")))
        elif ToShowInOrder == "Size":
            FSize4Print = os.path.getsize(ArrayOfFiles[I])
            Columns.append("|Size:| "+size4human(FSize4Print))
        elif ToShowInOrder == "UserPerm":
            st = os.stat(ArrayOfFiles[I])
            # The previous `TMPstr += "|", pwd.getpwuid(...)` added a tuple
            # to a string and raised TypeError; convert the entry to text.
            Columns.append("|"+str(pwd.getpwuid(st[ST_UID])))
        elif ToShowInOrder == "DupeRes":
            Columns.append("|DupeRes:| "+str(DupeRes[WforDup,HforDup]))
        else:
            print("ERROR in Order type")
            exit()
    TMPstr = "".join(Columns)
    # Rows flagged in SkipClone are built but not printed.
    if SkipClone[I] == 0:
        print(TMPstr,"|")
    TMPstr = ""

#################### WHILE MOVE ####################
Clone this wiki locally