Home
AndyCrowd edited this page Sep 27, 2014
·
6 revisions
Check the integrity of images with a Python 3 script. Still to do: the command-line handling, and then the "move" part.
#!/bin/python3
"""Image integrity checker: imports, command-line stub and shared global state."""

import argparse
import glob
import hashlib
import os
import shutil
import sys
import time
from collections import defaultdict
from stat import *  # the report section reads ST_UID from this star import

import magic
from PIL import Image

#######
#
# CMD line check
#
# NOTE(review): this is still the argparse tutorial example. It makes at least
# one integer argument mandatory and prints their max/sum; none of the values
# are used by the rest of the script. Replace once the real options are known.
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('integers', metavar='N', type=int, nargs='+',
                    help='an integer for the accumulator')
parser.add_argument('--sum', dest='accumulate', action='store_const',
                    const=sum, default=max,
                    help='sum the integers (default: find the max)')
args = parser.parse_args()
print(args.accumulate(args.integers))
#
#
######

# 'pwd' is needed for the "UserPerm" report column; the script refuses to run
# without it.
try:
    import pwd
except ImportError:
    UserInfoEnabled = "False"
    print("Failed to import 'pwd'")
    # sys.exit instead of the interactive-only exit() helper, and a non-zero
    # status so callers can see the failure.
    sys.exit(1)
else:
    UserInfoEnabled = "True"

##########
## ARRAYS !!!!
# Per-image data, filled by the directory scan and indexed by image number
# (0 .. CountAll - 1).
ArrayOfFiles = []          # path of each accepted image
ArrayOfRes = []            # "WxH" resolution string of each image
ArrayOfFSubTypes = []      # MIME subtype of each image
FileHashArray = []         # hex digest of each image
count_FileHash_dupes = {}  # digest -> number of images seen with that digest
Width = {}                 # image number -> width
Height = {}                # image number -> height
DupeRes = defaultdict(int)  # (width, height) -> number of images with that size
SkipClone = {}             # image number -> 1 to hide the row in the report, else 0

####
## Start Variables !!!!
####
sys.stdout.write('.')
CountAll = 0   # number of images accepted so far
TMPstr = ""    # scratch buffer for one report row
###
## Modifiable variables !!!!
###
HashType = "md5"          # md5, sha1, sha224, sha256, sha384 or sha512
MchkEnable = "True"       # "True" to hash files; anything else skips hashing
YouNeedFileSize = "True"
OrderYouNeed = ["Path", "DupeRes", "Clones", "Size"]  # report columns, in order
ShowClones = "True"       # "True" to list duplicate files in the report too

# Hash algorithms accepted in HashType.
_SUPPORTED_HASH_TYPES = ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")


#############
def size4human(this_size):
    """Return *this_size* (a byte count) as a string such as ``"1.5 KB"``.

    The value is divided by 1024 until it is below 1024 or the largest unit
    (TB) is reached, so sizes of 1024 TB and more are reported in TB instead
    of falling off the end of the loop and returning ``None``.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    for SizeForMe in units[:-1]:
        if this_size < 1024.0:
            return "%3.1f %s" % (this_size, SizeForMe)
        this_size /= 1024.0
    return "%3.1f %s" % (this_size, units[-1])


##########
#
# The FileHashsum function
#
##########
def GetFileHashofAfile(MDTR, var):
    """Return the hex digest of the file at path *MDTR*.

    The algorithm is taken from the module-level ``HashType``.  When
    ``MchkEnable`` is not ``"True"`` nothing is read and the string
    ``"Skipped"`` is returned.  An unsupported ``HashType`` prints
    "Error in HashType" and terminates the program.

    *var* (the caller's image index) is accepted for compatibility but no
    longer used: the file to hash is the one named by *MDTR*, not whatever
    happens to sit at ``ArrayOfFiles[var]``.
    """
    if MchkEnable != "True":
        return "Skipped"
    if HashType not in _SUPPORTED_HASH_TYPES:
        print("Error in HashType")
        sys.exit(1)
    FileHash = hashlib.new(HashType)
    # Feed the hash in 1 MiB chunks so large images are not read into memory
    # in one piece; the context manager closes the file even on errors.
    with open(MDTR, 'rb') as FileName:
        for FileData in iter(lambda: FileName.read(1 << 20), b""):
            FileHash.update(FileData)
    return FileHash.hexdigest()


###########
# Going through folders
###########
rootdir = "."
# Walk rootdir, keep every file that libmagic reports as an image and that PIL
# can fully decode, and record its path, hash, resolution and MIME subtype
# under the next image number (CountAll).

# The libmagic handles are created once and reused; creating them inside the
# loop opened two new handles per file that were never closed.
mimeTypeIs = magic.open(magic.MAGIC_MIME_TYPE)
mimeE = magic.open(magic.MAGIC_MIME_ENCODING)
mimeTypeIs.load()
mimeE.load()

for root, subFolders, files in os.walk(rootdir):
    for FLS in files:
        PathToFile = "{0}/{1}".format(root, FLS)
        mtype = mimeTypeIs.file(PathToFile)
        mimeEnc = mimeE.file(PathToFile)
        # partition() tolerates a missing '/' (or no answer at all) instead
        # of raising while splitting.
        FType, _sep, FSubTypes = (mtype or "").partition('/')
        if "image" not in FType:
            continue

        try:
            myimg = Image.open(PathToFile, "r")
        except OSError:
            print("Cannot open the «", PathToFile, "» file!")
            continue
        try:
            # Width/height are kept as strings, as before.
            W, H = (str(side) for side in myimg.size)
            # Force a full decode; a damaged file raises here. This replaces
            # building a list of every pixel just to trigger the decode.
            myimg.load()
        except OSError:
            print("An error was found in a «", PathToFile, "» file!")
            continue
        finally:
            myimg.close()

        # Exactly one entry per image in every per-image container, so that
        # index CountAll refers to the same image everywhere. (Appending the
        # path and hash twice shifted all later images.)
        ArrayOfFiles.append(PathToFile)
        Mfilename = GetFileHashofAfile(PathToFile, CountAll)
        if Mfilename == "Skipped":
            FileHashArray.append("0")
            count_FileHash_dupes[CountAll] = ""
            # The report looks clone counts up by hash value, which is "0"
            # here, and reads SkipClone for every image.
            count_FileHash_dupes["0"] = ""
            SkipClone[CountAll] = 0
        else:
            FileHashArray.append(Mfilename)
            MDA = Mfilename
            count_FileHash_dupes[MDA] = count_FileHash_dupes.get(MDA, 0) + 1
            # Hide second and later copies unless clones were requested.
            if count_FileHash_dupes[MDA] > 1 and ShowClones != "True":
                SkipClone[CountAll] = 1
            else:
                SkipClone[CountAll] = 0

        ResAll = "{0}x{1}".format(W, H)
        Width[CountAll] = W
        Height[CountAll] = H
        DupeRes[W, H] += 1
        ArrayOfRes.append(ResAll)
        ArrayOfFSubTypes.append(FSubTypes)
        CountAll = CountAll + 1

#####
I = 0
# Print one report row per accepted image, with the columns listed in
# OrderYouNeed. Accepted column names:
#   "Path", "Res", "Hash", "Clones", "Size", "UserPerm", "DupeRes"
for I in range(CountAll):
    WforDup = Width[I]
    HforDup = Height[I]
    RowParts = []
    for ToShowInOrder in OrderYouNeed:
        if ToShowInOrder == "Path":
            RowParts.append("|Path:|" + ArrayOfFiles[I])
        elif ToShowInOrder == "Res":
            RowParts.append("|Res:|" + ArrayOfRes[I])
        elif ToShowInOrder == "Hash":
            RowParts.append("|Hash:| " + FileHashArray[I])
        elif ToShowInOrder == "Clones":
            GetHFA = FileHashArray[I]
            # .get(): no count exists for a hash when hashing was skipped.
            RowParts.append("|Clones:| " + str(count_FileHash_dupes.get(GetHFA, "")))
        elif ToShowInOrder == "Size":
            FSize4Print = os.path.getsize(ArrayOfFiles[I])
            RowParts.append("|Size:| " + size4human(FSize4Print))
        elif ToShowInOrder == "UserPerm":
            st = os.stat(ArrayOfFiles[I])
            # str() is required: `TMPstr += "|", x` tried to add a tuple to a
            # str and raised TypeError.
            RowParts.append("|" + str(pwd.getpwuid(st[ST_UID])))
        elif ToShowInOrder == "DupeRes":
            RowParts.append("|DupeRes:| " + str(DupeRes[WforDup, HforDup]))
        else:
            print("ERROR in Order type")
            sys.exit(1)
    TMPstr = "".join(RowParts)
    # Rows flagged in SkipClone (duplicate files) are suppressed.
    if SkipClone.get(I, 0) == 0:
        print(TMPstr, "|")
    # Always reset, so a suppressed row never leaks into the next one.
    TMPstr = ""

#################### WHILE MOVE ####################