Skip to content

Python3 search images by mime type recursively

AndyCrowd edited this page Sep 17, 2014 · 3 revisions

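Recursively search the current folder for images (detected by MIME type), record each image's path, resolution, MIME subtype and MD5 checksum in parallel arrays, and print them together with the number of files sharing the same checksum.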

#!/bin/python3

import magic
from PIL import Image
import glob, os
import sys
import shutil
import hashlib

##########
#from optparse import OptionParser
#parser = OptionParser()
#(options, args) = parser.parse_args()
#PathToFile = "".join(args)
##########
# Number of images recorded so far; the walk below also uses it as the index
# of the entry being added to the lists.
CountAll = 0
# Per-image records, filled in step with each other (same index = same image):
# file path, "WIDTHxHEIGHT" resolution, MIME subtype and MD5 digest.
ArrayOfFiles, ArrayOfRes, ArrayOfFSubTypes, md5Array = ([] for _ in range(4))
# Maps an MD5 digest to the number of recorded images that produced it.
count_md5_dupes = dict()
##########
#
# The MD5sum function
#
##########
# Set to anything other than the string "True" to disable checksumming.
MchkEnable = "True"


def GetMD5ofAfile(MDTR, var):
    """Return the MD5 hex digest of the file at path ``MDTR``.

    Parameters:
        MDTR: path of the file to hash.
        var:  index of the file in the caller's bookkeeping lists. It is
              accepted for backward compatibility but no longer used: the
              file is identified by ``MDTR`` alone, so the function does not
              depend on any global list being filled in beforehand.

    Returns:
        The 32-character hexadecimal MD5 digest, or the string "Skipped"
        when ``MchkEnable`` is not "True".

    Raises:
        OSError: if the file cannot be opened or read.
    """
    if MchkEnable != "True":
        return "Skipped"
    digest = hashlib.md5()
    # Read in fixed-size chunks so large images are never held in memory
    # whole; the ``with`` block closes the file even if a read fails.
    with open(MDTR, 'rb') as stream:
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()

  
###########
# Going through folders
###########
# Walk ``rootdir`` recursively and, for every file libmagic reports as an
# image that Pillow can fully decode, record its path, resolution, MIME
# subtype and MD5 digest in the module-level lists and count how many
# recorded images share each digest.
rootdir = "."
# A single libmagic handle serves the whole walk; it is opened and loaded
# once here instead of once per file, and closed when the walk ends.
# (No encoding handle is created: the encoding result was never used.)
mime_detector = magic.open(magic.MAGIC_MIME_TYPE)
mime_detector.load()
try:
    for root, subFolders, files in os.walk(rootdir):
        for FLS in files:
            PathToFile = os.path.join(root, FLS)
            mtype = mime_detector.file(PathToFile)
            if not mtype:
                # No usable answer from libmagic for this file; skip it
                # rather than fail on a missing result.
                continue
            FType, _sep, FSubTypes = mtype.partition('/')
            # Exact comparison: a substring test could also match other
            # text that merely contains the word "image".
            if FType != "image":
                continue
            try:
                myimg = Image.open(PathToFile)
            except IOError:
                print("Cannot open the «",PathToFile,"» file!")
                continue
            try:
                W, H = myimg.size
                # Force a full decode so truncated or corrupt pixel data is
                # detected here, without building a list of every pixel.
                myimg.load()
            except IOError:
                print("An error was found in a «",PathToFile,"» file!")
                continue
            finally:
                myimg.close()
            # GetMD5ofAfile is given the index of this entry, so the path
            # must already be in ArrayOfFiles when it is called.
            ArrayOfFiles.append(PathToFile)
            Mfilename = GetMD5ofAfile(PathToFile, CountAll)
            md5Array.append(Mfilename)
            ArrayOfRes.append("{0}x{1}".format(W, H))
            ArrayOfFSubTypes.append(FSubTypes)
            count_md5_dupes[Mfilename] = count_md5_dupes.get(Mfilename, 0) + 1
            CountAll = CountAll + 1
finally:
    mime_detector.close()
#####
# Report: one line per recorded image with its path, resolution, MIME
# subtype, MD5 digest and the number of recorded images sharing that digest.
for row in range(CountAll):
    digest = md5Array[row]
    print(
        ArrayOfFiles[row],
        ArrayOfRes[row],
        ArrayOfFSubTypes[row],
        digest,
        count_md5_dupes[digest],
    )

Clone this wiki locally