# Extract all or specific files from one or multiple zip files

## Goal

Extract all or specific files, using a file indicator, from one or multiple zip files, located in a specific folder.\
If files from multiple zip files need to be extracted to a separate folder per zip file, the zip file name will be used to name each new folder.\
In case extracted files need to be merged into one folder and file names are repeated across the different zip files, the zip file name will be added as a prefix to the extracted files.

## Example
* **Zip files to extract files from:**
> test1.zip [AAAB.csv, AAAM10.csv]\
> test2.zip [AAAB.csv, AAAM20.csv]


* **If extraction to one folder per zip file:**
    * if file_indicator = 'AAAM'
    > test1 [AAAM10.csv]\
    > test2 [AAAM20.csv]

* **If extraction to one folder from all zip files:**

    * if file_indicator = 'AAAM'
    > oneFolder [AAAM10.csv, AAAM20.csv]
    
    * if file_indicator = 'AAAB'
    > oneFolder [test1-AAAB.csv, test2-AAAB.csv]
    
    * if file_indicator = 'AAA' or ''
    > oneFolder [test1-AAAB.csv, test1-AAAM10.csv, test2-AAAB.csv, test2-AAAM20.csv]


## Python3 Code

In [1]:
from zipfile import ZipFile
import os

In [2]:
# Path of the directory that is scanned for zip files; only entries whose name
# ends with ".zip" are opened, and the output folders are created inside it.
# Written as a raw string so the Windows backslashes are not read as escapes.
direc = r"C:\Users\hassan.elhassan\OneDrive - Confo Therapeutics\Documents\NGS\2020-04-24 cycle6\AnnotatorMail\20200424"

In [3]:
# Substring used to select which archive members are extracted: a member is
# extracted only if its name inside the zip file contains this string.
# Use an empty string, "", to extract every member of every zip file.
file_indicator = "AnnotatedVRegion-AA.txt"

In [4]:
# True: extract the matching files of all zip files into one shared folder.
# False: extract into one folder per zip file, named after that zip file.
merge = True
# Name of the shared output folder, created inside `direc`; only used when merge is True.
merge_folder = "AA.txt files"

In [5]:
# When merging, collect the names of all matching files from every zip file in
# `direc`, once as a list (one entry per occurrence) and once as a set (unique
# names only). A difference in length between the two means that at least one
# file name occurs in more than one zip file, which the extraction step uses to
# decide whether extracted files need the zip file name as a prefix.
if merge:

    # names of all matching files, duplicates included
    files_list = []

    # loop over all entries in the given directory
    for zip_name in os.listdir(direc):

        # only zip files are of interest
        if not zip_name.endswith(".zip"):
            continue

        # os.path.join keeps the path valid on every platform, not only Windows;
        # 'with' closes the zip file after use
        with ZipFile(os.path.join(direc, zip_name)) as archive:

            # keep members whose name contains the indicator; directory entries
            # (names ending in "/") are skipped because a folder shared by
            # several zip files is not a repeated file name
            files_list.extend(
                member
                for member in archive.namelist()
                if file_indicator in member and not member.endswith("/")
            )

    # a set holds every name once, so repeated names collapse into one entry
    files_set = set(files_list)


In [18]:
# Extract the matching members of every zip file in `direc`.
#
# * merge is False: each zip file is extracted into its own folder inside
#   `direc`, named after the zip file without the .zip extension.
# * merge is True: everything is extracted into `direc`/`merge_folder`. If any
#   file name occurs in more than one zip file (len(files_list) differs from
#   len(files_set)), every extracted file gets "<zip file name>-" prepended to
#   its file name so that files from different zip files cannot overwrite each
#   other.

# 'and' short-circuits, so files_list/files_set are only read when merging
add_prefix = merge and len(files_list) != len(files_set)

# loop over all entries in the given directory
for zip_name in os.listdir(direc):

    # only zip files are of interest
    if not zip_name.endswith(".zip"):
        continue

    # zip file name without the .zip extension: folder name or file prefix
    zip_stem = os.path.splitext(zip_name)[0]

    # destination folder; os.path.join keeps the paths valid on every platform
    dest = os.path.join(direc, merge_folder if merge else zip_stem)

    # open zip file, 'with' will close the zip file after use
    with ZipFile(os.path.join(direc, zip_name)) as archive:

        # loop over the members of the zip file
        for member in archive.infolist():

            # skip members whose name does not contain the indicator
            if file_indicator not in member.filename:
                continue

            # extract to the destination folder (created if missing); the
            # returned path is the one actually written, including any
            # sanitising of the member name done by zipfile
            extracted = archive.extract(member, dest)

            # prefix only real files: renaming a directory entry would move it
            # away from the members that are extracted into it afterwards
            if add_prefix and not member.is_dir():

                # put the prefix on the file name itself, so members stored in
                # a sub-folder stay in that (already existing) sub-folder
                folder, base_name = os.path.split(extracted)
                prefixed = os.path.join(folder, zip_stem + "-" + base_name)

                # os.replace overwrites an existing target on every platform,
                # so re-running the extraction does not fail on Windows
                os.replace(extracted, prefixed)
