In [1]:
import os
import sys
import re
import csv
import zipfile
import pandas as pd
import shutil


In [2]:
def extract_zip(_zip_path, _ofile):
    """Extract a zip archive and move the archive file to a backup folder.

    An archive at ``<parent>/<name>.zip`` is unpacked into
    ``<parent>/ZIP/<name>``. Afterwards the archive itself is moved into
    ``../UNZIP/`` (relative to the current working directory).

    Args:
        _zip_path: Path of the zip archive to extract.
        _ofile: Writable text file object that receives the error message
            if anything goes wrong.

    Returns:
        The folder the archive was extracted into, or ``None`` when an error
        occurred (the error is written to ``_ofile`` instead of being raised).
    """
    parent_path, file = os.path.split(_zip_path)

    try:
        # Archive name without its extension names the output folder
        output_folder = os.path.splitext(file)[0]
        output_path = os.path.join(parent_path, "ZIP", output_folder)
        # Trailing slash matters: check_dir() only looks at the directory part
        backup_path = os.path.join("..", "UNZIP/")
        # The context manager closes the archive even if extraction fails
        with zipfile.ZipFile(_zip_path, 'r') as zip_obj:
            zip_obj.extractall(output_path)
        # Move zip file to Unzip folder
        check_dir(backup_path, "c")
        shutil.move(_zip_path, os.path.join(backup_path, file))
        return output_path
    except Exception as e:
        _ofile.write("Exception occurred: {}".format(e) + "\n")  # Write error to text file
        return None


In [3]:
def check_dir(_file_path, _type):
    """Check, create or delete the directory that contains ``_file_path``.

    Only the directory part of ``_file_path`` (everything before the last
    path separator) is used; the final path component is ignored.

    Args:
        _file_path: Path whose directory part is the target directory.
        _type: ``"c"`` creates the directory when it is missing, ``"d"``
            removes it together with its contents when it exists, any other
            value only reports whether it exists.

    Returns:
        ``True`` after a create request, ``False`` after a delete request,
        otherwise whether the directory existed.
    """
    output_dir = os.path.split(_file_path)[0]
    exist = os.path.exists(output_dir)

    if _type == "c":
        # A bare file name has an empty directory part: there is nothing to
        # create, and os.makedirs("") would raise FileNotFoundError.
        if output_dir and not exist:
            os.makedirs(output_dir, exist_ok=True)
        return True
    if _type == "d":
        if exist:
            shutil.rmtree(output_dir)
        return False
    return exist
        


In [4]:
def write_xcel2_txt(_file_name, _data, _ofile):
    """Write a short preview of tabular data to an open text file.

    The first three rows are written. When ``_file_name`` contains
    "metadata" the preview shows the first two columns, otherwise the first
    four. A dashed separator line is written after the preview.

    Args:
        _file_name: Name of the source file; only used to pick the column count.
        _data: Anything ``pandas.DataFrame`` accepts.
        _ofile: Writable text file object receiving the preview.
    """
    frame = pd.DataFrame(_data)
    column_count = 2 if "metadata" in _file_name else 4
    preview = frame.iloc[0:3, 0:column_count]
    _ofile.write(str(preview))
    _ofile.write("\n---------------------------------------\n")

In [5]:
_TEXT_PREVIEW_LINES = 5  # Leading lines copied from each .txt file


def _record_dir_info(dir_path, xfile):
    """Write a header and a short preview for every entry of ``dir_path`` to ``xfile``.

    Sub-directories and extracted zip archives are walked recursively using
    the same open file handle, so the report stays in traversal order.
    """
    for x in os.listdir(dir_path):
        xfile.write('****************************************************************************\n')
        xfile.write(f'### {x} ###\n')  # Write file name to text file
        entry_path = os.path.join(dir_path, x)
        try:
            # Split the extension from the name and normalise it to lowercase.
            ext = os.path.splitext(x)[-1].lower()

            if os.path.isdir(entry_path):
                # Ask the file system instead of guessing from a missing extension
                _record_dir_info(entry_path, xfile)
            elif ext == ".zip":
                zip_path = extract_zip(entry_path, xfile)
                # extract_zip logs its own error and returns None on failure
                if zip_path is not None:
                    _record_dir_info(zip_path, xfile)
            elif ext == ".csv":
                csv_frame = pd.read_csv(entry_path)
                write_xcel2_txt(x, csv_frame, xfile)
            elif ext in (".xls", ".xlsx"):
                excel_frame = pd.read_excel(entry_path)
                write_xcel2_txt(x, excel_frame, xfile)
            elif ext == ".txt":
                with open(entry_path, "r") as page:
                    # The line budget is counted per file, not shared across files
                    for line_no, line in enumerate(page):
                        if line_no >= _TEXT_PREVIEW_LINES:
                            break
                        xfile.write(str(line.strip()) + "\n")
        except Exception as e:
            # Best effort: log the problem and continue with the next entry
            xfile.write("Exception occurred: {}".format(e) + "\n")  # Write error to text file


def record_fileInfo(dir_path, output_path):
    """Append a preview of every file below ``dir_path`` to a text report.

    For each directory entry a header with its name is written, followed by:
    a recursive listing for directories and zip archives (archives are
    extracted first), the leading rows of CSV and Excel files, or the leading
    lines of ``.txt`` files. Other entries only get the header. Errors while
    handling an entry are written to the report instead of being raised.

    Args:
        dir_path: Directory to scan; a trailing separator is optional.
        output_path: Path of the report file. Its directory is created when
            missing and the file is opened in append mode.

    Raises:
        OSError: If ``dir_path`` cannot be listed or the report cannot be opened.
    """
    # If output dir doesn't exist, create dir
    check_dir(output_path, "c")

    # One handle for the whole traversal; recursion reuses it instead of
    # reopening the file, which used to interleave buffered writes.
    with open(output_path, "a") as xfile:
        _record_dir_info(dir_path, xfile)



In [6]:
# Directory to scan. Built with os.path.join because the previous literal used
# backslashes ("\Z", "\C"), which are invalid escape sequences in a normal
# string and only formed a usable path on Windows.
excel_path = os.path.join(".", "Zip", "Zip", "CIUS2015downloadablefiles", "CIUS2015datatables")
output_path = os.path.join(".", "Output", "resource_output.txt")
# Joining with "" appends a trailing separator to the directory path
record_fileInfo(os.path.join(excel_path, ""), output_path)

# Open output file & print result; the context manager closes the handle
with open(output_path) as report:
    print(report.read())


### aff_download_readme_ann.txt ###
****************************************************************************
### household.csv ###
      CONTROL  TOTROOMS TOTHCAMT  PERPOVLVL
0  '11000001'         8     '14'        501
1  '11000002'         7     '15'        501
2  '11000005'         8     '12'        501
---------------------------------------
****************************************************************************
### person.csv ###
      CONTROL JAGE JCITSHP JENROLL
0  '11000001'  '0'     '0'     '0'
1  '11000001'  '0'     '0'     '0'
2  '11000001'  '0'     '0'     '0'
---------------------------------------
****************************************************************************
### project.csv ###
      CONTROL JJOBCOMP JJOBCOMPYR JJOBCOST
0  '11000001'      '0'        '0'      '0'
1  '11000002'      '0'        '0'      '0'
2  '11000002'      '0'        '0'      '0'
---------------------------------------
****************************************************************