In [34]:
# Importing modules
import glob
import os
from pathlib import Path
import pandas as pd
import zipfile

In [92]:
# Defining FileFinder
def FileFinder(YKR_IDs: list, input_folder_name: str, to_file=False):
    """
    Returns a list of time matrix files based on a list of YKR ID values
    from a specified input data folder. 

            Parameters:
                    YKR_IDs (list): A list of YKR_ID numbers
                    input_folder_name (str): Name of the input folder
                    to_file (boolean): If True, also returns a text file

            Returns:
                    file_paths (list): A list of filepaths
    """
    # Using assert to make sure input is ok
    assert type(YKR_IDs) == list, "The input of the YKR_ID:s needs to be a list!"
    # Finding the folder from the user's instance
    input_folder = Path(input_folder_name).absolute()
    # Using assert to make sure input folder exists
    assert os.path.isdir(input_folder) == True, "Check the input folder's name!"
    # Defining counter
    counter = 1
    # Creating an empty list for the filepaths
    file_paths = []
    # Creating an empty list for the filenames
    file_names = []
    # For-looping the input list
    for YKR_ID in YKR_IDs:
        # Compliting them with the possible full text file name
        full_name = f"travel_times_to_ {YKR_ID}.txt"
        # Appending the name to a list
        file_names.append(full_name)
    # For-looping the files in the user's instance
    for root, dirs, files in os.walk(input_folder):
        for name in files:
            # Checking if the filename is in the input list
            if name not in file_names:
                continue
            # Appending the name to a list
            file_paths.append(name)
    # Checking if the two lists are the same length
    if len(file_paths) != len(file_names):
        # For-looping the input list
        for fn in file_names:
            # Checking if the file name is in the file path list
            if fn not in file_paths:
                # Printing a warning to inform user
                print(f"File {fn} does not exist in the folder: {input_folder_name}.\nMake sure the YKR_ID values in the input list are typed correctly.")
            else:
                continue
    # For-looping the file path list
    for fp in file_paths:
        # Informing the user on the progress
        print(f"Processing file {fp}. Progress: {counter}/{len(file_paths)}")
        # Increasing counter
        counter += 1
    # Checking for the optional parameter (default is False)
    if to_file == True:
        # Writing the list of filepaths to a text file
        with open("YKR_ID_fps", "w") as output:
            output.write(str(file_paths))
        # Informing the user of the name of the new text file
        print("Wrote the filepaths to: YKR_ID_fps.txt")
    # Returning the list of filepaths
    return file_paths

The reason the FileFinder has many for-loops is because the FileFinder deals with an awkward order problem. See, for it to work with input folders with more files than the given input list, there has to be a specific order. After the assertions and initializing the counter and the lists, the FileFinder completes the names of the given input YKR_ID list. Then, it checks if the files in the input folder match the input list names. If there are e.g. falsely typed YKR ID's in the input list, the FileFinder gives the user a warning. Then there is this "fake" progress indicator to inform the user of the files FileFinder is processing. Finally, the FileFinder checks if the user asks for the file path list to be also written to a text file. 
I decided on this order because there is no reason to go through all of the files in the users input folder, because there could be thousands of files there if, for example, the user has also used the unzip-function for extracting all of the files.

In [84]:
# A test list for the FileFinder
lista = ["5787544", "5787545", "5787546", "5787547", "5787548", "5787549", "5787550", "1234567", "5990000", "5990001", "5990002"]

In [93]:
# Testing the FileFinder
FileFinder(lista, "input_data")

File travel_times_to_ 1234567.txt does not exist in the folder: input_data.
Make sure the YKR_ID values in the input list are typed correctly.
Processing file travel_times_to_ 5787544.txt. Progress: 1/10
Processing file travel_times_to_ 5787545.txt. Progress: 2/10
Processing file travel_times_to_ 5787546.txt. Progress: 3/10
Processing file travel_times_to_ 5787547.txt. Progress: 4/10
Processing file travel_times_to_ 5787548.txt. Progress: 5/10
Processing file travel_times_to_ 5787549.txt. Progress: 6/10
Processing file travel_times_to_ 5787550.txt. Progress: 7/10
Processing file travel_times_to_ 5990000.txt. Progress: 8/10
Processing file travel_times_to_ 5990001.txt. Progress: 9/10
Processing file travel_times_to_ 5990002.txt. Progress: 10/10


['travel_times_to_ 5787544.txt',
 'travel_times_to_ 5787545.txt',
 'travel_times_to_ 5787546.txt',
 'travel_times_to_ 5787547.txt',
 'travel_times_to_ 5787548.txt',
 'travel_times_to_ 5787549.txt',
 'travel_times_to_ 5787550.txt',
 'travel_times_to_ 5990000.txt',
 'travel_times_to_ 5990001.txt',
 'travel_times_to_ 5990002.txt']

In [82]:
print(Path("input_data").absolute())

C:\Users\zuzzuz\Desktop\PYQGIS_101\AUTOGIS_PACKAGE\final-assignment-pjustus\input_data


In [2]:
def TableJoiner(fp_list: list):
    
    
    
    
    

SyntaxError: unexpected EOF while parsing (<ipython-input-2-6b3f7d4f316b>, line 1)

In [47]:
# Defining unzip
def unzip(zip_file: str, target_folder: str):
    """
    Extracts selected file to a selected directory. 

            Parameters:
                    zip_file (str): Name of the zip file
                    target_dir (str): Name of the target directory

            Returns:
                    None
    """
    # Reading zipfile
    with zipfile.ZipFile(zip_file, "r") as zip_ref:
        # Extracting zipfile to target folder
        zip_ref.extractall(target_folder)
        # Informing user of the progress
        print(f"File: {zip_file} extracted to: {target_folder}.")

In [48]:
# Testing unzip
unzip("MetropAccess_YKR_grid.zip", "grid_file")

File: MetropAccess_YKR_grid.zip extracted to: grid_file.


In [49]:
unzip("HelsinkiRegion_TravelTimeMatrix2018.zip", "TTM_files")

File: HelsinkiRegion_TravelTimeMatrix2018.zip extracted to: TTM_files.


In [51]:
len(file_paths)

NameError: name 'file_paths' is not defined

In [3]:
def Visualizer():

SyntaxError: unexpected EOF while parsing (<ipython-input-3-935f67ef1ef3>, line 1)

In [4]:
def ComparisonTool():

SyntaxError: unexpected EOF while parsing (<ipython-input-4-c7fae3c3a60c>, line 1)