The following script creates the folders 'audio', 'docs' and 'images' (after checking that they don't already exist). It then iterates over the main folder, moving each file to the relevant folder according to its extension, and compiles a CSV file in which it records the name, format and size of each file. As indicated, it is assumed that the notebook is at the same level (i.e. in the same folder) as the files folder that contains the files to be moved. Relative paths are therefore used.

In [1]:
# Script step 1

# import the libraries we will need
import os
import shutil
import csv

# Create the folders into which the files will be moved.
# exist_ok=True makes the call a no-op when a folder is already there, so no
# separate (and race-prone) existence check is needed beforehand.
for foldername in ["audio", "docs", "images"]:
    os.makedirs(foldername, exist_ok=True)

# Tuples with the most common file formats for each type of file to analyse.
# They are tuples because str.endswith accepts a tuple of suffixes, so a single
# call can test a file name against a whole category.
# Every entry must start with a dot: the previous ",aac" entry could never
# match a real '.aac' file.

docs_extensions = (".txt", ".doc", ".docx", ".pages", ".odt", ".rtf", ".tex")
# NOTE: '.mp4' is deliberately kept in the audio group, as in the original list.
audio_extensions = (".mp3", ".mp4", ".m4a", ".wma", ".flac", ".aac")
images_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff")

# Each category is (destination folder, label written to the recap, extensions).
# The order docs -> audio -> images is the order in which a file name is tested.
categories = (
    ("docs", "doc", docs_extensions),
    ("audio", "audio", audio_extensions),
    ("images", "image", images_extensions),
)

# The header is written only when the recap file is new (or exists but is
# still empty); on later runs the rows are appended below the existing ones.
write_header = (
    not os.path.exists("recap.csv") or os.path.getsize("recap.csv") == 0
)

# Append mode creates the file when it is missing. The 'with' block guarantees
# that the recap file is closed even if moving one of the files fails.
with open("recap.csv", "a", newline="") as recap:
    writer = csv.writer(recap)
    if write_header:
        writer.writerow(["name", "type", "size (B)"])  # csv header

    # Iterate the entries of the current folder in alphabetical order.
    for file in sorted(os.listdir()):
        # Only regular files are sorted; this skips the destination folders
        # and any other directory found next to the files.
        if not os.path.isfile(file):
            continue

        # Separate the file name from its extension (the name goes in the recap).
        file_name, file_extension = os.path.splitext(file)
        # Compare in lower case so that e.g. 'PHOTO.JPG' is recognised too.
        lowered = file.lower()

        for folder, label, extensions in categories:
            if not lowered.endswith(extensions):
                continue

            # The size must be read before the file is moved away.
            size = os.stat(file).st_size
            # Move the file from the start folder to the destination folder.
            shutil.move(file, os.path.join(folder, file))
            writer.writerow([file_name, label, size])  # update recap
            # Print the file info (name, type and size in bytes).
            print("{} type:{} size:{}B".format(file_name, label, size))
            break  # a file belongs to at most one category

bw type:image size:94926B
ciao type:doc size:12B
daffodil type:image size:24657B
eclipse type:image size:64243B
pippo type:doc size:8299B
song1 type:audio size:1087849B
song2 type:audio size:764176B
trump type:image size:10195B


The following script, instead, analyses the images in the 'images' folder and extracts their colour information using the NumPy library.
Finally, it prints a table containing the information gathered.

In [2]:
# Script step 3

# import the libraries we will need
import os
import numpy as np
from PIL import Image
from tabulate import tabulate

# Table (list of rows) that is filled with one row per image.
tab = []

# Iterate the files in the 'images' folder. os.listdir returns the entries in
# arbitrary order, so they are sorted to make the table reproducible.
for im in sorted(os.listdir("images")):
    image_file = os.path.join("images", im)  # path of the image file

    # The context manager closes the image file as soon as its pixels have
    # been copied into the numpy array.
    with Image.open(image_file) as image:
        image_array = np.array(image)

    # splitext strips only the final extension, so 'my.photo.jpg' keeps the
    # name 'my.photo' instead of being cut at the first dot.
    name = os.path.splitext(im)[0]
    height, width = image_array.shape[:2]  # first axis: rows, second: columns

    # Start every row from zeros so that a missing channel is reported as 0
    # and no value can leak in from the previous image.
    grayscale, R, G, B, ALPHA = 0.00, 0.00, 0.00, 0.00, 0.00

    # 'ndim' tells whether the array has a channel axis at all.
    if image_array.ndim == 2:
        # Two dimensions: single-channel (black and white) image.
        grayscale = np.mean(image_array)

    elif image_array.ndim == 3:
        channels = image_array.shape[2]

        if channels >= 3:
            # Colour image: average each colour channel via array slicing.
            R = np.mean(image_array[:, :, 0])
            G = np.mean(image_array[:, :, 1])
            B = np.mean(image_array[:, :, 2])
            # A fourth channel means RGBA; with three channels ALPHA stays 0.
            if channels == 4:
                ALPHA = np.mean(image_array[:, :, 3])

        elif channels == 2:
            # Assumed to be grey level + alpha (e.g. Pillow's 'LA' mode);
            # indexing a third channel here would raise an IndexError.
            grayscale = np.mean(image_array[:, :, 0])
            ALPHA = np.mean(image_array[:, :, 1])

    # Add the row for this image, building a list of lists.
    tab.append([name, height, width, grayscale, R, G, B, ALPHA])

# Render the collected rows with 'tabulate': the column labels go in the
# header row and every float is shown with two decimals inside a boxed grid.
headers = "name height width grayscale R G B ALPHA".split()
rendered_table = tabulate(
    tab,
    headers=headers,
    tablefmt="fancy_grid",
    floatfmt='.2f',
)
print(rendered_table)

╒══════════╤══════════╤═════════╤═════════════╤════════╤════════╤═══════╤═════════╕
│ name     │   height │   width │   grayscale │      R │      G │     B │   ALPHA │
╞══════════╪══════════╪═════════╪═════════════╪════════╪════════╪═══════╪═════════╡
│ bw       │      512 │     512 │       21.48 │   0.00 │   0.00 │  0.00 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ daffodil │      500 │     335 │        0.00 │ 109.25 │  85.56 │  4.97 │    0.00 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ eclipse  │      256 │     256 │        0.00 │ 109.05 │ 109.52 │ 39.85 │  133.59 │
├──────────┼──────────┼─────────┼─────────────┼────────┼────────┼───────┼─────────┤
│ trump    │      183 │     275 │        0.00 │  97.01 │  98.99 │ 90.92 │    0.00 │
╘══════════╧══════════╧═════════╧═════════════╧════════╧════════╧═══════╧═════════╛
