# IMDB File Structure for Deployment

The purpose of this notebook is to create the file structure used for deploying the IMDB data. This will facilitate loading the saved training data, testing data, and models.

In [1]:
#import needed libraries
from pprint import pprint
import os, json

In [2]:
#create directories function
def create_directories_from_paths(nested_dict):
    """Recursively create the parent directories for file paths in a nested dictionary.

    Every string or ``os.PathLike`` value is treated as a *file* path: only its
    parent directory (``os.path.dirname``) is created, never the path itself.
    Nested dictionaries are walked recursively; values of any other type are
    ignored. A message is printed for each directory that had to be created.

    Source: OpenAI. (2023). ChatGPT [Large language model]. https://chat.openai.com

    Parameters:
    nested_dict (dict): The nested dictionary containing file paths.

    Returns:
    None
    """
    for value in nested_dict.values():
        if isinstance(value, dict):
            # If the value is a dictionary, recurse into it
            create_directories_from_paths(value)
        elif isinstance(value, (str, os.PathLike)):
            # Treat the value as a file path and keep only its directory part;
            # os.fspath lets pathlib.Path values work the same way as strings
            directory_path = os.path.dirname(os.fspath(value))
            # An empty directory part means the file lives in the current folder
            if directory_path and not os.path.exists(directory_path):
                # exist_ok=True avoids a crash if the directory appears between
                # the existence check above and this call
                os.makedirs(directory_path, exist_ok=True)
                print(f"Directory created: {directory_path}")

In [3]:
# Central registry of every file location the project reads from or writes to,
# grouped by purpose (data, images, models).
FPATHS = {
    'data': {
        'processed': 'Data-NLP/processed-data-no-html.joblib',
        'ml': {
            'train': 'Data-NLP/ml/train-data.joblib',
            'test': 'Data-NLP/ml/test-data.joblib',
        },
        'tf': {
            'train_tf': 'Data-NLP/nn/train',
            'test_tf': 'Data-NLP/nn/test',
        },
    },
    'images': {
        'wordcloud-lemmas': 'Images/WordCloud-Joined-Lemmas.png',
        'wordcloud-tokens': 'Images/WordCloud-Joined-Tokens.png',
        'freqdist-lemmas': 'Images/FreqDist-Lemmas.png',
        'freqdist-tokens': 'Images/FreqDist-Tokens.png',
        'TheMovieDBLogo': 'Images/TheMovieDBLogo.svg',
        'TMDBLogo': 'Images/TMDBLogo.svg',
    },
    'models': {
        'logreg': 'Models/ml/best-ml-logreg-model.joblib',
        'gru': 'Models/nn/gru',
    },
}

# Show the registry (pprint sorts keys alphabetically for display only).
pprint(FPATHS)

{'data': {'ml': {'test': 'Data-NLP/ml/test-data.joblib',
                 'train': 'Data-NLP/ml/train-data.joblib'},
          'processed': 'Data-NLP/processed-data-no-html.joblib',
          'tf': {'test_tf': 'Data-NLP/nn/test',
                 'train_tf': 'Data-NLP/nn/train'}},
 'images': {'TMDBLogo': 'Images/TMDBLogo.svg',
            'TheMovieDBLogo': 'Images/TheMovieDBLogo.svg',
            'freqdist-lemmas': 'Images/FreqDist-Lemmas.png',
            'freqdist-tokens': 'Images/FreqDist-Tokens.png',
            'wordcloud-lemmas': 'Images/WordCloud-Joined-Lemmas.png',
            'wordcloud-tokens': 'Images/WordCloud-Joined-Tokens.png'},
 'models': {'gru': 'Models/nn/gru',
            'logreg': 'Models/ml/best-ml-logreg-model.joblib'}}


In [4]:
#save the filepaths so other notebooks/apps can load them with json.load
PATHS_FILE = 'config/filepaths.json'
# Derive the folder from PATHS_FILE so the two locations cannot drift apart
config_dir = os.path.dirname(PATHS_FILE)
if config_dir:
    os.makedirs(config_dir, exist_ok=True)
# Explicit encoding keeps the file identical across platforms; indent makes
# the config readable by humans (json.load returns the same dict)
with open(PATHS_FILE, 'w', encoding='utf-8') as f:
    json.dump(FPATHS, f, indent=2)

In [5]:
#run the function: create the parent folder of every path stored in FPATHS
#(each value is treated as a file path, so only its directory part is created)
create_directories_from_paths(FPATHS)