In [1]:
import ftplib
import functools
import getpass
import glob
import os
import posixpath
from pathlib import  Path

import pandas as pd


# Root folder that is scanned recursively for the .raw files to upload.
path_to_folder = '/media/kusterlab/internal_projects/active/TOPAS/WP31/SIMSI/raw_files'
# Machine sub-folders probed (in this order) when locating a raw file.
# Each name appears once: a repeated entry only adds a redundant existence
# check and can never be selected over its first occurrence.
machines = ['lumos_1', 'eclipse_1', 'lumos_3', 'eclipse_2', 'lumos_2']



def check_path_exist(func):
    """
    General decorator that checks that a file path exists before calling ``func``.

    ``func`` is only called when its first argument, ``path``, exists on disk.

    The decorated function returns:
      * the result of ``func`` when the path exists and ``func`` succeeds;
      * a one-element list holding an error message when ``func`` raises;
      * ``None`` when the path does not exist.

    USAGE:

    @check_path_exist
    def your_pandas_function(path):
        <code>
    """
    @functools.wraps(func)  # keep the name/docstring of the decorated function
    def wrapper(path:str,*args,**kwargs):
        if not os.path.exists(path):
            print(f'{path} does not exist')
            return None

        print('###')
        print(f'Reading the file {path}')
        try:
            df = func(path,*args,**kwargs)
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop a long-running loop.
        except Exception:
            print(f'# ERROR in OPENING {path}')
            return [f'error in reading the file {path} probably opened somewhere']
        print(f'{path} loaded with success!')
        return df
    return wrapper



def get_the_machine_folder(file_name, machine_names=None, root='/media/raw_files/'):
    """
    Locate a raw file in the per-machine folders and return its full path.

    The file is looked up as ``<root>/<machine>/raw/<file_name>`` for each
    machine in order; the path for the first machine that has it is returned.

    :file_name: name of the raw file, e.g. ``'sample.raw'``
    :machine_names: machine folder names to probe; defaults to the
        module-level ``machines`` list
    :root: folder containing the machine folders
    :return: ``<root>/<machine>/raw/<file_name>`` as a string
    :raises IndexError: when no machine folder contains the file (same
        exception type as before, now with a descriptive message)
    """
    candidates = machines if machine_names is None else machine_names
    # Normalise so that a root with or without trailing slash gives one '/'.
    prefix = root.rstrip('/') + '/'
    for machine in candidates:
        # Stop at the first hit instead of probing every machine folder.
        if os.path.exists(os.path.join(root, machine, 'raw', file_name)):
            return prefix + machine + '/raw/' + file_name
    raise IndexError(
        f'{file_name} was not found under {root} in any of the machine folders: {list(candidates)}'
    )



@check_path_exist
def massive_single_file_importer(original_file_path:str, active_session:ftplib.FTP,destination_file_name:str,destination_file_folder:str):
    """
    Upload a single file to the MassIVE repository over FTP.

    The destination folder is created first (a failure there is reported but
    not fatal, since the folder usually already exists), then the file is
    sent in binary mode. Upload failures are printed, not raised.

    :original_file_path: original path of the file in the netapp or anywhere
    :active_session: the active (logged-in) ftplib.FTP session object
    :destination_file_name: the name of the file in the massive repo
    :destination_file_folder: the destination folder in the massive repo
    :return: None
    """
    try:
        active_session.mkd(destination_file_folder)
    except ftplib.all_errors:
        print(f'Making the destination folder failed, probably the folder was existing in your repo')

    # Remote FTP paths always use '/', independent of the local OS.
    destination_file_path = posixpath.join(destination_file_folder,destination_file_name)
    try:
        # ``with`` closes the local file even when the transfer fails.
        with open(original_file_path,'rb') as file:
            active_session.storbinary(f'STOR {destination_file_path}', file)
    except ftplib.all_errors as err:
        # all_errors covers FTP protocol errors, OSError (incl. open()) and EOFError.
        print(f'{original_file_path} was not imported: {err}')
    else:
        print(f'the file {original_file_path} was imported')

# data set preparation

In [2]:
# Collect every .raw file below the project folder. The two directory levels
# directly above each file give `fp_pp` (grandparent folder name) and
# `batch_name` (parent folder name).
p = Path(path_to_folder)
# Sorted so the table (and therefore the upload order) is reproducible;
# plain glob order depends on the filesystem.
raw_paths = sorted(p.glob('**/*.raw'))
file_names = [raw_path.name for raw_path in raw_paths]
batch_name = [raw_path.parent.name for raw_path in raw_paths]
fp_pp = [raw_path.parent.parent.name for raw_path in raw_paths]
df = pd.DataFrame(list(zip(file_names,fp_pp,batch_name)),columns=['file_names','fp_pp','batch_name'])
# Resolve where each file actually lives on the instrument storage.
df['original_file_path'] = df['file_names'].apply(get_the_machine_folder)

# upload to MassIVE

In [3]:
# Credentials are read from the environment (with an interactive prompt as a
# fallback for the password) so that no secret is stored in the notebook.
massive_user = os.environ.get('MASSIVE_USER', 'asakhteman')
massive_password = os.environ.get('MASSIVE_PASSWORD') or getpass.getpass(f'MassIVE password for {massive_user}: ')

# The context manager sends QUIT and closes the connection even when an
# upload raises, so no FTP session is left dangling.
with ftplib.FTP('massive.ucsd.edu', massive_user, massive_password) as session:
    upload_rows = zip(df['original_file_path'], df['file_names'], df['batch_name'])
    for original_file_path, file_name, folder_name in upload_rows:
        print(file_name)
        massive_single_file_importer(original_file_path,session,file_name,folder_name)


5139_A01_058297_S00_U01_TMT11_R1.raw
###
Reading the file /media/raw_files/lumos_3/raw/5139_A01_058297_S00_U01_TMT11_R1.raw
the file /media/raw_files/lumos_3/raw/5139_A01_058297_S00_U01_TMT11_R1.raw was imported
/media/raw_files/lumos_3/raw/5139_A01_058297_S00_U01_TMT11_R1.raw loaded with success!
5139_A02_058297_S00_U02_TMT11_R1.raw
###
Reading the file /media/raw_files/lumos_3/raw/5139_A02_058297_S00_U02_TMT11_R1.raw
Making the destination folder failed, probably the folder was existing in your repo
the file /media/raw_files/lumos_3/raw/5139_A02_058297_S00_U02_TMT11_R1.raw was imported
/media/raw_files/lumos_3/raw/5139_A02_058297_S00_U02_TMT11_R1.raw loaded with success!
5139_A03_058297_S00_U03_TMT11_R1.raw
###
Reading the file /media/raw_files/lumos_3/raw/5139_A03_058297_S00_U03_TMT11_R1.raw
Making the destination folder failed, probably the folder was existing in your repo
the file /media/raw_files/lumos_3/raw/5139_A03_058297_S00_U03_TMT11_R1.raw was imported
/media/raw_files/lumos