<img src="_media/logo_stellantis.png" width="300">

<font size="+3"><b><center>Tutorial 03: Use SFTP Server</center></b></font>

This notebook explains how to download and upload files using an SFTP server.

**Make sure you have created the *application.yml* file in the *conf* directory and filled in the *sftp* section (see this [section](https://github.psa-cloud.com/brc14/app00#user-specific-settings) of the *README* for more information)**

### Import

In [1]:
%load_ext autoreload
%autoreload 2
import datetime
import os
import random
import shutil
from random import choices

import pandas as pd

from app_template.infra.sftp import SftpClient

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 100)



### Instantiate SftpClient object

When you create an instance of the SftpClient class, a sub-directory is created in the data directory.

This subdirectory serves as a transition directory for the files that you download from the sftp server or upload to the server.

In [2]:
# List the data directory before and after building the client, so the
# transit sub-directory it works with is visible in the output.
if "data" in os.listdir("../../"):
    print("Before creating an instance", os.listdir("../../data"), sep="\n")
sftp_servers = SftpClient(local_dirname="sftp")
print("After creating an instance", os.listdir("../../data"), sep="\n")

Before creating an instance
['sftp']
After creating an instance
['sftp']


In [3]:
# Instance attributes exposed by the client: configured servers, local transit
# directory, and the names of the files fetched by the latest download.
print(
    f"List of Sftp servers names: {sftp_servers.server_names}",
    f"Local directory for sftp files: {sftp_servers.local_dirpath}",
    f"Last files downloaded: {sftp_servers.last_downloaded_filenames}",
    sep="\n",
)

List of Sftp servers names: ['filebox_sftp', 'opv_ac_sftp']
Local directory for sftp files: /gpfs/user/sc24609/brc14/app00/data/sftp
Last files downloaded: []


### Upload files to sftp server

#### Create example data

In [4]:
# Generate reproducible sample data.
# A private generator seeded in this cell makes every run (and every re-run of
# this cell) produce the same vehicles; the module-level `choices` function
# draws from the shared, unseeded generator and gives different data each time.
SEED = 42
N_VEHICLES = 200
rng = random.Random(SEED)

veh = pd.DataFrame({"vin": list(range(N_VEHICLES))})
veh["brand"] = rng.choices(["peugeot","opel","citroen","fiat"], k=N_VEHICLES)
veh["col"] = rng.choices(["green","black","white","red"], k=N_VEHICLES)
veh["odometer"] = rng.choices(range(20000), k=N_VEHICLES)
veh.head(5)

Unnamed: 0,vin,brand,col,odometer
0,0,citroen,green,905
1,1,fiat,green,10046
2,2,peugeot,green,18827
3,3,fiat,green,8318
4,4,opel,black,17037


In [5]:
# Write one CSV per brand into the client's local directory.
# `ts` is a YYYYMMDD date stamp; later cells reuse it to rebuild the file names.
now = datetime.datetime.now()
ts = now.strftime("%Y%m%d")
for brand in ("peugeot", "opel", "citroen", "fiat"):
    brand_rows = veh.loc[veh["brand"] == brand]
    csv_path = os.path.join(sftp_servers.local_dirpath, f"{brand}_{ts}.csv")
    brand_rows.to_csv(csv_path)

#### Upload multiple files

In [6]:
# Upload two of the generated CSV files to the server root and list the root
# before and after, so the effect of the upload is visible.
root_listing_args = {"server_name": 'filebox_sftp', "server_dirpath": "/", "sort": True}

print("Content in sftp server before uploading: \n")
print(sftp_servers.server_listdir(**root_listing_args))
print("\n----------------------------------------------\n")

files_to_upload = [f"peugeot_{ts}.csv", f"citroen_{ts}.csv"]
sftp_servers.upload_files(
    server_name='filebox_sftp',
    filenames=files_to_upload,
    server_dirpath="/",
    overwrite=True,
)

print("Content in sftp server after uploading: \n")
print(sftp_servers.server_listdir(**root_listing_args))

Content in sftp server before uploading: 

{}

----------------------------------------------

Content in sftp server after uploading: 

{'citroen_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 14), 'is_directory': False}, 'peugeot_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 13), 'is_directory': False}}


#### Upload file and create folder 

In [7]:
# Upload a single file (passed as a plain string, not a list) into the "/FCA"
# folder, then list both the server root and that folder.
root_listing_args = {"server_name": 'filebox_sftp', "server_dirpath": "/", "sort": True}
fca_listing_args = {"server_name": 'filebox_sftp', "server_dirpath": "/FCA", "sort": True}
separator = "\n----------------------------------------------\n"

print("Content in sftp server before uploading: \n")
print(sftp_servers.server_listdir(**root_listing_args))
print(separator)

sftp_servers.upload_files(
    server_name='filebox_sftp',
    filenames=f"fiat_{ts}.csv",
    server_dirpath="/FCA",
    overwrite=True,
)

print("Content in sftp server after uploading: \n")
print(sftp_servers.server_listdir(**root_listing_args))
print(separator)

print("Content in the new folder in sftp server: \n")
print(sftp_servers.server_listdir(**fca_listing_args))

Content in sftp server before uploading: 

{'citroen_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 14), 'is_directory': False}, 'peugeot_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 13), 'is_directory': False}}

----------------------------------------------

Content in sftp server after uploading: 

{'FCA': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 16), 'is_directory': True}, 'citroen_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 14), 'is_directory': False}, 'peugeot_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 13), 'is_directory': False}}

----------------------------------------------

Content in the new folder in sftp server: 

{'fiat_20210628.csv': {'lm_date': datetime.datetime(2021, 6, 28, 15, 8, 16), 'is_directory': False}}


### Download files from sftp server

#### Clean local directory

In [8]:
#Remove all files in local directory
def clean_local(dirpath=None):
    """Delete everything inside a local directory, keeping the directory itself.

    Parameters
    ----------
    dirpath : str or os.PathLike, optional
        Directory to empty. When omitted, ``sftp_servers.local_dirpath`` is
        used, so the zero-argument call cleans the same directory as before.

    Real sub-directories are removed recursively. Regular files and symbolic
    links (including links that point at directories) are unlinked; the target
    of a link is never touched.
    """
    if dirpath is None:
        # Looked up at call time so the helper follows the current client.
        dirpath = sftp_servers.local_dirpath
    for name in os.listdir(dirpath):
        path = os.path.join(dirpath, name)
        # os.path.isdir follows symlinks, and shutil.rmtree raises OSError when
        # given a symlink, so links to directories must go through os.remove.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)

# Empty the local directory, then list it to confirm nothing is left.
clean_local()
print("Content in local directory: \n", os.listdir(sftp_servers.local_dirpath), sep="\n")

Content in local directory: 

[]


#### Download a specific file

In [9]:
# Fetch the fiat file uploaded earlier to "/FCA" by giving its exact name,
# show what arrived locally, then empty the local directory again.
fiat_request = {
    "server_name": 'filebox_sftp',
    "server_dirpath": "/FCA",
    "filename_patterns": f"fiat_{ts}.csv",
    "keep_only_last": True,
    "overwrite": True,
}
sftp_servers.download_files(**fiat_request)

print("Local directory content: \n", os.listdir(sftp_servers.local_dirpath), sep="\n")
print("\n Last downloaded files: ", sftp_servers.last_downloaded_filenames)

clean_local()

Local directory content: 

['fiat_20210628.csv']

 Last downloaded files:  ['fiat_20210628.csv']


#### Download all files in a directory

In [10]:
# Download all files using filename_patterns=None
sftp_servers.download_files(
    server_name='filebox_sftp',
    server_dirpath="/",
    filename_patterns=None,
    keep_only_last=False,
    overwrite=True
)

print("Local directory content: \n")
print(os.listdir(sftp_servers.local_dirpath))
print("\n Last downloaded files: ",sftp_servers.last_downloaded_filenames)

clean_local()

Local directory content: 

['citroen_20210628.csv', 'peugeot_20210628.csv']

 Last downloaded files:  ['citroen_20210628.csv', 'peugeot_20210628.csv']


#### Download using pattern

In [11]:
# Download all files using regex pattern
sftp_servers.download_files(
    server_name='filebox_sftp',
    server_dirpath="/",
    filename_patterns='[a-z]*_[0-9]*.csv',
    keep_only_last=False,
    overwrite=True
)

print("Local directory content: \n")
print(os.listdir(sftp_servers.local_dirpath))
print("\n Last downloaded files: ",sftp_servers.last_downloaded_filenames)

clean_local()

Local directory content: 

['citroen_20210628.csv', 'peugeot_20210628.csv']

 Last downloaded files:  ['citroen_20210628.csv', 'peugeot_20210628.csv']


#### Download last modified file

In [12]:
# Download last modified file using keep_only_last_file=True
sftp_servers.download_files(
    server_name='filebox_sftp',
    server_dirpath="/",
    filename_patterns='[a-z]*_[0-9]*.csv',
    keep_only_last=True,
    overwrite=True
)

print("Local directory content: \n")
print(os.listdir(sftp_servers.local_dirpath))
print("\n Last downloaded files: ",sftp_servers.last_downloaded_filenames)

clean_local()

Local directory content: 

['citroen_20210628.csv']

 Last downloaded files:  ['citroen_20210628.csv']
