# Illuminate to CEDARS Preprocessing for Submission

## Imports

In [None]:
import sys
if sys.version_info[0] < 3:
    import ConfigParser
else:
    import configparser
from glob import glob
from tqdm import tqdm_notebook
from os.path import basename
import os
import time
import datetime as dt
import zipfile
import paramiko

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

### Helper functions

In [None]:
def load_config(config_path=None):
    """Parse an INI file and publish the parser as the module-level ``config``.

    Args:
        config_path: Path of the INI file to read. When ``None`` the
            default ``../config/config.ini`` is used.

    Returns:
        The newly created parser. The same object is also stored in the
        global ``config``, which ``config_section_map`` reads from.
    """
    global config

    # Python 2 names the module ``ConfigParser``; Python 3 names it
    # ``configparser``. Only the branch that matches is ever evaluated.
    parser_module = ConfigParser if sys.version_info[0] < 3 else configparser
    config = parser_module.ConfigParser()

    ini_path = "../config/config.ini" if config_path is None else config_path
    config.read(ini_path)
    return config

def config_section_map(section):
    """Return every option of one config section as a plain dict.

    Reads from the module-level ``config`` parser, so ``load_config`` must
    have been called before this function.

    Args:
        section: Name of the INI section to read.

    Returns:
        A dict mapping each option name to its string value. An option whose
        value cannot be retrieved (for example a broken ``%(...)s``
        interpolation) is reported on stdout and mapped to ``None``.

    Raises:
        The parser's ``NoSectionError`` if ``section`` does not exist.
    """
    values = {}
    for option in config.options(section):
        try:
            values[option] = config.get(section, option)
        except Exception:
            # Best effort: one unreadable option must not hide the others.
            # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
            print("exception on %s!" % option)
            values[option] = None
    return values

def connect_to_sftp(host, port, username, password):
    """Open an SFTP session over a password-authenticated Paramiko transport.

    If you do not have Paramiko, enter "pip install paramiko" into your CMD.

    Args:
        host: SFTP server host name or address.
        port: SFTP server port; must be an integer.
        username: Login name.
        password: Login password.

    Returns:
        The object returned by ``paramiko.SFTPClient.from_transport``.

    Raises:
        Whatever Paramiko raises while connecting or authenticating. The
        transport is closed before the exception propagates.
    """
    # Create a transport object with the given host and port
    transport = paramiko.Transport((host, port))
    try:
        # Authorize the transport with the given username and password
        transport.connect(username=username, password=password)

        # Return an SFTP object built on the authorized transport
        return paramiko.SFTPClient.from_transport(transport)
    except Exception:
        # Do not leak the open transport when login or channel setup fails.
        transport.close()
        raise

def configure_selenium_chrome(download_folder_path=None):
    """Start a Chrome WebDriver with a 1600x900 window.

    Args:
        download_folder_path: Optional folder for downloads. When given, its
            absolute path is set as Chrome's default download directory and
            the popup content setting is set to 0.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument("window-size=1600,900")

    # Without a download folder there are no preferences to set.
    if not download_folder_path:
        return webdriver.Chrome(chrome_options=chrome_opts)

    chrome_opts.add_experimental_option("prefs", {
        "profile.default_content_settings.popups": 0,
        "download.default_directory": os.path.abspath(download_folder_path),
    })
    return webdriver.Chrome(chrome_options=chrome_opts)

def get_most_recent_file_in_dir(folder_path):
    """Returns the most recently changed file in a folder.

    Only regular files are considered. Sub-directories are skipped, because
    a directory's change time is bumped whenever a file is moved into it and
    it could otherwise be returned instead of a file.

    Args:
        folder_path: The path to the folder to search
    Returns:
        A string with the path of the most recently changed file in the
        folder (``folder_path`` joined with the file name).
    Raises:
        ValueError: If the folder contains no regular files.
    """
    # * means all; use e.g. *.csv if a specific format is needed
    candidates = [path for path in glob(folder_path + '/*') if os.path.isfile(path)]
    if not candidates:
        raise ValueError('no files found in folder: %s' % folder_path)
    return max(candidates, key=os.path.getctime)

## Download Files from Illuminate

In [None]:
# Load the notebook configuration. load_config also stores the parser in the
# module-level `config`, which config_section_map reads in the cells below.
conf = load_config('../config/config.ini')

In [None]:
# Illuminate UI credentials, read from the [Illuminate] config section
illuminate_url, illuminate_username, illuminate_password = (
    config_section_map("Illuminate")[key]
    for key in ('url', 'username', 'password')
)

In [None]:
# Settings from the [General] config section (both are strings as read)
schoolyear_option = config_section_map("General")['schoolyear']
generation_wait = config_section_map("General")['file_generation_wait']

# Building blocks for the folder and file names
folder_date = dt.date.today().strftime('%Y%m%d')
district_site = '_0000_' #MIGHT NEED TO UPDATE THIS IN A NON-SUMMIT CONTEXT

# '_<today>_<first two chars of schoolyear><last part minus one><schoolyear>'
date_and_year = '_{}_{}{}{}'.format(
    folder_date,
    schoolyear_option[:2],
    int(schoolyear_option[2:]) - 1,
    schoolyear_option,
)
folder_stem = ''.join((district_site, 'CEDARS', date_and_year))

In [None]:
# District options: one dict per district to download.
#   name   - display name printed while downloading
#   option - value selected in the report's site drop-down
#   id     - prefix of the renamed report files
#   folder - per-district sub-folder (and zip) name
districts = [
    dict(
        name=config_section_map("Illuminate")['district_name'],
        option=config_section_map("Illuminate")['district_site_id'],
        id=config_section_map("CEDARS SFTP")['district'],
        folder=config_section_map("CEDARS SFTP")['district'] + folder_stem,
    ),
]

In [None]:
# One dictionary per CEDARS report:
#   name      - the Illuminate download name
#   url       - the page that generates the report
#   file_stem - the stem of the file name (site code + report tag + date/year)
# The table below lists (name, url, report tag); the comprehension adds the
# shared prefix and suffix to every tag.
reports = [
    {'name': report_name,
     'url': report_url,
     'file_stem': district_site + report_tag + date_and_year}
    for report_name, report_url, report_tag in (
        ('2017-2018 CEDARS A. Locations',
         'https://summitwa.illuminateed.com/live/?report_id=3535&page=ReportsCreatorState',
         'Location'),
        ('2017-2018 CEDARS B. District Students (NEW)',
         'https://summitwa.illuminateed.com/live/?report_id=3468&page=ReportsCreatorState',
         'DistrictStudent'),
        ('2017-2018 CEDARS C. School Students',
         'https://summitwa.illuminateed.com/live/?report_id=3410&page=ReportsCreatorState',
         'SchoolStudent'),
        ('2017-2018 CEDARS D. Course Catalog',
         'https://summitwa.illuminateed.com/live/?report_id=3582&page=ReportsCreatorState',
         'CourseCatalog'),
        ('2017-2018 CEDARS E. Student Schedule',
         'https://summitwa.illuminateed.com/live/?report_id=3331&page=ReportsCreatorState',
         'StudentSchedule'),
        ('2017-2018 CEDARS F. Staff',
         'https://summitwa.illuminateed.com/live/?report_id=3332&page=ReportsCreatorState',
         'Staff'),
        ('2017-2018 CEDARS G. Staff Schedule',
         'https://summitwa.illuminateed.com/live/?report_id=3467&page=ReportsCreatorState',
         'StaffSchedule'),
        ('CEDARS H. Student Grade History (NEW)',
         'https://summitwa.illuminateed.com/live/?action=studentGradeHistory&page=StateReporting_Wa_Cedars',
         'StudentGradeHistory'),
        ('2017-2018 CEDARS I. Student Programs',
         'https://summitwa.illuminateed.com/live/?report_id=3465&page=ReportsCreatorState',
         'StudentPrograms'),
        ('2017-2018 CEDARS J. English Learners',
         'https://summitwa.illuminateed.com/live/?report_id=3594&page=ReportsCreatorState',
         'BilingualPrograms'),
        ('2017-2018 CEDARS K. Student Special Ed',
         'https://summitwa.illuminateed.com/live/?report_id=3624&page=ReportsCreatorState',
         'SpecEdPrograms'),
        ('2017-2018 CEDARS L. Student Ethnicity',
         'https://summitwa.illuminateed.com/live/?report_id=3333&page=ReportsCreatorState',
         'StudentEthnicity'),
        ('2017-2018 CEDARS M. Student Race',
         'https://summitwa.illuminateed.com/live/?report_id=3466&page=ReportsCreatorState',
         'StudentRace'),
        ('2017-2018 CEDARS N. Student Absence',
         'https://summitwa.illuminateed.com/live/?report_id=3456&page=ReportsCreatorState',
         'StudentAbsence'),
        ('2017-2018 CEDARS P. Student Discipline',
         'https://summitwa.illuminateed.com/live/?report_id=3595&page=ReportsCreatorState',
         'StudentDiscipline'),
        ('2017-2018 CEDARS Q. Learning Assistance Program Student Growth',
         'https://summitwa.illuminateed.com/live/?report_id=3577&page=ReportsCreatorState',
         'LAPStudentGrowth'),
    )
]


In [None]:
# Logs in to Illuminate
def login_to_illuminate_selenium(driver, host, username, password):
    """Sign in to Illuminate through the browser controlled by ``driver``.

    Args:
        driver: Selenium WebDriver to drive.
        host: Base URL of the Illuminate site; the login path is appended.
        username: Illuminate user name.
        password: Illuminate password.

    Raises:
        AssertionError: If the loaded page title does not contain
            "Illuminate Education".
    """
    driver.get(host + '/live/?prev_page=Main_NotDashboardPage&page=SisLogin')
    time.sleep(5)
    assert "Illuminate Education" in driver.title

    username_box = driver.find_element_by_id("username")
    username_box.clear()
    username_box.send_keys(username)

    password_box = driver.find_element_by_id("password")
    password_box.send_keys(password)
    # RETURN actuates the 'next' key that shows which district site to log in to
    password_box.send_keys(Keys.RETURN)
    time.sleep(3)

    # Actuate the 'login' key (we can just log in using the default site)
    driver.find_element_by_id("button_login").click()
    time.sleep(3)

In [None]:
# Waits a certain amount of time until a specific file is in a folder given the file path
def wait_for_new_file_in_folder(file_path, wait, poll_interval=0.25, settle=1):
    """Waits until a specific file shows up in a folder, or a timeout passes.

    Args:
        file_path: Full path of the file to wait for.
        wait: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between checks, so the loop does not
            spin at full CPU while waiting.
        settle: Seconds to sleep once the file has been seen, before
            returning.

    Returns:
        True if the file appeared in time. False if the wait timed out, in
        which case 'error: timed out' is also printed.
    """
    # A monotonic clock is not affected by system clock adjustments.
    deadline = time.monotonic() + wait
    while True:
        if os.path.isfile(file_path):
            time.sleep(settle)
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            print('error: timed out')
            return False
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
    

In [None]:
# Downloads one CEDARS file for one district
def download_one_CEDARS_file(driver, file, district, download_folder, district_folder):
    """Generate one CEDARS report in Illuminate, download it and file it away.

    Reads the module-level ``schoolyear_option`` and ``generation_wait``.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        file: Report dict with the keys 'url', 'name' and 'file_stem'.
        district: District dict with the keys 'option' and 'id'.
        download_folder: Folder the browser downloads into.
        district_folder: Folder the renamed report is moved into.

    Raises:
        selenium WebDriverException (or a subclass) if a required page
        element is missing or the report is not generated in time.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    from selenium.common.exceptions import WebDriverException

    # Go to the report
    driver.get(file['url'])

    # Choose the district; fall back to the lower-case element id
    try:
        district_select = Select(driver.find_element_by_id('SITE_ID'))
    except WebDriverException:
        district_select = Select(driver.find_element_by_id('site_id'))
    district_select.select_by_value(str(district['option']))

    # Choose the academic year; fall back to the lower-case element id
    try:
        year_select = Select(driver.find_element_by_id('ACADEMIC_YEAR'))
    except WebDriverException:
        year_select = Select(driver.find_element_by_id('academic_year'))
    year_select.select_by_value(str(schoolyear_option))

    # Choose date if option is there
    try:
        as_of_date = dt.date.today().strftime('%m/%d/%Y')
        elem = driver.find_element_by_id('AS_OF_DATE')
        elem.click()
        elem.send_keys(as_of_date)
        time.sleep(2)
    except WebDriverException:
        # Best effort: the date field is optional, so its absence is not an error.
        pass

    # Generate report
    driver.find_element_by_id('form_submit').click()

    # Click on 'More...' for the download. generation_wait comes from the
    # config file as a string, so convert it before using it as a timeout.
    xpath = "//div[@class='dataTables_wrapper form-inline']//button[1]"
    elem = WebDriverWait(driver, float(generation_wait)).until(
        EC.presence_of_element_located((By.XPATH, xpath)))
    elem.click()
    time.sleep(2)

    # Choose CEDARS or tab delimited with header
    try:
        xpath = "//input[@value='Cedars']"
        elem = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, xpath)))
        elem.click()
    except WebDriverException:
        elem = driver.find_element_by_link_text('Delimited')
        elem.click()
    time.sleep(5)

    # Click download
    xpath = "//input[@value='Download']"
    elem = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, xpath)))
    elem.click()

    # Wait until file is downloaded (built in exception for edge case of file h)
    if file['name'] == 'CEDARS H. Student Grade History (NEW)':
        time.sleep(10)
    else:
        wait_for_new_file_in_folder(download_folder + '/' + file['file_stem'] + '.txt', 10)

    # Move the newest entry of the download folder into the district folder,
    # renamed to '<district id><file stem>.txt'
    recent_file = get_most_recent_file_in_dir(download_folder)
    new_file = district_folder + '/' + district['id'] + file['file_stem'] + '.txt'
    os.rename(recent_file, new_file)

    print('- success - ', file['name'])

In [None]:
# Downloads all CEDARS files for one district
def download_one_district(driver, reports, district, download_folder):
    """Download every report in ``reports`` for a single district.

    Args:
        driver: Selenium WebDriver used for the downloads.
        reports: List of report dicts.
        district: District dict; its 'name' is printed and its 'folder' names
            the sub-folder created inside ``download_folder``.
        download_folder: Folder the browser downloads into.
    """
    print('Downloading files for ', district['name'], ' . . .')

    # exist_ok=True: reuse the district folder if it is already there
    target_dir = '/'.join((download_folder, district['folder']))
    os.makedirs(target_dir, exist_ok=True)

    # Fetch the reports one by one, with a progress bar
    report_count = len(reports)
    for report in tqdm_notebook(reports, total=report_count):
        download_one_CEDARS_file(driver, report, district, download_folder, target_dir)
    print('-----------------------')

In [None]:
# Downloads all CEDARS files for all districts
def download_all_districts(driver, reports, districts, download_folder):
    """Download the full report set for every district, one after another.

    Args:
        driver: Selenium WebDriver used for the downloads.
        reports: List of report dicts passed on for each district.
        districts: List of district dicts.
        download_folder: Folder the browser downloads into.
    """
    district_count = len(districts)
    for one_district in tqdm_notebook(districts, total=district_count):
        download_one_district(driver, reports, one_district, download_folder)

In [None]:
# Create today's download folder or ignore if it exists.
# The folder name is the date/school-year suffix without its leading
# underscore. Taking it from date_and_year directly gives the same value as
# the old fixed offset into folder_stem, but keeps working if district_site
# changes length.
download_folder = config_section_map("General")['dex_output_dir'] + date_and_year[1:]
os.makedirs(download_folder, exist_ok=True)

In [None]:
# Start the Chrome driver, with today's download folder passed as its
# download directory
driver = configure_selenium_chrome(download_folder)

In [None]:
# Log in to Illuminate with the URL and credentials read from the config file
login_to_illuminate_selenium(driver, illuminate_url,
                                    illuminate_username, illuminate_password)

In [None]:
# Download every report for every configured district.
# (The function and the list are both named in the plural:
# download_all_districts / districts.)
download_all_districts(driver, reports, districts, download_folder)

In [None]:
# Shut the driver down. quit() ends the whole WebDriver session, including
# the chromedriver process; close() would only close the current window.
driver.quit()

## Manual Processing

!!! Before you run the cells below, do any manual processing that is needed and swap in any files that need to be replaced !!!
---------------------------------------

## Zip files

In [None]:
# Zip the files for each district
for district in districts:

    # The archive sits next to the district folder: <district folder>.zip
    district_folder = download_folder + '/' + district['folder']

    # The context manager closes the archive even if adding a file fails
    with zipfile.ZipFile(district_folder + '.zip', 'w') as zipp:

        # Add each file to the zip
        for root, dirs, files in os.walk(district_folder):
            for name in files:
                # Using the bare file name as the archive name stores the
                # files at the top level, without parent directories
                zipp.write(os.path.join(root, name), name)

## Transfer the files to the SFTP server for submission

### Setup SFTP object

In [None]:
# Send Paramiko's log output to a file
paramiko.util.log_to_file('paramiko_log.log')

# Read the connection settings for connect_to_sftp() from the config file
host, port, username, password = (
    config_section_map("CEDARS SFTP")[key]
    for key in ('host', 'port', 'username', 'password')
)
# NOTE: must set port to integer!
port = int(port)

# Set sftp variable to paramiko SFTP object returned by connect_to_sftp()
sftp = connect_to_sftp(host, port, username, password)

### Create parameters for files and paths for pushing files to SFTP server

In [None]:
# Date + school-year tag used in the zip file names and the local folder name
dateyear = dt.datetime.today().strftime('%Y%m%d_') + config_section_map("CEDARS SFTP")['school_year']

# One zip file name per district code, following the CEDARS naming convention.
# The 'district' config value is split on ', ' into a list of codes.
district = config_section_map("CEDARS SFTP")['district'].split(', ')
filenames = [str(d) + district_site + 'CEDARS_' + dateyear + '.zip' for d in district]

# Remote target folder and local source folder, both from the config file;
# the local folder gets today's date/year tag appended
remotepath_folder = config_section_map("CEDARS SFTP")['remotepath_folder']
localpath_folder = config_section_map("CEDARS SFTP")['localpath_folder'] + dateyear + '/'

# Absolute local path and remote path for every zip file, in matching order
localpaths = [os.path.abspath(localpath_folder + f) for f in filenames]
remotepaths = [remotepath_folder + f for f in filenames]

### Push files to SFTP

In [None]:
# Upload each local zip to its matching remote path
for local_zip, remote_zip in zip(localpaths, remotepaths):
    sftp.put(local_zip, remote_zip)
    print('Success: ', local_zip)