# 1. Import all required Modules and custom build Classes into the Jupyter notebook
Import all required modules for the project.<br />
Import all custom modules for the project and instantiate classes using the config file.<br />
Custom modules can now be called directly in the notebook.


In [1]:
import sys
from sys import getsizeof as dictsize
import os
import json
import time
import pymongo
import urllib
import pandas as pd
import numpy as np
import urllib
import urllib.request

try:
    import createNewData.data.config as in_config
    from createNewData.pypackages.Azure import Azure
    from createNewData.pypackages.urlHandler import UrlHandler
    AzurePackage = Azure(in_config)
    Url = UrlHandler(in_config)
    
    
except ImportError as e:
    print(in_config.FailedImport)
    print(e)

# 2. Upload to SQL Database
Upload an entire table from a txt or csv file to the Azure SQL Database.
Required fields:
* "UploadToSQL" is the class name for uploading tables to the database.
* "df" is the dataframe to be written to the database.
* tablename is the name of the new table to be created.

In [None]:
df = pd.read_csv(in_config.shapes)

SqlDataCursor = AzurePackage("UploadToSQL",
                              df,
                              "shapes")

# 3. Return All Unique Shape Id's From SQL Database
1. Collect all items in the 'shapes' schema of shared team Database
2. Reduce Dataframe by removing dupicate coordinates.
3. Pop() coordinates based on batch sizes no greater than 1024 bytes.
4. Add each batch list to a list of batches to send the the Open-elevations API

### Attributes
* Azure class imported with call functionality (in)
* Config File (in)
* listOfBatches (out)


In [2]:
listOfBatches = []

try:
    df = AzurePackage("SelectAllData", "[shapes]")
    coordinateDF = df[["shape_id", "shape_pt_lat","shape_pt_lon"]]
    request = coordinateDF.drop_duplicates(subset=None, 
                                        keep='first', 
                                        inplace=False)
    coordinatesReq = Url("generateLocationRequest", request)
    for key, value in coordinatesReq.items():
        if key == "locations":
            locations = value
    for each in value:
        batches = {"locations" : []}
        requestSize = 0
        while dictsize(batches["locations"]) +\
                            dictsize(each) +\
                            dictsize(batches) < 1024:
            location = value.pop(0)
            batches["locations"].append(location)
        listOfBatches.append(batches)
    for each in listOfBatches:
        if dictsize(each) > 1024:
            raise Exception(in_config.RequestToBig)
        else:
            pass

except pd.io.sql.DatabaseError as e:
    print(in_config.NoSQLShema)

except urllib.request.HTTPError as e:
    if e.code == "403":
        print(in_config.SQLConnectionFail)
    
except Exception as e:
    print(in_config.UNKMGO)
    print(e)


In [4]:
for each in listOfBatches:
    shapeData = Url("mineElevationData",each)
    print(shapeData)
    time.sleep(10)

HTTPError: HTTP Error 504: Gateway Time-out

# Collect Elevation Data and Upload to MongoDB
Collect the longtitude and lattitude for elevation data from the shapes table.
Store data to the mongoDB database in Azure Cosmos.

In [None]:
shapeList = []
AzurePackage("DropMongoColl","shapes")
for each in shapeIds.iterrows():
    try:
        # Generate the Pandas table of all the Longtitudes and Latitudes
        # for each shape
        elevations = AzurePackage("SelectLongLat",
                                "[shape_id],[shape_pt_lat],[shape_pt_lon]",
                                "[dbo].[shapes]",
                                "[shape_id]",
                                each[1][0])

        # Generate the Json document for upload to MongoDB
        shapeData = Url("mineElevationData",elevations)
    except urllib.error.HTTPError as e:
        print(in_config.URLOOD)
        print(e)

    try:
        # Upload the Json document to MongoDB
        if not len(shapeData) == 0:
            AzurePackage("UploadToMongo","shapes",shapeData)
        else:
            raise Exception(in_config.NDIDF)
    except TypeError as e:
        print(in_config.TEC)
    except pymongo.errors.DuplicateKeyError as e:
        print(in_config.FIDB)
    except Exception as e:
        print(in_config.UNKMGO)
        print(e)



# Read Data From MongoDB and Write to Pandas DataFrame
This reads data from the raw Json files in the MongoDB database and imports them to a dataframe that contains only unique values. 
Removing any rows where any full row duplicates exist.<br />
Finally, this tests that all the elevation data was collected correctly by summing the values of the elevations and therefore ruling out any NaN values. An exception will be raised here in the final script to indicate that the elevation collection was unsuccessfull.

In [None]:
listOfObjects = []
listOfElevations = []


try:
    dbcollections = AzurePackage("SelectFromMongo")
    for each in dbcollections.find():
        for key, value in each.items():
            if type(value) is list:
                listOfObjects.append(value)
        

    for each in listOfObjects:
        for elevation in each:
            listOfElevations.append(elevation)

    df = pd.DataFrame(listOfElevations)
    dfTrimmed = df.drop_duplicates()
    sumElevation = dfTrimmed["elevation"].sum()
    if type(sumElevation) in [np.int64,int]:
        print("Elevations collected correctly")
        print(dfTrimmed.head())
    else: 
        raise Exception("Failed to collect all elevations, please try again.")
except KeyError as e:
    print(f"Column {e} cannot be found in the dataframe.")
except NameError as e:
    print(f"The Datatable {e} cannot be found.")
except Exception as e:
    print(in_config.UNKMGO)
    print(e)

# Upload Trimmed Elevations DataFrame to SQL

This saves the resulting SQL schemata to the development database.
This will overwrite any existing data in the SQL schemata that already exists.


In [None]:
SqlDataCursor = AzurePackage("UploadToSQL",
                              dfTrimmed,
                              "elevations",
                              in_config.connQuote)

# Collect real time data and upload to Mongo
This collects the real time data as a json file and overwrites the collection in the mongoDB database.
* URL used https://gtfsr.transportforireland.ie


In [None]:
url = in_config.url2
headers = in_config.RTIheaders
response = Url("callURL", url, {}, headers)
JsonData = response.read().decode('utf8').replace("'", '"')
RTIgtfs = json.loads(JsonData)
try:
    AzurePackage("DropMongoColl","RTIgtfs")
    AzurePackage("UploadToMongo","RTIgtfs",RTIgtfs)
except pymongo.errors.WriteError as e:
    print("An error occured while attempting to write the GTFS data to Mongo Database.")
    print(type(e))
except HTTPError as e:
    print("An error occured while attempting to connect to the Mongo Database.")
    print(type(e))


# Save Trimmed Elevation Data Team SQL Database
This reads data from the raw Json files in the MongoDB database and imports them to a dataframe that contains only unique values. Removing any rows where any full row duplicates exist.
This saves the resulting SQL schemata to the Database used by the R-Shiny app. (Production)
This will overwrite any existing data in the SQL schemata that already exists.

In [None]:
SqlDataCursor = AzurePackage("UploadToSQL",
                              dfTrimmed,
                              "elevations",
                              in_config.teamConnQuote)

# Connect the shape and elevation schema together by joining their longtitude and latatude values
This will either be a method in the Rshiny app to collect or we simply create a new database from this data but this seems a bit verbose.

In [None]:
conn = AzurePackage("AzureDBConn", in_config.connQuote)
SQLString = in_config.SQLElevation
df = pd.read_sql(SQLString, conn)
conn.close()

In [None]:
AzurePackage("UploadToMongo","RTIgtfs",RTIgtfs)

In [None]:
df = AzurePackage("SelectAllData", "[dbo].[shapes]")
coordinates = df[["shape_id", "shape_pt_lat","shape_pt_lon"]]
request = coordinates.drop_duplicates(subset=None, keep='first', inplace=False)

In [None]:

a = Url("generateLocationRequest", coordinates)
for key, value in a.items():
    print(sys.getsizeof(value))

In [None]:
shapeData = Url("mineElevationData", request)