In [47]:
import pandas as pd
import numpy as np
import boto3 
import botocore
import xml.etree.ElementTree as ET
import requests
import json
import datetime
import zipfile
import os

In [195]:

# URL that Server.makeAYT posts its AYT request to.
# NOTE(review): the value 'baseURL' looks like a placeholder - confirm the real backend URL.
baseURL = 'baseURL'
class S3Bucket:
    """
    Thin wrapper around a boto3 S3 client bound to a single bucket.
    Needs boto3 (and botocore) to make a connection and download.
    """
    def __init__(self, access_key="", bucket="", secret_key=""):
        """
        access_key (string) = access key, passed to boto3 as aws_access_key_id
        bucket (string) = the bucket from which downloads will be requested
        secret_key (string) = secret key, passed to boto3 as aws_secret_access_key
        """
        self.name = 's3'
        self.bucket = bucket
        self.client = boto3.client(self.name,
                                   aws_access_key_id=access_key,
                                   aws_secret_access_key=secret_key)

    def downloadFile(self, file_key, destination_path):
        """
        Downloads the object stored under file_key (string) in self.bucket and
        saves it to the local file destination_path (string).

        The parent directory of destination_path is created when it is missing,
        so callers do not have to prepare the folder beforehand.
        """
        parent_dir = os.path.dirname(destination_path)
        # A bare file name has no directory component; nothing to create then.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self.client.download_file(self.bucket, file_key, destination_path)

class Server:
    """
    Contains all necessary constants to make an AYT call to the backend.
    makeAYT() posts that call and sets the server parameters ready for fetching
    either from s3 or from the backend: it stores the backend end point and
    replaces the s3bucket property with an S3Bucket built from the AYT config.
    """
    def __init__(self):
        """
        - authorization (string): authorization for the request
        - contentType: type of the requested content
        - agentUser: name of the client
        - endPoint: url of Backend API (empty until makeAYT has run)
        - s3bucket: instance of S3Bucket which enables downloading of files from S3
        """
        self.authorization = 'Basic'
        self.contentType = 'application/json'
        self.agentUser = "PythonML"
        self.endPoint = ""
        self.s3bucket = S3Bucket()

    def makeAYT(self):
        """
        - makes AYT call to backend
        - parses the response
        - sets backend endPoint from the server entry named 'json'
        - sets S3Bucket from the server entry named 's3'
        """
        headers = self.createHeader()

        # Serialise a dict instead of hand-writing the JSON text: the previous
        # single-quoted literal spanned several lines, which is a SyntaxError.
        data = json.dumps({"operation": "AYT",
                           "version": {"protocol": {"domain_model": "5",
                                                    "API": "4"},
                                       "client": "PythonML"}})

        response = requests.post(baseURL, headers=headers, data=data)
        js = response.json()

        for server in js['servers']:
            if server['name'] == 'json':
                self.endPoint = server['prefix']
            elif server['name'] == 's3':
                self.s3bucket = S3Bucket(access_key=server['access_key'],
                                         secret_key=server['secret_key'],
                                         bucket=server['bucket'])

    def createHeader(self):
        """
        returns the request header built from authorization, contentType and agentUser
        """
        headers = {'Authorization': self.authorization,
                   'Content-Type': self.contentType,
                   'User-Agent': self.agentUser}
        return headers

    def downloadFileFromS3Bucket(self, file_key, destination_file):
        """
        downloads the file from file_key (string) and saves it to destination_file (string)
        by delegating to self.s3bucket.downloadFile
        """
        self.s3bucket.downloadFile(file_key, destination_file)

    def getJSONFromServer(self, record_type, operation="read", maxResults=1000, sortKey="created",
                          sortDirection="descending", depth=1,
                          filterValue='2040-01-01', filter_operator='<', filterKey='created'):
        """
        generic method for fetching any record type from the backend
        - record_type (str) - type to be fetched
        - operation (str) - which operation to be performed (default: "read")
        - maxResults (int) - maximum number of the items to be returned (default: 1000)
        - sortKey (str) - the key to be used for sorting (default: "created")
        - sortDirection (str) - ascending/descending (default: "descending")
        - depth (int) - depth value sent with the request (default: 1)
        - filterKey (str) - the key to be used for filtering (default: "created")
        - filterValue (str) - the value to be used for filtering (default: "2040-01-01")
        - filter_operator (str) - the operator to be applied to filterKey and filterValue (default: "<")

        posts the request to self.endPoint and returns the deserialized json of the response
        """
        data = json.dumps({"depth": depth,
                           "type": record_type,
                           "paging":
                               {"maxResults": maxResults,
                                "sort":
                                    {"key": sortKey,
                                     "direction": sortDirection}},
                           "operation": operation,
                           "filter": [
                               {"value": filterValue,
                                "operator": filter_operator,
                                "key": filterKey}]})
        response = requests.post(self.endPoint, headers=self.createHeader(), data=data)
        js = response.json()
        return js

    def _fetchAllRecords(self, record_type, depth=1):
        """
        pages through every record of record_type and returns them as one flat list

        Each request asks for records created before the current cursor; the
        cursor starts at "now" and then moves to the 'created' value of the last
        record of each page. Paging stops on the first empty page.
        """
        records = []
        cursor = str(datetime.datetime.now())
        while True:
            page = self.getJSONFromServer(record_type, depth=depth, filterValue=cursor)
            if len(page) == 0:
                break
            next_cursor = str(page[-1]['meta']['created'])
            # A page that does not move the cursor would be requested forever;
            # treat it as a repeat and stop instead of looping endlessly.
            if next_cursor == cursor:
                break
            records.extend(page)
            cursor = next_cursor
        return records

    def getAllScenes(self):
        """
        returns all scenes meta data from the backend (each record once)
        """
        return self._fetchAllRecords("scene")

    def getAllRealEstate(self):
        """
        returns all Real Estate Properties meta data from the backend (each record once),
        requested with depth=2
        """
        return self._fetchAllRecords("real_estate_property", depth=2)
    
class DataParser:
    def getUrlForZipForScenes(self, js):
        """
        Collects the zip url of every scene.

        js (list) - scene dictionaries, each holding its url under ['data']['url']
        returns the urls as a list, in the same order as js
        """
        return [scene['data']['url'] for scene in js]
    
class FileManager:
    def get_only_xml_from_zip(self, path_to_zip_file, destination_path, key=None):
        """
        path_to_zip_file - path to the zip file
        destination_path - path for unzip folder
        key - name (without extension) for the stored xml; defaults to the last
              path component of destination_path

        Unzips path_to_zip_file into destination_path, deletes the zip and the
        extracted original.jpg, and moves project.xml to scenexmls/<key>.xml.
        The scenexmls folder is created when it does not exist yet.
        """
        if key is None:
            # normpath drops a trailing separator so basename is never empty.
            key = os.path.basename(os.path.normpath(destination_path))

        # Context manager closes the archive even when extraction fails.
        with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
            zip_ref.extractall(destination_path)
        os.remove(path_to_zip_file)

        # Only the xml is wanted; an archive without the image is not an error.
        image_path = destination_path + "/original.jpg"
        if os.path.exists(image_path):
            os.remove(image_path)

        os.makedirs("scenexmls", exist_ok=True)
        # os.replace overwrites an xml left by a previous run on every platform.
        os.replace(destination_path + "/project.xml", "scenexmls/" + key + ".xml")
    
class SceneParser:
    def parseAllScenesXML(self, xml_dir="scenexmls/"):
        """
        Parses every *.xml file found under xml_dir (default: the scenexmls folder).

        Each <object> element directly under a file's root becomes one row whose
        columns are that element's attributes, plus scene_id, which is the xml
        file name without its extension.

        Returns one dataframe with a fresh 0..n-1 index holding the rows of all
        files; it is empty when no xml file (or no object) is found.
        """
        xml_paths = []
        for folder, _subdirs, names in os.walk(xml_dir):
            for name in names:
                # Match the extension only, so e.g. "x.xml.bak" is not parsed.
                if name.endswith('.xml'):
                    xml_paths.append(os.path.join(folder, name))

        records = []
        # os.walk order is arbitrary; sorting keeps the row order reproducible.
        for path in sorted(xml_paths):
            scene_id = os.path.splitext(os.path.basename(path))[0]
            root = ET.parse(path).getroot()
            for obj in root.findall('object'):
                record = dict(obj.attrib)
                record['scene_id'] = scene_id
                records.append(record)

        # Built once from all records, so an empty folder yields an empty frame
        # instead of the ValueError pd.concat raises on an empty list.
        return pd.DataFrame(records)
                

In [196]:
def createScenesBasetable():
    """
    Integrated flow function
    returns scenes basetable with position and rotation of each product in the scenes
    """
    #1. creates a server
    server = Server()
    #2. Makes an AYT call to obtain S3 bucket connection enablers and end point for meta data
    server.makeAYT()
    #3. Retrieves all scenes
    allscenes = server.getAllScenes()
    #4. Get urls to download the files from S3Bucket
    urls = DataParser().getUrlForZipForScenes(allscenes)
    #5. Download all scene zip files and extract the xml file
    # The zips are written into scenes/, so make sure that folder exists first.
    os.makedirs("scenes", exist_ok=True)
    file_manager = FileManager()
    # set() so a url shared by several scenes is downloaded only once
    for url in set(urls):
        # the first path component of the url is used as the scene key
        key = url.split("/")[0]
        zip_path = "scenes/" + key + ".zip"
        server.downloadFileFromS3Bucket(url, zip_path)
        file_manager.get_only_xml_from_zip(zip_path, "unzipped/" + key)
    #6 create dataframe from the scene xml
    dfscenes = SceneParser().parseAllScenesXML()
    return dfscenes
    

In [187]:
# Run the full flow and preview the first rows of the resulting basetable.
dfs = createScenesBasetable()
dfs.head()

Unnamed: 0,productId,rotation,scene_zip_id,x,y,z
0,d29cb731-1a03-4822-9f75-d523fabec631,-17.365966797,be9fa381-e026-41a4-9d98-5b034ffd8237,1.0482574701,-0.79321825504,-3.4925968647
1,cb855f91-879f-42e7-9793-eafda352b139,85.52897644,be9fa381-e026-41a4-9d98-5b034ffd8237,1.1975824833,-0.90163052082,-1.7694252729
2,ee543266-0e24-4029-be6e-f92a775cadcb,-36.798454285,be9fa381-e026-41a4-9d98-5b034ffd8237,1.2431117296,-0.79322725534,-3.2318880558
3,cb855f91-879f-42e7-9793-eafda352b139,4.3247184753,be9fa381-e026-41a4-9d98-5b034ffd8237,-0.33459293842,-0.87053245306,-2.8342216015
4,c7e7c896-c270-49e8-97a7-bbb01ca87c2c,-42.695732117,be9fa381-e026-41a4-9d98-5b034ffd8237,1.9278702736,-0.90137094259,-2.7674129009


In [197]:
# `server` only exists as a local inside createScenesBasetable, so a fresh
# kernel has no such name here; build and configure one before fetching.
server = Server()
server.makeAYT()
# NOTE: getAllRealEstate returns a flat list of records, not a DataFrame.
df_real_estate = server.getAllRealEstate()