In [128]:
import os
import requests
import html
import json
import traceback
import pdfkit
import gspread
from gspread.exceptions import SpreadsheetNotFound
from oauth2client.service_account import ServiceAccountCredentials
import calendar
import time
import sys

module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from plugin.file_uploader.file_uploader import FileUploader




In [129]:
# Params.json has the body of the request with template variables etc.
# See data/params.json for sample
with open('data/params.json', 'r') as f:
    params = json.loads(f.read())

# config.json contains all the template params (sheet ID etc)
# I have added one for the Rozgar template
with open('config.json') as json_file:
    CONFIG = json.load(json_file)

In [130]:

def get_tags(raw_data=None):
    """
    this method return all the tags on the basis of which we filter the request
    """
    tags = dict()
    if raw_data:
        tags["FORMID"] = raw_data["FORMID"]
        tags["USERNAME"] = raw_data["USERNAME"]
        tags["FORMSUBMISSIONDATE"] = raw_data["FORMSUBMISSIONDATE"]
        tags["INSTANCEID"] = raw_data["INSTANCEID"]
        tags["FORMNAME"] = CONFIG[raw_data["FORMID"]]["FORMNAME"]
        CONFIG["SHEETID"] = CONFIG[raw_data["FORMID"]]["SHEETID"]
        CONFIG["DOCTEMPLATEID"] = CONFIG[raw_data["FORMID"]]["DOCTEMPLATEID"]
        CONFIG["APPLICATIONID"] = CONFIG[raw_data["FORMID"]]["APPLICATIONID"]
        CONFIG['FORMNAME'] = tags["FORMNAME"]
        if 'FILENAMEFIELD' in CONFIG[raw_data["FORMID"]]:
            CONFIG['FILENAMEFIELD'] = CONFIG[raw_data["FORMID"]]["FILENAMEFIELD"]
    return tags


def fetch_data(req_data=""):
        form_id = req_data['formId']
        new_req_data = req_data['data'][0]  # Getting the data : [{values}]
        instance_id = new_req_data['instanceID']  # Getting the instance id for searching routes
        user_name_field = CONFIG[form_id]["USERNAMEFIELD"]
        new_req_data = json.loads(json.dumps(new_req_data))  # Getting the new data
        user_name = new_req_data[user_name_field]
        form_submission_date = new_req_data[
            '*meta-submission-date*']  # Correcting the submission date and removing the time
        end_index = form_submission_date.find(str('T'))
        form_submission_date = form_submission_date[:end_index]
        # Saving the corrected date in the json
        new_req_data['*meta-submission-date*'] = form_submission_date
        my_dict = {}
        for req_key, req_val in new_req_data.items():
            if isinstance(req_val, dict):
                for col_key, col_val in req_val.items():
                    if col_key == "url":
                        # correcting the URLs
                        base_url = 'http://aggregate.cttsamagra.xyz:8080/'
                        index_start = 0  # Finding the substring
                        index_end = col_val.find(
                            ":8080/") + 6  # Find the stopping point
                        newv1 = col_val.replace(col_val[index_start:index_end], base_url)
                        my_dict[req_key] = newv1
            elif isinstance(req_val, (float, int)):
                my_dict[req_key] = str(req_val)
            elif isinstance(req_val, list):
                my_dict[req_key] = str(req_val[0])  # Converting list to str
            else:
                if req_val is None:
                    req_val = "-"
                my_dict[req_key] = req_val

        # Calculate Udise from its database and then Calculate distance from udise
        calculated_distance = 'Not available'  # Calculate using udise
        my_dict['calculated_distance'] = calculated_distance
        all_data = dict()
        all_data['req_data'] = my_dict
        all_data['FORMID'] = form_id
        all_data['INSTANCEID'] = instance_id
        all_data['USERNAME'] = user_name
        all_data['FORMSUBMISSIONDATE'] = form_submission_date
        all_data["SHEETID"] = CONFIG[form_id]["SHEETID"]
        
        all_data["DOCTEMPLATEID"] = CONFIG[form_id]["DOCTEMPLATEID"]
        all_data["APPLICATIONID"] = CONFIG[form_id]["APPLICATIONID"]
        tags = get_tags(all_data)
        all_data.update(CONFIG)
        raw_data = dict()
        raw_data['reqd_data'] = all_data
        raw_data['tags'] = tags
        raw_data['instance_id'] = instance_id
        if 'DOCDELETED' in CONFIG[all_data["FORMID"]]:
            raw_data['is_delete'] = CONFIG[all_data["FORMID"]]["DOCDELETED"]
        else:
            raw_data['is_delete'] = True
        return raw_data, None


In [131]:
raw_data = fetch_data(params)

In [132]:
# Just separating the steps (writing to raw_)
with(open('data/raw_data.json', 'w') as f):
    f.write(json.dumps(raw_data, indent=4))

In [133]:
### get_mapping values, build_pdf
with(open('data/raw_data.json', 'r') as f):
    raw_data = json.loads(f.read())
form_id = raw_data[0]["reqd_data"]["FORMID"]
config = CONFIG[form_id]

def _get_token():
    """ The file token.pickle stores the user's access and refresh tokens, and is
        created automatically when the authorization flow completes for the first
        time."""
    client = None
    creds = None
    try:
        sheet_scopes = [
            'https://spreadsheets.google.com/feeds',
            'https://www.googleapis.com/auth/spreadsheets',
            'https://www.googleapis.com/auth/drive'
        ]
        # base_path = os.path.dirname(__file__)
        creds = ServiceAccountCredentials.from_json_keyfile_name('gcs-creds-af.json')
        client = gspread.authorize(creds)
    except Exception as ex:
        print(traceback.format_exc())
    return client, creds

def get_sheetvalues(sheet_id, var_mapping):
    """
    get google sheet data of the specified sheet id and range
    """
    error = None
    try:
        client = _get_token()[0]
        print(client)
        base_sheet = client.open_by_key(sheet_id)
        sheet = base_sheet.worksheet(var_mapping)
        values = sheet.get_all_values()
        # print(values)
        if not values:
            error = "No Mapping details found"
        else:
            mapping_values = values
    except SpreadsheetNotFound as ex:
        error = "Failed to fetch mapping detials - 1"
        mapping_values = None
        print(traceback.format_exc())
    except Exception as ex:
        print(traceback.format_exc())
        error = "Failed to fetch mapping detials - 2"
        mapping_values = None
        print(traceback.format_exc())
    return mapping_values, error



def fetch_mapping(data):
    """
    this method fetches mapping values and options from google sheet and update this in raw_data
    return it as raw_data
    """
    error = None
    # raw_data = None
    try:
        get_value_mapping = get_sheetvalues(data['SHEETID'], data['MAPPINGDETAILS'])
        mapping_error = get_value_mapping[1]  # Error in fetching mapping
        mapping_values = get_value_mapping[0]  # mapping values list
        get_options_mapping = get_sheetvalues(data['SHEETID'],
                                                    data['OPTIONSSHEET'])
        options_error = get_options_mapping[1]  # Error in fetching options
        options_mapping = get_options_mapping[0]  # options mapping list

        if not mapping_error and not options_error:
            raw_data = dict()
            raw_data['value_mapping'] = mapping_values
            raw_data['options_mapping'] = options_mapping
            data.update(raw_data)
            raw_data = data

        else:
            error = str(mapping_error) + str(options_error)

    except Exception as ex:
        error = "Failed to fetch mapping detials - 4"
    return raw_data, error

raw_data = fetch_mapping(raw_data[0]["reqd_data"])

16IXRddw912l0zQ31EHPnmnm6tPG-O2US5r3LHO_h4IM mappingDetails
<gspread.client.Client object at 0x10d691b80>
<gspread.client.Client object at 0x112177c40>


In [134]:
with(open('data/raw_data_with_mapping.json', 'w') as f):
    f.write(json.dumps(raw_data, indent=4))

In [135]:
def map_data(all_data, mapping_values, options_mapping):
    error = None
    final_data = None
    try:
        # info_log(self.logger.info, "Step4.1 Mapping Start", self.raw_data)
        final_data = []  # List to hold the final values
        mapping_values.pop(0)
        options_mapping.pop(0)
        for row in mapping_values:
            if row[1].lower() == 'options':
                options_mapping_keys = [x[0] for x in options_mapping]
                option_value_start = options_mapping_keys.index(row[2])
                if option_value_start == -1:
                    all_data[row[2]] = 'NO_TEXT_FOUND'  # If the particular option is not found
                    final_data.append(all_data[row[2]])
                else:
                    a = options_mapping_keys.index(row[2])
                    current_option_val = 'NO_TEXT_FOUND'
                    for i in options_mapping[a][1:]:
                        if i != '':
                            op_key = i.split("::")[0]
                            op_val = i.split("::")[1]
                            if op_key == all_data[row[2]]:
                                current_option_val = op_val
                                break
                    final_data.append(current_option_val)
            else:
                if not all_data[row[2]]:
                    all_data[row[2]] = 'NO_TEXT_FOUND'  # If data is None

                final_data.append(all_data[row[2]])  # Appending the received data to the final list
        # info_log(self.logger.info, "Step4.1 Mapping End", self.raw_data)

    except Exception as ex:
        print(traceback.format_exc())
        error = "Failed to map data"
        # info_log(self.logger.error, "Error3 " + error, self.raw_data)
        # self.logger.error("Exception occurred", exc_info=True)
    return final_data, error

def build_pdf(raw_data, file_name):
    """
    this method get raw_data and file name and generate pdf having this file_name
    """
    error = None
    pdf_name = None
    pdf_url = None
    pdf_path = None
    try:
        data = raw_data['req_data']
        mapping_values = raw_data['value_mapping']
        options_mapping = raw_data['options_mapping']
        mapped_data = map_data(data, mapping_values, options_mapping)
        html_content = requests.get("https://docs.google.com/document/d/" + CONFIG["DOCTEMPLATEID"] + "/export?format=html").content
        html_content = html.unescape(html_content.decode('utf-8'))
        # print(html_content)
        if 'FILENAMEFIELD' in raw_data and raw_data['FILENAMEFIELD'] in data:
            file_name = data[raw_data['FILENAMEFIELD']] + '_' + str(
                calendar.timegm(time.gmtime()))
        for idx, val in enumerate(mapped_data[0]):
            template_index = idx + 1
            string_to_search = '<<' + str(template_index) + '>>'
            html_content = html_content.replace(string_to_search, val)
        base_path = os.path.join(os.path.abspath(''), CONFIG['DIRPATH'])
        if not os.path.exists(base_path):
            os.makedirs(base_path)
        file_path = os.path.join(base_path, 'filled_template.html')
        with(open( file_path, 'w') as f):
            f.write(html_content)
        with open(file_path) as f:    
            pdf_path = os.path.join(base_path, file_name)
            pdfkit.from_file(f, pdf_path)
            pdf_name = file_name
    except:
        print(traceback.format_exc())
    return pdf_name, error, pdf_url, pdf_path

with(open('data/raw_data_with_mapping.json', 'r') as f):
    raw_data_with_mapping = json.loads(f.read())

pdf_name, error, pdf_url, pdf_path = build_pdf(raw_data_with_mapping[0], "out.pdf")
print(pdf_name, error, pdf_url, pdf_path)
# mapping_error = final_map[1]
# if not mapping_error:
#     mapped_data = final_map[0]
# print(mapped_data)


Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                      
out.pdf None None /Users/pritamps/samagra/PDF-Package/src/plugin/html_plugin/../../uploadFiles/out.pdf


In [136]:
with open('data/template.html', 'rb') as f:
    html_content = html.unescape(f.read().decode('utf-8'))

In [137]:
def _upload_file(base_path, file_path, key):
    if ('UPLOADTO' in CONFIG.keys() and CONFIG['UPLOADTO']):
        if CONFIG['UPLOADTO'] == 's3':
            cdn_upload = FileUploader(CONFIG['UPLOADTO'], CONFIG['ACCESSKEY'],
                                        CONFIG['SECRETKEY'])
        else:
            print(CONFIG['UPLOADTO'],
                                        base_path + '/' + 
                                        CONFIG['GOOGLE_APPLICATION_CREDENTIALS'])
            cdn_upload = FileUploader(CONFIG['UPLOADTO'],
                                        base_path + '/' +
                                        CONFIG['GOOGLE_APPLICATION_CREDENTIALS'])
        resp = cdn_upload.upload_file(file_path,
                                        CONFIG['BUCKET'], key)
        url = resp[0]
        error = resp[1]
        if url:
            upload_file_url = url
            expire_timestamp = resp[2]
            # os.remove(file_path)
        else:
            print("Error6 " + error, raw_data)

        print("Step5.1 Upload To Cdn End", raw_data)
        return upload_file_url, error, expire_timestamp

    
def upload_pdf(key, file_url, file_path=None):
        """
        Uploads a file to the local server and if we specify UPLOADTO in config file then save this
        file to cdn and delete file from local server.
        """
        error = ''
        upload_file_url = None
        expire_timestamp = None
        if not file_url and not file_path:
            error = "Please specify either file URL or file path"
            return "", error, ""
        if file_path:
            base_path = os.path.join(os.path.abspath(''))            
            upload_file_url, error, expire_timestamp = _upload_file(base_path, file_path, key)
                

        try:
            if file_url is None:
                error = "Please specify either file URL or file path"
                return "", error, ""
            response = requests.get(file_url)
            base_path = os.path.join(os.path.abspath(''), CONFIG['DIRPATH'])
            if not os.path.exists(base_path):
                os.makedirs(base_path)
            with open(base_path + key, 'wb') as file_obj:
                file_obj.write(response.content)
                upload_file_url = base_path + key
                base_path = os.path.dirname(__file__)
                upload_file_url, error, expire_timestamp = _upload_file(base_path, base_path + key, )

            # self._delete_file_drive(file_url)
            print(print, "Step5 Upload Pdf End", self.raw_data)

        except Exception as ex:
            error = "Failed to download file from drive"
            print("Error5 " + error, self.raw_data)
            print("Exception occurred")
        return upload_file_url, error, expire_timestamp


upload_pdf(pdf_name, None, pdf_path)

google /Users/pritamps/samagra/PDF-Package/src/plugin/html_plugin/gcs-creds-af.json
pdf-builder-af
Bucket done
Step5.1 Upload To Cdn End ({'req_data': {'*meta-instance-id*': 'uuid:b155fd16-ad6b-4496-ab96-32d06850e522', '*meta-model-version*': '1', '*meta-ui-version*': '-', '*meta-submission-date*': '2021-04-07', '*meta-is-complete*': 'True', '*meta-date-marked-as-complete*': '2021-04-07T08:36:32.235Z', 'introduction': '-', 'form': 'shalu', 'mobile_number': '9355170004', 'whatsapp_number': '9355170004', 'opt_district_name': '3', 'pincode': '124001', 'opt_highest_level_qualification': '7', 'opt_qualification_upto_8th': '-', 'opt_qualification_10th': '-', 'opt_qualification_12th': '-', 'opt_qualification_ITI': '-', 'opt_qualification_diploma_after_10th': '-', 'opt_qualification_diploma_after_12th': '5', 'opt_qualification_graduation': '-', 'opt_qualification_post_graduation': '-', 'opt_qualification_PhD': '-', 'marks_qualification': '70', 'date_of_birth': '1992-07-27', 'opt_gender': '2', 

('', 'Please specify either file URL or file path', '')

In [22]:
with(open('data/filled_template.html', 'w') as f):
    f.write(html_content)

with open('data/filled_template.html') as f:    
    pdfkit.from_file(f, 'data/out.pdf')

Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                      
