In [6]:
import pandas as pd
import yaml
import re
import sys

In [7]:
class ExcelTableReader:
    '''
    Reads a table of data from an Excel spreadsheet and creates a Pandas DataFrame from it.
    The table is required to have a single row of column headers at the top of the table.
    @param url A path to an Excel spreadsheet with the name of the sheet appended after a colon.
               Example: 'C:/myDocuments/mySpreadsheets/wonderful.xlsx:Sheet1'
    @param excel_range A string representing a range in Excel. The first row must be the column headers.
                       Example: "A3:D10"
    '''
    # Column letters followed by a row number without leading zeros, on each side of the colon.
    # The character class is spelled out as A-Za-z because 'A-z' would also admit the punctuation
    # characters that sit between 'Z' and 'a' in ASCII ('[', '\', ']', '^', '_' and '`').
    _RANGE_REGEX = re.compile(r'([A-Za-z]+)([1-9][0-9]*):([A-Za-z]+)([1-9][0-9]*)')

    def __init__(self, url, excel_range):
        self.url         = url
        self.excel_range = excel_range

    def read(self):
        '''
        Loads the configured range of the configured sheet.
        @return A Pandas DataFrame whose column names come from the first row of the range.
        @raise ValueError if the url or the range given at construction time is badly formatted.
        '''
        path, sheet                                    = self._parse_url()

        first_column, last_column, first_row, last_row = self._parse_range()

        # Note that Excel rows start at 1, but Pandas counts rows from 0, so the header needs to be
        # offset by 1. The rows below the header are the data, hence nrows = last_row - first_row.
        df                                             = pd.read_excel(io         = path,
                                                                       sheet_name = sheet,
                                                                       header     = first_row - 1,
                                                                       usecols    = first_column + ':' + last_column,
                                                                       nrows      = last_row - first_row)
        return df

    def _parse_url(self):
        '''
        Given a url of form "<some string A, maybe with colons>:<some string B without colons>"
        it returns the two substrings separated by the last colon.
        @raise ValueError if there is no colon, or if either side of the last colon is empty.
        '''
        # rpartition splits on the LAST colon only, so a path that happens to contain ':<sheet>'
        # earlier on (or a drive-letter colon) is returned in full.
        path, separator, sheet = self.url.rpartition(':')
        if not separator:
            raise ValueError ("Incorrectly formatted url was given: '" + self.url
                             +"'. Should instead be formmated like this example: "
                             + "'C:/MyDocuments/MySpreadsheets/Wonderful.xlsx:SheetName'")
        if len(path) == 0 or len(sheet) == 0:
            raise ValueError ("Incorrectly formatted url was given: \n\t'" + self.url
                             + "'\nShould instead be formmated like this example, with a non-empty path and a non-empty"
                             + " sheet name separated by the last colon in the url: \n"
                             + "\t'C:/My Documents/My Spreadsheets/Wonderful.xlsx:SheetName'")
        return path, sheet

    def _parse_range(self):
        '''
        Parses strings for Excel ranges like 'C5:DA15' and returns the columns and rows: 'C', 'DA', 5, 15.
        Surrounding whitespace is ignored.
        @raise ValueError if the given range is not correctly formatted.
        '''
        # fullmatch (rather than match) so that trailing junk such as 'A1:B2xyz' is rejected too.
        res = self._RANGE_REGEX.fullmatch(self.excel_range.strip())
        if res is None:
            raise ValueError ("Incorrectly formatted Excel range was given: '" + self.excel_range
                              + "'. Should instead be formatted like this example: 'C5:DA15'")
        first_column, first_row, last_column, last_row = res.groups()
        return first_column, last_column, int(first_row), int(last_row)
        

In [15]:
def applyMarathonJourneyPlan(product, scoring_cycle, environment, planning_scenario,
                             plan_name, plan_maturity, url, excel_range, repo_root_dir,
                             estimating_mgr, estimation_date, user):
    '''
    Builds a 'JourneyPlan' manifest of type 'Marathon' from a two-column Excel table, prints it
    as YAML on standard output and writes it to '<repo_root_dir>/<product>_marathon_plan.yaml'.

    @param product Product name; used in the manifest name, its labels and the output file name.
    @param scoring_cycle Scoring cycle; second half of the namespace and a label.
    @param environment First half of the namespace, typically something like 'Development' or 'Production'.
    @param planning_scenario Scenario name; used in the manifest name and as a label.
    @param plan_name Accepted but not used by this function.
    @param plan_maturity Can be one of: 'Not done', 'Drafted', 'Checked', 'Published'.
    @param url Spreadsheet location and sheet name, as expected by ExcelTableReader.
    @param excel_range Excel range of the table (header row first), as expected by ExcelTableReader.
    @param repo_root_dir Folder into which the YAML file is written.
    @param estimating_mgr Recorded in the manifest as 'estimated_by'.
    @param estimation_date Recorded in the manifest as 'estimated_on'.
    @param user Recorded in the manifest as 'recorded_by'.
    @raise ValueError if the table read from Excel does not have exactly two columns.
    '''
    journey = 'Modernization'

    plan_df = ExcelTableReader(url, excel_range).read()
    if len(plan_df.columns) != 2:
        raise ValueError ("Badly formatted Marathon Plan: should have exactly two columns, ideally called: 'Workstream' and "
                         + "'Effort'. Error when processing range= '" + excel_range + "' and url=\n\t" + url)
    # Whatever the spreadsheet headers were, downstream code relies on these two names
    plan_df.columns = ['Workstream', 'Effort']

    # One entry per spreadsheet row, numbered ws-1, ws-2, ... in row order
    workstreams = [
        {'workstream': record['Workstream'],
         'effort': record['Effort'],
         'workstream-id': 'ws-' + str(position)}
        for position, (_, record) in enumerate(plan_df.iterrows(), start=1)
    ]

    manifest = {
        'apiVersion': 'journeys.ea.io/v1',
        'kind': 'JourneyPlan',
        # Namespace would typically be something like 'Development' or 'Production'
        'metadata': {
            'namespace': environment + '.' + scoring_cycle,
            'name': product + '.' + journey + '.' + planning_scenario,
            'labels': {
                'product': product,
                'scoringCycle': scoring_cycle,
                'scenario': planning_scenario,
                'journey': journey,
            },
        },
        'planMaturity': plan_maturity,
        'plan': {
            'type': 'Marathon',
            'estimated_by': estimating_mgr,
            'estimated_on': estimation_date,
            'recorded_by': user,
            'workstreams': workstreams,
        },
    }

    # Echo the manifest first so it shows up as the cell output, then persist it
    yaml.dump(manifest, sys.stdout)

    target = repo_root_dir + '/' + product + '_marathon_plan.yaml'
    with open(target, 'w') as out:
        yaml.dump(manifest, out)

In [16]:
def applyInvestmentCommittment(product, scoring_cycle, environment, planning_scenario,
                               plan_name, plan_maturity, url, excel_range, repo_root_dir, committing_mgr,
                               committing_date, user):
    '''
    Builds a 'JourneyInvestment' manifest from a two-column Excel table, prints it as YAML on
    standard output and writes it to '<repo_root_dir>/<product>_investment_committment.yaml'.

    @param product Product name; used in the manifest name, its labels and the output file name.
    @param scoring_cycle Scoring cycle; second half of the namespace and a label.
    @param environment First half of the namespace, typically something like 'Development' or 'Production'.
    @param planning_scenario Scenario name; used in the manifest name and as a label.
    @param plan_name Accepted but not used by this function.
    @param plan_maturity Can be one of: 'Not done', 'Drafted', 'Checked', 'Published'.
    @param url Spreadsheet location and sheet name, as expected by ExcelTableReader.
    @param excel_range Excel range of the table (header row first), as expected by ExcelTableReader.
    @param repo_root_dir Folder into which the YAML file is written.
    @param committing_mgr Recorded in the manifest as 'committed_by'.
    @param committing_date Recorded in the manifest as 'committed_on'.
    @param user Recorded in the manifest as 'recorded_by'.
    @raise ValueError if the table read from Excel does not have exactly two columns.
    '''
    journey = 'Modernization'

    plan_df = ExcelTableReader(url, excel_range).read()
    if len(plan_df.columns) != 2:
        raise ValueError ("Badly formatted Investment Plan: should have exactly two columns, ideally called: 'Period' and "
                         + "'Investment'. Error when processing range= '" + excel_range + "' and url=\n\t" + url)
    # Whatever the spreadsheet headers were, downstream code relies on these two names
    plan_df.columns = ['Period', 'Investment']

    # One entry per spreadsheet row; every entry is tagged with the same units
    investment_ts = [
        {'period': record['Period'],
         'investment': record['Investment'],
         'units': 'person-days'}
        for _, record in plan_df.iterrows()
    ]

    manifest = {
        'apiVersion': 'journeys.ea.io/v1',
        'kind': 'JourneyInvestment',
        # Namespace would typically be something like 'Development' or 'Production'
        'metadata': {
            'namespace': environment + '.' + scoring_cycle,
            'name': product + '.' + journey + '.' + planning_scenario,
            'labels': {
                'product': product,
                'scoringCycle': scoring_cycle,
                'scenario': planning_scenario,
                'journey': journey,
            },
        },
        'planMaturity': plan_maturity,
        'committment': {
            'committed_by': committing_mgr,
            'committed_on': committing_date,
            'recorded_by': user,
            'investment': investment_ts,
        },
    }

    # Echo the manifest first so it shows up as the cell output, then persist it
    yaml.dump(manifest, sys.stdout)

    target = repo_root_dir + '/' + product + '_investment_committment.yaml'
    with open(target, 'w') as out:
        yaml.dump(manifest, out)

In [17]:
# Run configuration: which product / scoring cycle to process and where its evidence lives.
# NOTE(review): ROOT is an absolute, user-specific path - adjust it before running elsewhere.
ROOT = 'C:/Users/ahernand/Finastra/Tracking Datasets - Documents/EA Roadmaps & Indicators -Evidence'
SCORING_CYCLE = 'QBR June 20'
PRODUCT = 'Essence'
PLANNING_SCENARIO = 'Default'
PLAN_NAME = '_'.join([PRODUCT, 'Modernization'])
# File name of the workbook holding the plan tables
RESOURCE = 'Marathon Investment for Essence.xlsx'

In [18]:
# Workbook location is <ROOT>/<SCORING_CYCLE>/<PRODUCT>/<RESOURCE>
PATH = '/'.join([ROOT, SCORING_CYCLE, PRODUCT, RESOURCE])
# Range holding the marathon plan table (first row is the header row)
RANGE = 'C5:D15'
# '<path>:<sheet name>' form expected by ExcelTableReader
URL = ':'.join([PATH, 'Investment Plan for Product X'])
# Folder that receives the generated YAML manifests
REPO_DIR = '../manifests'

In [19]:
# Generate and save the Marathon journey plan manifest for the configured product
applyMarathonJourneyPlan(
    product=PRODUCT,
    scoring_cycle=SCORING_CYCLE,
    environment='Production',
    planning_scenario=PLANNING_SCENARIO,
    plan_name=PLAN_NAME,
    plan_maturity='Published',
    url=URL,
    excel_range=RANGE,
    repo_root_dir=REPO_DIR,
    estimating_mgr='jay.pillai@finastra.com',
    estimation_date='06 April 2021',
    user='dipak.ray@finastra.com',
)

apiVersion: journeys.ea.io/v1
kind: JourneyPlan
metadata:
  labels:
    journey: Modernization
    product: Essence
    scenario: Default
    scoringCycle: QBR June 20
  name: Essence.Modernization.Default
  namespace: Production.QBR June 20
plan:
  estimated_by: jay.pillai@finastra.com
  estimated_on: 06 April 2021
  recorded_by: dipak.ray@finastra.com
  type: Marathon
  workstreams:
  - effort: 1000
    workstream: "Move to\_Azure Managed SQL PaaS "
    workstream-id: ws-1
  - effort: 8000
    workstream: Events and Notification
    workstream-id: ws-2
  - effort: 1000
    workstream: Security
    workstream-id: ws-3
  - effort: 5000
    workstream: UI/UX
    workstream-id: ws-4
  - effort: 4000
    workstream: Data  Analytics and Reporting
    workstream-id: ws-5
  - effort: 2000
    workstream: Interfaces/integration
    workstream-id: ws-6
  - effort: 12000
    workstream: 'Decomposition into microservices '
    workstream-id: ws-7
  - effort: 2000
    workstream: Containerization

In [20]:
# Range holding the investment table (first row is the header row)
INVESTMENT_RANGE = 'F6:G11'

# Generate and save the investment commitment manifest for the configured product
applyInvestmentCommittment(
    product=PRODUCT,
    scoring_cycle=SCORING_CYCLE,
    environment='Production',
    planning_scenario=PLANNING_SCENARIO,
    plan_name=PLAN_NAME,
    plan_maturity='Published',
    url=URL,
    excel_range=INVESTMENT_RANGE,
    repo_root_dir=REPO_DIR,
    committing_mgr='anand.subbaraman@finastra.com',
    committing_date='06 April 2021',
    user='dipak.ray@finastra.com',
)

apiVersion: journeys.ea.io/v1
committment:
  committed_by: anand.subbaraman@finastra.com
  committed_on: 06 April 2021
  investment:
  - investment: 4000
    period: FY 2021
    units: person-days
  - investment: 7000
    period: FY 2022
    units: person-days
  - investment: 13000
    period: FY 2023
    units: person-days
  - investment: 13000
    period: FY 2024
    units: person-days
  - investment: 13000
    period: FY 2025
    units: person-days
  recorded_by: dipak.ray@finastra.com
kind: JourneyInvestment
metadata:
  labels:
    journey: Modernization
    product: Essence
    scenario: Default
    scoringCycle: QBR June 20
  name: Essence.Modernization.Default
  namespace: Production.QBR June 20
planMaturity: Published


<h1> SCRAP </h1>

In [95]:
# Scratch version of the manifest construction.
# NOTE(review): `df` is not defined in any visible cell - presumably a DataFrame with
# 'Workstream' and 'Effort' columns left over from an earlier session; confirm before re-running.
workstreams = [
    {'workstream': record['Workstream'], 'effort': record['Effort']}
    for _, record in df.iterrows()
]

# Status can be one of: 'Development', 'Production'
metadata = {
    'namespace': 'Production',
    'labels': {'product': PRODUCT, 'scoringCycle': SCORING_CYCLE},
}

manifest_dict = {
    'apiVersion': 'ea.modernization.finastra.io/v1',
    'kind': 'JourneyPlan',
    'metadata': metadata,
    # Plan maturity can be one of: 'Not done', 'Drafted', 'Checked', 'Published'
    'planMaturity': 'Published',
    'plan': {'type': 'Marathon', 'workstreams': workstreams},
}

In [96]:
# Print the scratch manifest as YAML in the cell output
yaml.dump(manifest_dict, stream=sys.stdout)

apiVersion: ea.modernization.finastra.io/v1
kind: JourneyPlan
metadata:
  labels:
    product: Essence
    scoringCycle: QBR June 20
  namespace: Production
plan:
  type: Marathon
  workstreams:
  - effort: 1000
    workstream: "Move to\_Azure Managed SQL PaaS "
  - effort: 8000
    workstream: Events and Notification
  - effort: 1000
    workstream: Security
  - effort: 5000
    workstream: UI/UX
  - effort: 4000
    workstream: Data  Analytics and Reporting
  - effort: 2000
    workstream: Interfaces/integration
  - effort: 12000
    workstream: 'Decomposition into microservices '
  - effort: 2000
    workstream: Containerization Essence
  - effort: 7000
    workstream: QA Automation
  - effort: 8000
    workstream: Cloud Operation(CI/CD,EOD,Monitoring)
planMaturity: Published


In [13]:
# Tiny two-column frame used to try out the column-count check
df1 = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
df1

Unnamed: 0,a,b
0,1,3
1,2,4


In [15]:
# Number of columns in the frame (second entry of the (rows, columns) shape)
df1.shape[1]

2