<h3>Automation Intro</h3>


<p>Creating Strings with Formatted Values</p>


In [1]:
data = [
    (1000, 10),
    (2000, 17),
    (2500, 170),
    (2500, -170),
]

In [2]:
print('REVENUE | PROFIT | PERCENT')

TEMPLATE = '{revenue:>7} | {profit:>+6} | {percent:>7.2%}'

REVENUE | PROFIT | PERCENT


In [3]:
for revenue, profit in data:
    row = TEMPLATE.format(revenue=revenue, profit=profit,
                          percent=profit / revenue)
    print(row)

   1000 |    +10 |   1.00%
   2000 |    +17 |   0.85%
   2500 |   +170 |   6.80%
   2500 |   -170 |  -6.80%


<p>Manipulating Strings</p>


In [4]:
INPUT_TEXT = """
    AFTER THE CLOSE OF THE SECOND QUARTER, OUR COMPANY , 
    CASTANACORP HAS ACHIEVED A GROWTH IN THE REVENUE OF 7.47%. 
    THIS IS IN LINE WITH THE OBJECTIVES FOR THE YEAR. 
    THE MAIN DRIVER OF THE SALES HAS BEEN THE NEW PACKAGE DESIGNED 
    UNDER THE SUPERVISION OF OUR MARKETING DEPARTMENT. 
    OUR EXPENSES HAVE BEEN CONTAINED, INCREASING ONLY BY 0.7%, 
    THOUGH THE BOARD CONSIDERS ITS NEEDS TO BE FURTHER REDUCED. 
    THE EVALUATION IS SATISFACTORY AND THE FORECAST FOR THE NEXT QUARTER IS OPTIMISTIC. 
    THE BOARD EXPECTS AN INCREASE IN PROFIT OF AT LEAST 2 MILLION DOLLARS
"""

In [5]:
words = INPUT_TEXT.split()
words

['AFTER',
 'THE',
 'CLOSE',
 'OF',
 'THE',
 'SECOND',
 'QUARTER,',
 'OUR',
 'COMPANY',
 ',',
 'CASTANACORP',
 'HAS',
 'ACHIEVED',
 'A',
 'GROWTH',
 'IN',
 'THE',
 'REVENUE',
 'OF',
 '7.47%.',
 'THIS',
 'IS',
 'IN',
 'LINE',
 'WITH',
 'THE',
 'OBJECTIVES',
 'FOR',
 'THE',
 'YEAR.',
 'THE',
 'MAIN',
 'DRIVER',
 'OF',
 'THE',
 'SALES',
 'HAS',
 'BEEN',
 'THE',
 'NEW',
 'PACKAGE',
 'DESIGNED',
 'UNDER',
 'THE',
 'SUPERVISION',
 'OF',
 'OUR',
 'MARKETING',
 'DEPARTMENT.',
 'OUR',
 'EXPENSES',
 'HAVE',
 'BEEN',
 'CONTAINED,',
 'INCREASING',
 'ONLY',
 'BY',
 '0.7%,',
 'THOUGH',
 'THE',
 'BOARD',
 'CONSIDERS',
 'ITS',
 'NEEDS',
 'TO',
 'BE',
 'FURTHER',
 'REDUCED.',
 'THE',
 'EVALUATION',
 'IS',
 'SATISFACTORY',
 'AND',
 'THE',
 'FORECAST',
 'FOR',
 'THE',
 'NEXT',
 'QUARTER',
 'IS',
 'OPTIMISTIC.',
 'THE',
 'BOARD',
 'EXPECTS',
 'AN',
 'INCREASE',
 'IN',
 'PROFIT',
 'OF',
 'AT',
 'LEAST',
 '2',
 'MILLION',
 'DOLLARS']

In [6]:
redacted = [''.join('X' if w.isdigit() else w for w in word) for word in words]
redacted

['AFTER',
 'THE',
 'CLOSE',
 'OF',
 'THE',
 'SECOND',
 'QUARTER,',
 'OUR',
 'COMPANY',
 ',',
 'CASTANACORP',
 'HAS',
 'ACHIEVED',
 'A',
 'GROWTH',
 'IN',
 'THE',
 'REVENUE',
 'OF',
 'X.XX%.',
 'THIS',
 'IS',
 'IN',
 'LINE',
 'WITH',
 'THE',
 'OBJECTIVES',
 'FOR',
 'THE',
 'YEAR.',
 'THE',
 'MAIN',
 'DRIVER',
 'OF',
 'THE',
 'SALES',
 'HAS',
 'BEEN',
 'THE',
 'NEW',
 'PACKAGE',
 'DESIGNED',
 'UNDER',
 'THE',
 'SUPERVISION',
 'OF',
 'OUR',
 'MARKETING',
 'DEPARTMENT.',
 'OUR',
 'EXPENSES',
 'HAVE',
 'BEEN',
 'CONTAINED,',
 'INCREASING',
 'ONLY',
 'BY',
 'X.X%,',
 'THOUGH',
 'THE',
 'BOARD',
 'CONSIDERS',
 'ITS',
 'NEEDS',
 'TO',
 'BE',
 'FURTHER',
 'REDUCED.',
 'THE',
 'EVALUATION',
 'IS',
 'SATISFACTORY',
 'AND',
 'THE',
 'FORECAST',
 'FOR',
 'THE',
 'NEXT',
 'QUARTER',
 'IS',
 'OPTIMISTIC.',
 'THE',
 'BOARD',
 'EXPECTS',
 'AN',
 'INCREASE',
 'IN',
 'PROFIT',
 'OF',
 'AT',
 'LEAST',
 'X',
 'MILLION',
 'DOLLARS']

In [7]:
ascii_text = [word.encode('ascii', errors='replace').decode(
    'ascii') for word in redacted]
ascii_text

['AFTER',
 'THE',
 'CLOSE',
 'OF',
 'THE',
 'SECOND',
 'QUARTER,',
 'OUR',
 'COMPANY',
 ',',
 'CASTANACORP',
 'HAS',
 'ACHIEVED',
 'A',
 'GROWTH',
 'IN',
 'THE',
 'REVENUE',
 'OF',
 'X.XX%.',
 'THIS',
 'IS',
 'IN',
 'LINE',
 'WITH',
 'THE',
 'OBJECTIVES',
 'FOR',
 'THE',
 'YEAR.',
 'THE',
 'MAIN',
 'DRIVER',
 'OF',
 'THE',
 'SALES',
 'HAS',
 'BEEN',
 'THE',
 'NEW',
 'PACKAGE',
 'DESIGNED',
 'UNDER',
 'THE',
 'SUPERVISION',
 'OF',
 'OUR',
 'MARKETING',
 'DEPARTMENT.',
 'OUR',
 'EXPENSES',
 'HAVE',
 'BEEN',
 'CONTAINED,',
 'INCREASING',
 'ONLY',
 'BY',
 'X.X%,',
 'THOUGH',
 'THE',
 'BOARD',
 'CONSIDERS',
 'ITS',
 'NEEDS',
 'TO',
 'BE',
 'FURTHER',
 'REDUCED.',
 'THE',
 'EVALUATION',
 'IS',
 'SATISFACTORY',
 'AND',
 'THE',
 'FORECAST',
 'FOR',
 'THE',
 'NEXT',
 'QUARTER',
 'IS',
 'OPTIMISTIC.',
 'THE',
 'BOARD',
 'EXPECTS',
 'AN',
 'INCREASE',
 'IN',
 'PROFIT',
 'OF',
 'AT',
 'LEAST',
 'X',
 'MILLION',
 'DOLLARS']

In [8]:
# Group the words into 80 character lines
newlines = [word + '\n' if word.endswith('.') else word for word in ascii_text]
newlines

['AFTER',
 'THE',
 'CLOSE',
 'OF',
 'THE',
 'SECOND',
 'QUARTER,',
 'OUR',
 'COMPANY',
 ',',
 'CASTANACORP',
 'HAS',
 'ACHIEVED',
 'A',
 'GROWTH',
 'IN',
 'THE',
 'REVENUE',
 'OF',
 'X.XX%.\n',
 'THIS',
 'IS',
 'IN',
 'LINE',
 'WITH',
 'THE',
 'OBJECTIVES',
 'FOR',
 'THE',
 'YEAR.\n',
 'THE',
 'MAIN',
 'DRIVER',
 'OF',
 'THE',
 'SALES',
 'HAS',
 'BEEN',
 'THE',
 'NEW',
 'PACKAGE',
 'DESIGNED',
 'UNDER',
 'THE',
 'SUPERVISION',
 'OF',
 'OUR',
 'MARKETING',
 'DEPARTMENT.\n',
 'OUR',
 'EXPENSES',
 'HAVE',
 'BEEN',
 'CONTAINED,',
 'INCREASING',
 'ONLY',
 'BY',
 'X.X%,',
 'THOUGH',
 'THE',
 'BOARD',
 'CONSIDERS',
 'ITS',
 'NEEDS',
 'TO',
 'BE',
 'FURTHER',
 'REDUCED.\n',
 'THE',
 'EVALUATION',
 'IS',
 'SATISFACTORY',
 'AND',
 'THE',
 'FORECAST',
 'FOR',
 'THE',
 'NEXT',
 'QUARTER',
 'IS',
 'OPTIMISTIC.\n',
 'THE',
 'BOARD',
 'EXPECTS',
 'AN',
 'INCREASE',
 'IN',
 'PROFIT',
 'OF',
 'AT',
 'LEAST',
 'X',
 'MILLION',
 'DOLLARS']

In [9]:
LINE_SIZE = 80
lines = []
line = ''

In [10]:
for word in newlines:
    if line.endswith('\n') or len(line) + len(word) + 1 > LINE_SIZE:
        lines.append(line)
        line = ''
        line = line + ' ' + word

In [11]:
# Format all the lines as titles and join them as a single piece of text
lines = [line.title() for line in lines]
result = '\n'.join(lines)

In [14]:
print(result)




<p>Extracting Data from Structured Strings</p>


In [15]:
import delorean
from decimal import Decimal

In [16]:
log = '[2018-05-05T11:07:12.267897] - SALE - PRODUCT: 1345 - PRICE: $09.99'

In [24]:
# Splitting the log into its parts which are divided by the dashes
divide_it = log.split(" - ")
divide_it

['[2018-05-05T11:07:12.267897]', 'SALE', 'PRODUCT: 1345', 'PRICE: $09.99']

In [25]:
timestamp_string, _, product_string, price_string = divide_it

In [26]:
timestamp = delorean.parse(timestamp_string.strip('[]'))
timestamp

Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC')

In [27]:
product_id = int(product_string.split(':')[-1])
product_id

1345

In [28]:
price = Decimal(price_string.split('$')[-1])
price

Decimal('9.99')

In [29]:
timestamp, product_id, price

(Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC'),
 1345,
 Decimal('9.99'))

In [None]:
class PriceLog(object):
    def __init__(self, timestamp, product_id, price):
        self.timestamp = timestamp
        self.product_id = product_id
        self.price = price

    def __repr__(self) -> str:
        return '<PriceLog ({}, {}, {})>'.format(
            self.timestamp,
            self.product_id,
            self.price
        )
        
    @classmethod
    def parse(cls, text_log):
        """A method to parse the log file

        Args:
            text_log (String): The text being parsed

        Returns:
            String: The parsed string
        """
        divide_it = text_log.split(' - ')
        tmp_string, _, product_string, price_string = divide_it
        timestamp = delorean.parse(tmp_string.strip('[]'))
        product_id = int(product_string.split(':')[-1])
        price = Decimal(price_string.split('$')[-1])

        return cls(timestamp=timestamp, product_id=product_id, price=price)

<p>Using parse</p>

In [30]:
from parse import parse

In [31]:
LOG = '[2018-05-06T12:58:00.714611] - SALE - PRODUCT: 1345 - PRICE: $09.99'

In [32]:
FORMAT = '[{date}] - SALE - PRODUCT: {product} - PRICE: ${price}'

In [33]:
result = parse(FORMAT, LOG)
result

<Result () {'date': '2018-05-06T12:58:00.714611', 'product': '1345', 'price': '09.99'}>

In [35]:
result['date']

'2018-05-06T12:58:00.714611'

In [36]:
result['product']

'1345'

In [37]:
result['price']

'09.99'

In [38]:
FORMAT = '[{date:ti}] - SALE - PRODUCT: {product:d} - PRICE: ${price:05.2f}'

In [39]:
result = parse(FORMAT, LOG)
result

<Result () {'date': datetime.datetime(2018, 5, 6, 12, 58, 0, 714611), 'product': 1345, 'price': 9.99}>