# Part - 01

## 1. f-strings
```python
# These f-string techniques are mostly needed for formatting output when working with spaCy.
```

In [16]:
# Small table of books: the first tuple is the header row, the rest are records
# of (name, author, page count).
books = [
    ('Name', 'Author', 'Pages'),
    ('2666', 'Roberto Bolaño', 1055),
    ('Desert Solitaire', 'Edward Abbey', 264),
    ('Geek Love', 'Katherine Dunn', 272),
]

In [17]:
# Printing the list directly shows every tuple on one long line.
print(books)

[('Name', 'Author', 'Pages'), ('2666', 'Roberto Bolaño', 1055), ('Desert Solitaire', 'Edward Abbey', 264), ('Geek Love', 'Katherine Dunn', 272)]


In [18]:
# Iterate to print one tuple per line (the header row is simply the first tuple).
for book in books:
    print(book)

('Name', 'Author', 'Pages')
('2666', 'Roberto Bolaño', 1055)
('Desert Solitaire', 'Edward Abbey', 264)
('Geek Love', 'Katherine Dunn', 272)


In [19]:
# Unpack each tuple into its three fields and describe it in three sentences.
# NOTE: the header tuple ('Name', 'Author', 'Pages') is part of `books`, so it
# is described as if it were a book as well.
for book_name, author, pages in books:
    print(
        f'The Name of the book is {book_name}.',
        f'The Author of the book is {author}.',
        f'There are {pages} pages.',
        sep='\n',
    )

The Name of the book is Name.
The Author of the book is Author.
There are Pages pages.
The Name of the book is 2666.
The Author of the book is Roberto Bolaño.
There are 1055 pages.
The Name of the book is Desert Solitaire.
The Author of the book is Edward Abbey.
There are 264 pages.
The Name of the book is Geek Love.
The Author of the book is Katherine Dunn.
There are 272 pages.


In [20]:
# One f-string per row; fields are separated by a single space, so the
# columns do not line up.
for book_name, author, pages in books:
    print(f'{book_name} {author} {pages}')

Name Author Pages
2666 Roberto Bolaño 1055
Desert Solitaire Edward Abbey 264
Geek Love Katherine Dunn 272


In [21]:
# Nested braces supply the field width: {book_name:{20}} and {author:{20}} pad
# to 20 characters, while {pages:.>{10}} right-aligns within 10 characters
# using '.' as the fill character.
for book_name, author, pages in books:
    print(f'{book_name:{20}} {author:{20}} {pages:.>{10}}')

Name                 Author               .....Pages
2666                 Roberto Bolaño       ......1055
Desert Solitaire     Edward Abbey         .......264
Geek Love            Katherine Dunn       .......272


In [22]:
from datetime import datetime
today = datetime.today()
# strftime codes can be used directly in the format spec:
# %B = full month name, %d = day of the month, %Y = four-digit year.
print(f'{today:%B %d %Y}')

October 10 2020


## 2. File Operations

In [23]:
%%writefile text.txt
this is the first line
this is the second line
this is the third line

Overwriting text.txt


In [24]:
# With no mode argument, open() opens the file for reading in text mode.
newfile = open('text.txt')

In [25]:
# read() returns the whole file content as a single string.
newfile.read()

'this is the first line\nthis is the second line\nthis is the third line\n'

Because the cursor has moved to the end of the file (EOF), calling `newfile.read()` again returns an empty string.

In [26]:
# The position is already at end-of-file, so there is nothing left to read.
newfile.read()

''

In [27]:
# seek() moves the cursor around the file; seek(0) jumps back to the start
# and returns the new position.
newfile.seek(0)

0

In [28]:
# Reading from position 0 returns the full content once more.
newfile.read()

'this is the first line\nthis is the second line\nthis is the third line\n'

In [29]:
# Rewind again so the next read starts from the beginning.
newfile.seek(0)

0

In [30]:
# You can also read the file line by line using the readlines() method.
# It returns a list of strings, one per line, each keeping its trailing '\n'
# (hence end='' below, to avoid printing blank lines in between).
doc = newfile.readlines()
for lines in doc:
    print(lines, end='')

this is the first line
this is the second line
this is the third line


In [31]:
# Close the file once you are done working with it.
# A file left open can cause errors such as "the file is in use by another program".
newfile.close()

In [32]:
# Writing to a file
# 1. Truncating write mode: 'w+' opens the file for both writing and reading
#    and empties it as soon as it is opened.
newfile = open('text.txt', 'w+')

In [33]:
# Opening the file in 'w+' mode already discarded its previous content, so the
# text written here becomes the entire file.
# write() returns the number of characters written.
newfile.write('This is a message that replaces the previous message')

52

In [34]:
# After write() the cursor sits at the end of the file, so rewind before reading.
newfile.seek(0)
newfile.read()

'This is a message that replaces the previous message'

In [35]:
# Close the handle before the file is reopened in a different mode.
newfile.close()

In [36]:
# Replacing the existing data can be avoided by using append mode ('a+').
# It also creates the file if it does not exist.
newfile = open('text.txt', 'a+')

In [37]:
# Rewind before reading; the existing content is still there in append mode.
newfile.seek(0)
newfile.read()

'This is a message that replaces the previous message'

In [38]:
# The text is added after the existing content; write() returns the character count.
newfile.write('\nwriting a new line')

19

In [39]:
# Rewind and print to confirm that both the old and the appended text are present.
newfile.seek(0)
print(newfile.read())

This is a message that replaces the previous message
writing a new line


In [40]:
# Done with the append-mode handle.
newfile.close()

In [41]:
# A context manager (`with`) opens the file and closes it automatically when
# the block ends, so no explicit close() call is needed.
with open('text.txt', 'r') as newfile:
    print(newfile.readlines())

['This is a message that replaces the previous message\n', 'writing a new line']


## 3. PyPDF

In [42]:
# Module for reading pdf files.
import PyPDF2

In [43]:
# List the files in the current working directory.
ls

Business_Proposal.pdf  US_Declaration.pdf     text.txt
Some_New_Doc.pdf       Untitled.ipynb


In [57]:
# Reading the file in binary format.
myfile = open('US_Declaration.pdf', 'rb')
pdf_reader = PyPDF2.PdfFileReader(myfile)
pdf_reader.numPages

5

In [None]:
# Pages are zero-indexed, so getPage(0) is the first page.
page_one = pdf_reader.getPage(0)
# This cell was not executed because the extracted text is too long to show here.
page_one.extractText()

In [58]:
# Release the handle opened by the earlier reading cell.
myfile.close()

# Copy the first page of one PDF into a new PDF.
# Both files are opened with `with` so they are closed even if PyPDF2 raises
# part-way through reading or writing.
with open('US_Declaration.pdf', 'rb') as f:
    pdf_reader = PyPDF2.PdfFileReader(f)
    first_page = pdf_reader.getPage(0)
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_writer.addPage(first_page)
    # Write while the source file is still open, as the original code did.
    with open('doc01.pdf', 'wb') as pdf_output:
        pdf_writer.write(pdf_output)

## 4. Regular Expressions

In [59]:
import re

In [61]:
# Find the first place a literal substring occurs in a sentence.
pattern = 'phone'
statement = 'I use a iphone'
# search() returns a match object for the first occurrence.
my_match = re.compile(pattern).search(statement)

In [63]:
# span() is the (start, end) pair; start() and end() give each bound on its own.
print(my_match.span(), my_match.start(), my_match.end(), sep='\n')

(9, 14)
9
14


In [70]:
pattern = 'scream'
t_twister = 'I scream, You scream, We all scream for Icecream.'
# findall() collects every non-overlapping occurrence into a list of strings.
my_match = re.compile(pattern).findall(t_twister)
print(my_match)

['scream', 'scream', 'scream']


In [73]:
# finditer() yields one match object per occurrence, so the position of each
# match is available as well as its text.
rec = re.finditer(pattern, t_twister)
for match in rec:
    print(match.span())

(2, 8)
(14, 20)
(29, 35)


In [74]:
# Phone-number shape: three digits, three digits and four digits, separated by hyphens.
pattern = r'\d{3}-\d{3}-\d{4}'

In [76]:
from random import randint

# Build ten random numbers in the NNN-NNN-NNNN shape. The lower bounds are
# 100 and 1000, so every part always has its full number of digits.
phone_numbers = [
    f'{randint(100, 999)}-{randint(100, 999)}-{randint(1000, 9999)}'
    for _ in range(10)
]

# Join them into a single space-separated string to search through.
# `number_list` is only ever bound to this string, never to the list.
number_list = ' '.join(phone_numbers)
print(number_list)

316-339-9223 944-675-7803 157-689-6009 492-327-1352 393-418-2021 808-279-6798 699-367-1872 391-441-8104 176-871-3808 558-320-5012


In [77]:
# Locate every phone number in the generated string and print its (start, end) span.
rec = re.finditer(pattern, number_list)
for match in rec:
    print(match.span())

(0, 12)
(13, 25)
(26, 38)
(39, 51)
(52, 64)
(65, 77)
(78, 90)
(91, 103)
(104, 116)
(117, 129)
