### File Handling

* python provides us a basic set of built-in functions to read or write files in the disk

* it is so important as feaure that is present in \_\_builtins\_\_ module and you can access the files without any import.


## open()

* open function allows us to open a file 
    * it is opened for reading by default.
    * but we can open files in different modes.



In [1]:
file = open('primeutils.py')
print(file,type(file))

<_io.TextIOWrapper name='primeutils.py' mode='r' encoding='cp1252'> <class '_io.TextIOWrapper'>


#### Now we can read the content of the file using file.read()

In [2]:
data=file.read()
print(data)

def is_prime(number):
    
    test=2
    while test <number:
        if number%test == 0:
            return False
        test+=1

    return True


#### We should always close the file once the work is over

In [3]:
file.close()

### Lets write a generic show_file funciton

In [4]:
def show_file(path):
    file=open(path)
    data=file.read()
    print(data)
    file.close()

In [5]:
show_file('triangle.py')


import math

class Triangle:
    pass

def validate(t):
    if not isinstance(t, Triangle):
        raise TypeError("t must be a Triangle")
    
    if t.s1>0 and t.s2>0 and t.s3>0 and \
            t.s1+t.s2>t.s3 and \
            t.s2+t.s3>t.s3 and \
            t.s1+t.s3>t.s2:
        return
    raise ValueError("Invalid Sides")

def perimeter(t):
    validate(t) # if it raises we won't reach next line
    # if we reach here that means sides are valid.
    return t.s1+t.s2+t.s3


def create(s1,s2,s3):
    t=Triangle()
    t.s1=s1
    t.s2=s2
    t.s3=s3
    return t

def area(t):
    validate(t)
    s=perimeter(t)/2
    return math.sqrt(s*(s-t.s1)*(s-t.s2)*(s-t.s3))

def draw(t):
    validate(t)
    print(f'Triangle<{t.s1},{t.s2},{t.s3}>')



#### Handling File exceptions.

* we may encouter error while dealing with file
* example
    * file not found
    * no read permission
    * or attempt to write a read only file.

* if we don't handle the exception we may not be able close the file.

In [6]:
file=open('somefile.txt')

FileNotFoundError: [Errno 2] No such file or directory: 'somefile.txt'

In [7]:
file=open('triangle.py')
file.write('new data')
file.close()

UnsupportedOperation: not writable

### File is left unclosed.

* exception occured on line2
* file couldn't be closed 
* this may block the file access.


#### Approach #1  try-finally block

In [17]:
def show_file(path,mode="r"):
    file=None
    try:
        file=open(path,mode)
        data=file.read()
        print(data)
    finally:
        print('\n\nclosing the file\n\n')
        if file:
            file.close()

In [18]:
show_file('circle.py')


import math

class Circle:
    pass

def validate(circle):
    if not isinstance(circle, Circle):
        raise TypeError(f"{type(circle)} Not a Cricle")
    if circle.radius<=0:
        raise ValueError(f'Invalid Radius: {circle.radius}')

def create(radius):
    c=Circle()
    c.radius=radius
    validate(c)
    return c

def perimeter(circle):
    validate(circle)
    return 2* math.pi*circle.radius

def area(circle):
    validate(circle)
    return math.pi*circle.radius*circle.radius

def draw(circle):
    validate(circle)
    print(f'Circle({circle.radius})')
    



closing the file




In [19]:
show_file('newfile.txt')



closing the file




FileNotFoundError: [Errno 2] No such file or directory: 'newfile.txt'

### Approach #2 with keyword

* python provides a with block to clean up the resource once it's use is over

* it is a recommended process to work with with if the our object support it.

* in case of files, the file will automatically get closed once we reach end of with
    * it will happen even if there is an exception raised.

In [20]:
def show_file(path):
    with open(path) as file:
        data=file.read()
        print(data)
    #file is auto closed.

In [21]:
show_file('triangle.py')


import math

class Triangle:
    pass

def validate(t):
    if not isinstance(t, Triangle):
        raise TypeError("t must be a Triangle")
    
    if t.s1>0 and t.s2>0 and t.s3>0 and \
            t.s1+t.s2>t.s3 and \
            t.s2+t.s3>t.s3 and \
            t.s1+t.s3>t.s2:
        return
    raise ValueError("Invalid Sides")

def perimeter(t):
    validate(t) # if it raises we won't reach next line
    # if we reach here that means sides are valid.
    return t.s1+t.s2+t.s3


def create(s1,s2,s3):
    t=Triangle()
    t.s1=s1
    t.s2=s2
    t.s3=s3
    return t

def area(t):
    validate(t)
    s=perimeter(t)/2
    return math.sqrt(s*(s-t.s1)*(s-t.s2)*(s-t.s3))

def draw(t):
    validate(t)
    print(f'Triangle<{t.s1},{t.s2},{t.s3}>')



### Reading a large file.

* a small file can be read in one go using read() function.
* but if we have a very large file (say 200mb) we should read it in chunks rather whole file together.

### Approach #1  read line by line



In [22]:
def read_file_lines(path):
    with open(path) as file:
        lines=0
        data=''
        for line in file.readlines():
            data+=line
            lines+=1
        print(f'total lines {lines}')
        print(f'total size={len(data)}')

In [23]:
read_file_lines('triangle.py')

total lines 38
total size=754


In [24]:
read_file_lines('sherlock-holmes.txt')

total lines 128457
total size=6488665


#### Approach #2

* read file in chunks 

In [25]:
def read_chunks(path,chunk_size=1024):
    count=0
    data=''
    with open(path) as file:
        while True:
            read=file.read(chunk_size)
            if len(read)==0:
                break
            count+=1
            data+=read

    print(f'total read count: {count}')
    print(f'total bytes read: {len(data)}')


In [26]:
read_chunks('sherlock-holmes.txt')

total read count: 6337
total bytes read: 6488665


### Different modes for file

* There are different modes
    * r --> read
    * w --> write
    * t --> text
        * useful for text data
    * b --> binary
        * usefule for binary data like images, video, zip

* some can be combined together
    * rb --> read binary
    * wb --> write binary


In [28]:
#help(open)

### function to duplicate a file


In [29]:
def duplicate_file(source, target, chunk=1024):
    count=0
    bytes=0
    with open(source,"rb") as sfile:
        with open(target, "wb") as tfile:
            while True:
                data= sfile.read(chunk)
                if len(data):
                    count+=1
                    bytes+=len(data)
                    tfile.write(data)
                    print(" + ",end='')
                else:
                    break

In [30]:
duplicate_file('sherlock-holmes.txt','sherlock.txt')

 +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  