# Python File Handling

In [1]:
# File Handling in Python
# Types of files in Python
# open() & close() function
# Different modes in File Handling
# seek() and tell() methods
# 'with' method
# Functions to read and write files

In [2]:
# Text File - .py, .txt etc..
# Binary file - image, video etc..

In [3]:
# To open the file - 
# f = open('file_path\file_name.ext', 'mode') - returns a file pointer object
# f.read()
# f.write()
# f.close()

In [4]:
# Mode of operation on text file
# Read mode - 'r' (default)
# Write Mode - 'w'
# Append mode - 'a'
# Read and write mode - 'r+'
# Write and read mode - 'w+'
# Append and read mode - 'a+'

# Mode of operation on binary file
# Read mode - 'rb' (default)
# Write Mode - 'wb'
# Append mode - 'ab'
# Read and write mode - 'r+b'
# Write and read mode - 'w+b'
# Append and read mode - 'a+b'

In [5]:
# Read mode - 'r'
# If the file exist then it will open the file in read mode and places the cursor in the begining of the file.
# If the file does not exist then it gives FileNotFoundError.
# In this mode, we are allowed only to read from the file but not allowed to write into the file.

#If the file exist
f = open('Sample_Files/my_file.txt', 'r')
print("File Attributes: ", f)
print(type(f))

File Attributes:  <_io.TextIOWrapper name='Sample_Files/my_file.txt' mode='r' encoding='cp1252'>
<class '_io.TextIOWrapper'>


In [6]:
# f.tell() method returns current position of file object. 
# This method takes no parameters and returns an integer value. 
# Initially file pointer points to the beginning of the file (if not opened in append mode)
print("Current Cursor Position: ", f.tell()) 

# f.seek(index) - is used to move the cursor to specified position.
print("Moved to Cursor Position: ", f.seek(50))
print("Current Cursor Position: ", f.tell()) 

#f.write('Hello')         # UnsupportedOperation: not writable as file is open in read mode

f.close()

Current Cursor Position:  0
Moved to Cursor Position:  50
Current Cursor Position:  50


In [7]:
# read() method - Reads the content of the entire file (by default) or the number of bytes specified

f = open('Sample_Files/my_file.txt', 'r')

# Read the contents of the entire file and returns it as 'str'
# Size is Optional. The number of bytes to return. Default -1, which means the whole file.
content = f.read(100)        
print("Length of file contents: ", len(content))       
print("Contents of File: ", content)
print("Datatype of file contents: ", type(content))   

Length of file contents:  100
Contents of File:  Python is an easy to learn, powerful programming language. It has efficient high-level data structur
Datatype of file contents:  <class 'str'>


In [8]:
# readline() method - Returns one line at a time from the file
# You can also specify how many bytes from the line to return, by using the size parameter.

f = open('Sample_Files/my_file.txt', 'r')

# reads the current line from current cursor position and returns a string object

content = f.readline()
print("Datatype of file contents: ", type(content))  
print("Length of first line: ", len(content))       
print("Contents of First Line: ", content)

Datatype of file contents:  <class 'str'>
Length of first line:  172
Contents of First Line:  Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. 



In [9]:
# Read the first 50 bytes of the next line and returns it as 'str'
content = f.readline(50)
print("Length of second line: ", len(content))       
print("Contents of Second File: ", content)

Length of second line:  50
Contents of Second File:  Pythons elegant syntax and dynamic typing, togethe


In [10]:
# readlines() method - Returns a list containing each line in the file as a list item

f = open('Sample_Files/my_file.txt', 'r')

# reads the whole content of the file from current cursor position and returns a list of strings 
# where each string represent a line.

content = f.readlines()
print("Datatype of file contents: ", type(content), end = '\n\n')  
print("Number of elements in list / Number of lines in file: ", len(content), end = '\n\n')       
print("Contents of entire List: ", content, end = '\n\n')

Datatype of file contents:  <class 'list'>

Number of elements in list / Number of lines in file:  8

Contents of entire List:  ['Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. \n', 'Pythons elegant syntax and dynamic typing, together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms.\n', '\n', 'The Python interpreter and the extensive standard library are freely available in source or binary form for all major platforms from the Python web site, https://www.python.org/, and may be freely distributed. \n', 'The same site also contains distributions of and pointers to many free third party Python modules, programs and tools, and additional documentation.\n', '\n', 'The Python interpreter is easily extended with new functions and data types implemented in C or C++ (or other language

In [11]:
# Print the first 5 words in each line:

for i in content:
    print(" ".join(i.split()[0:5]))

Python is an easy to
Pythons elegant syntax and dynamic

The Python interpreter and the
The same site also contains

The Python interpreter is easily
Python is also suitable as


In [12]:
# For 'r' and 'r+'', If the file does not exist: FileNotFoundError
f = open('Sample_Files/my_mod.txt', 'r')
f.close()

FileNotFoundError: [Errno 2] No such file or directory: 'Sample_Files/my_mod.txt'

In [13]:
# Read and write mode - 'r+'
# If the file exist then it will open the file in read mode and place the cursor in the begining of the file.
# If the file does not exist then it gives FileNotFoundError.
# In this mode, we are allowed to both read and write.

f = open('Sample_Files/my_file_2.txt', 'r+')
print(f.tell())
f.write('Hello')
f.seek(0)
print(f.read(100))
f.close()

# In r+ mode, when we try to write some content to the file, 
# it over writes the content (Replaces the content from current cursor position).
# It does not add/append the content.

0
Hellon is an easy to learn, powerful programming language. It has efficient high-level data structur


In [14]:
# Write mode - 'w'
# If the file exist then it will open the file in write mode and deletes the content of the file.
# If the file does not exist then it will create a file with the given name.
# In this mode, we are allowed only to write into the file but not allowed to read from the file.

f = open('Sample_Files/my_file_3.txt', 'w')
print(f.tell())
# print(f.read())                                         # Unsupported operation

f.write('''If the file exist then it 
will open the file in write mode and 
deletes the content of the file.
If the file does not exist then it 
will create a file with the given name.''')

print(f.tell())

f.write('''If the file exist then it 
will open the file in write mode and 
deletes the content of the file.
If the file does not exist then it 
will create a file with the given name.''')

print(f.tell())

f.close()

0
177
354


In [15]:
# Write and read mode - 'w+'
# If the file exist then it will open the file in write mode and deletes the content of the file.
# If the file does not exist then it will create a file with the given name.
# In this mode, we are allowed to read and write both.

f = open('Sample_Files/my_file_4.txt', 'w+')
print(f.tell())     # Prints current cursor position
print(f.read())     # Prints an empty line as the file is empty

f.write('''If the file exist then it 
will open the file in write mode and 
deletes the content of the file.
If the file does not exist then it 
will create a file with the given name.''')

print(f.tell())     # Prints the current cursor position after writing the above text to the file
print(f.read())     # Prints the file from the current cursor position i.e. 177.
f.seek(0)           # Resets the cursor to position 0
print(f.read())     # Prints the file from the beginning
f.close()

0

177

If the file exist then it 
will open the file in write mode and 
deletes the content of the file.
If the file does not exist then it 
will create a file with the given name.


In [16]:
# Append mode - 'a'
# If the file exist then it will open the file in append mode and places the cursor at the end of the file.
# If the file does not exist then it will create a file with the given name.
# In this mode, we are allowed only to append into the file but not allowed to read from the file.

f = open('Sample_Files/my_file_5.txt', 'a')

print("Cursor position Before appending: ", f.tell())

# f.read()                                        # UnsupportedOperation: not readable

f.write('\nHello World, Welcome to Volvo!!!')     # Appends a line of text
   
f.write('\nGTT Python class at Volvo')            # Appends another line of text

print("Cursor position after appending: ", f.tell())

f.close()

Cursor position Before appending:  549
Cursor position after appending:  610


In [17]:
# Append and read mode - 'a+'
# If the file exist then it will open the file in append mode and places the cursor at the end of the file.
# If the file does not exist then it will create a file with the given name.
# In this mode, we are allowed to read and write both.

f = open('Sample_Files/my_file_5.txt', 'a+')
print(f.tell())
f.seek(0)
print(f.read())
f.close()

610

Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo
Hello World, Welcome to Volvo!!!
GTT Python class at Volvo


In [18]:
# Opening the file using with statement. Automatically closes the file at the end of the indent

with open('Sample_Files/my_file.txt', 'r') as f:
    print(f.read(100))
#     code
#     code
#     code

print("\n\nFile Closed")

Python is an easy to learn, powerful programming language. It has efficient high-level data structur


File Closed


In [19]:
# writelines(['str1', '\nstr2', '\nstr3', '\nstr4']) - writes each string as new line in the file.
with open('Sample_Files/my_file_7.txt', 'w+') as f:
    f.writelines(['str1', '\nstr2', '\nstr3', '\nstr4'])
    f.seek(0)
    print(f.read())

str1
str2
str3
str4


In [20]:
# Count total no of words, characters and lines from the file

with open('Sample_Files/my_file_6.txt', 'r') as f:
    content = f.read()
    print('Total no of characters is', f.tell())
    print('Total no of words is', len(content.split()))
    f.seek(0)
    print('Total no of lines is', len(f.readlines()))

Total no of characters is 1195
Total no of words is 195
Total no of lines is 8


In [21]:
# Copy the content of python.jpeg to new.png
with open('Sample_Files/Python.jpg', 'rb') as f1:
    with open('Sample_Files/Python_new.jpg', 'wb') as f2:
        f2.write(f1.read())

In [22]:
# Write a program to read lines from a text file "notes.txt". 
# Your function should find and display the occurrence of the word "the".

# For example: If the content of the file is:
# """India is the fastest-growing economy. 
# India is looking for more investments around the globe. 
# The whole world is looking at India as a great market. 
# Most of the Indians can foresee the heights that India is capable of reaching."""

# The output should be 5

with open('Sample_Files/notes.txt', 'r') as f1:
    content = f1.read()
    print(content.lower().count('the '))

5


# Sample Use Case

In [23]:
# WAP to read Matlab License Usage log (a text file) and extract required data in a tabular format 
# that is easy to read and looks clean

In [29]:
import pandas as pd

with open('Sample_Files/licence.txt', 'r') as lic:
    lines = lic.readlines()
    
final_field = []    
with open('Sample_Files/licence_out.txt', 'w+') as lic_out:
    for line in lines:
        if line != "\n": 
            line=line.replace('; ', ': ')
            line=line.replace('Users of ', '')
            line=line.replace('Total of ', '')
            line=line.replace('licenses issued', '')
            line=line.replace('licenses in use', '')
            line=line.replace('license issued', '')
            line=line.replace('license in use', '')
            line=line.replace('(', '')
            line=line.replace(')', '')
            line=line.rstrip()
            line=line + "\n"
            field = line.split(": ", 2)
            #print(field)

            final = [field[0], field[1], field[2]]
            final_field.append(list(final))

            # Save the output as a text file
            lic_out.writelines(final)
           
    # Create a Pandas Dataframe with the cleaned up text    
    df = pd.DataFrame(final_field,
                   index=None,
                   columns=['Toolbox', 'Licenses Issued', 'Licenses in Use'])
    
    # Save the output as an excel file
    df.to_excel("Sample_Files/licence_final.xlsx", index=False)
df

Unnamed: 0,Toolbox,Licenses Issued,Licenses in Use
0,MATLAB,10000,31\n
1,SIMULINK,10000,23\n
2,Video_and_Image_Blockset,10000,0\n
3,Control_Toolbox,10000,0\n
4,Curve_Fitting_Toolbox,10000,0\n
...,...,...,...
60,Embedded_IDE_Link,10000,0\n
61,Stateflow_Coder,10000,0\n
62,Target_Support_Package,10000,0\n
63,Embedded_Target_MPC555,10000,0\n
