# Count the number of lines in Python for each file

In [1]:
!ls -l

total 1013912
-rw-rw-r-- 1 dsc dsc 554970628 may 31 12:52 bookings.csv.bz2
-rw-rw-r-- 1 dsc dsc      3654 jun  1 09:22 ch_01-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     12930 may 31 12:55 ch_02-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     10832 may 31 12:55 ch_03-Empty.ipynb
-rw-rw-r-- 1 dsc dsc      8019 may 31 12:55 ch_04-Empty.ipynb
-rw-rw-r-- 1 dsc dsc       962 may 31 12:55 ch_05b-empty.ipynb
-rw-rw-r-- 1 dsc dsc     24953 may 31 12:55 ch_05-empty.ipynb
-rw-rw-r-- 1 dsc dsc 483188920 may 31 12:52 searches.csv.bz2


In [2]:
%%bash
bzcat bookings.csv.bz2 | head -10000 > bookings.sample.csv
bzip2 -f bookings.sample.csv

In [3]:
!ls -l

total 1014436
-rw-rw-r-- 1 dsc dsc 554970628 may 31 12:52 bookings.csv.bz2
-rw-rw-r-- 1 dsc dsc    535893 jun  1 09:24 bookings.sample.csv.bz2
-rw-rw-r-- 1 dsc dsc      3654 jun  1 09:22 ch_01-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     12930 may 31 12:55 ch_02-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     10832 may 31 12:55 ch_03-Empty.ipynb
-rw-rw-r-- 1 dsc dsc      8019 may 31 12:55 ch_04-Empty.ipynb
-rw-rw-r-- 1 dsc dsc       962 may 31 12:55 ch_05b-empty.ipynb
-rw-rw-r-- 1 dsc dsc     24953 may 31 12:55 ch_05-empty.ipynb
-rw-rw-r-- 1 dsc dsc 483188920 may 31 12:52 searches.csv.bz2


## 1) Command Line

In [5]:
! bzcat bookings.sample.csv.bz2 | wc -l

10000


## 2) Python:

#### 2a) Python without uncompressing

In [7]:
import bz2

In [8]:
filename='./bookings.sample.csv.bz2'
fileBz2=bz2.BZ2File(filename)

In [9]:
type(fileBz2)

bz2.BZ2File

In [10]:
k=0
for line in fileBz2:
    k+=1
    
print("%s has %d lines."%(filename, k))

./bookings.sample.csv.bz2 has 10000 lines.


In [12]:
fileBz2.closed

False

In [14]:
fileBz2.close()

In [15]:
fileBz2.closed

True

In [17]:
import bz2
filename='./bookings.sample.csv.bz2'
with bz2.BZ2File(filename) as file_input:
    k=0
    for line in file_input:
        k+=1
    
print("%s has %d lines."%(filename, k))

./bookings.sample.csv.bz2 has 10000 lines.


In [19]:
import bz2
filename='./bookings.sample.csv.bz2'
with bz2.BZ2File(filename) as file_input:
    k=0
    for line in file_input:
        k+=1
        if k==10: break
    
print("%s has %d lines."%(filename, k))

./bookings.sample.csv.bz2 has 10 lines.


In [20]:
file_input.closed

True

#### 2b) Python on row uncompressed file

In [22]:
!bzcat bookings.csv.bz2 | head -10000 > bookings.sample.csv


bzcat: I/O or other error, bailing out.  Possible reason follows.
bzcat: Broken pipe
	Input file = bookings.csv.bz2, output file = (stdout)


In [23]:
!ls -l

total 1018580
-rw-rw-r-- 1 dsc dsc 554970628 may 31 12:52 bookings.csv.bz2
-rw-rw-r-- 1 dsc dsc   4232732 jun  1 09:41 bookings.sample.csv
-rw-rw-r-- 1 dsc dsc    535893 jun  1 09:24 bookings.sample.csv.bz2
-rw-rw-r-- 1 dsc dsc      9571 jun  1 09:42 ch_01-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     12930 may 31 12:55 ch_02-Empty.ipynb
-rw-rw-r-- 1 dsc dsc     10832 may 31 12:55 ch_03-Empty.ipynb
-rw-rw-r-- 1 dsc dsc      8019 may 31 12:55 ch_04-Empty.ipynb
-rw-rw-r-- 1 dsc dsc       962 may 31 12:55 ch_05b-empty.ipynb
-rw-rw-r-- 1 dsc dsc     24953 may 31 12:55 ch_05-empty.ipynb
-rw-rw-r-- 1 dsc dsc 483188920 may 31 12:52 searches.csv.bz2


In [24]:
reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [25]:
filename='./bookings.sample.csv'
f=open(filename,"r")
k=0
for line in f:
    k+=1
    
print("%s has %d lines."%(filename, k))

./bookings.sample.csv has 10000 lines.


In [26]:
with open(filename,"r") as file_input:
    for k, line in enumerate(file_input):
        pass
print("%s has %d lines."%(filename, k+1))    

./bookings.sample.csv has 10000 lines.


In [27]:
num_lin=sum(1 for line in open(filename,"r"))

In [28]:
num_lin

10000

## 3) What if the file didnt exist? Use Try-except...

In [38]:
filename='./bookings.sample.csv'

try:
    with open(filename,"r") as file_input:
        for k, line in enumerate(file_input):
            pass
    g=int('as')
    print("%s has %d lines."%(filename, k+1))   
except ValueError:
    print("cannot convert str to int")
except FileNotFoundError:
    print("File not found")
except:
    print ("unexpected error")

cannot convert str to int


In [34]:
filename='./bookikjgkjkjngs.sample.csv'
with open(filename,"r") as file_input:
        for k, line in enumerate(file_input):
            pass
print("%s has %d lines."%(filename, k+1))  

FileNotFoundError: [Errno 2] No such file or directory: './bookikjgkjkjngs.sample.csv'

In [36]:
int('abs')

ValueError: invalid literal for int() with base 10: 'abs'