# Comparing the read speeds of `csv` and `pandas` python packages

In [122]:
import csv
import pandas as pd

# Speed of reading a csv file having 1000 rows using `csv package`

In [123]:
%%time
# Benchmark: stream the file through csv.reader, echo the first (header) row,
# and count every row, header included.
# newline='' is what the csv module documentation requires for file objects,
# so that newlines embedded inside quoted fields are parsed correctly.
with open('employee_birthday1000.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0  # remains 0 when the file has no rows at all
    for line_count, row in enumerate(csv_reader, start=1):
        if line_count == 1:
            print(f'Column names are {", ".join(row)}')
    print(f'Processed {line_count} lines.')

Column names are name, department, birthday month
Processed 1000 lines.
CPU times: user 0 ns, sys: 3.4 ms, total: 3.4 ms
Wall time: 2.61 ms


# Speed of reading a csv file having 1000 rows using `pandas package`

In [124]:
%%time
# Benchmark: parse the whole file into a DataFrame, then report its column
# labels and the number of rows in the frame.
df = pd.read_csv('employee_birthday1000.csv')
column_labels = df.columns
row_total = len(df.index)
print(f'Column names are {column_labels}')
print(f'Processed {row_total} lines.')

Column names are Index(['name', 'department', 'birthday month'], dtype='object')
Processed 999 lines.
CPU times: user 5.65 ms, sys: 95 µs, total: 5.74 ms
Wall time: 5.17 ms


# Speed of reading a csv file having 50k rows using `csv package`

In [125]:
%%time
# Benchmark: stream the file through csv.reader, echo the first (header) row,
# and count every row, header included.
# newline='' is what the csv module documentation requires for file objects,
# so that newlines embedded inside quoted fields are parsed correctly.
with open('employee_birthday.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0  # remains 0 when the file has no rows at all
    for line_count, row in enumerate(csv_reader, start=1):
        if line_count == 1:
            print(f'Column names are {", ".join(row)}')
    print(f'Processed {line_count} lines.')


Column names are name, department, birthday month
Processed 53665 lines.
CPU times: user 38.5 ms, sys: 7.78 ms, total: 46.3 ms
Wall time: 43.5 ms


# Speed of reading a csv file having 50k rows using `pandas package`

In [126]:
%%time
# Benchmark: parse the whole file into a DataFrame, then report its column
# labels and the number of rows in the frame.
df = pd.read_csv('employee_birthday.csv')
column_labels = df.columns
row_total = len(df.index)
print(f'Column names are {column_labels}')
print(f'Processed {row_total} lines.')

Column names are Index(['name', 'department', 'birthday month'], dtype='object')
Processed 53664 lines.
CPU times: user 22.6 ms, sys: 12.4 ms, total: 35 ms
Wall time: 34.3 ms


# Comparing the write speeds of `csv` and `pandas` python packages

# Speed of writing a csv file having 1000 rows using `csv package`

In [127]:
%%time
# Benchmark: write one header row followed by ITR identical data rows
# using csv.writer.
ITR = 1000
row = "john smith,1132 Anywhere Lane Hoboken NJ, 07030,Jan 4"
header = "name,address,id, date joined"

# newline='' is required by the csv docs for writer file objects; without it
# the writer's '\r\n' terminator turns into '\r\r\n' on platforms that
# translate '\n' on write.
# NOTE(review): the '.csv-csv' suffix looks like a typo for '-csv.csv' —
# confirm nothing relies on this name before renaming it.
with open('employee_write_test.csv-csv', 'w', newline='') as file:
    employee_writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    employee_writer.writerow(header.split(','))
    for _ in range(ITR):
        employee_writer.writerow(row.split(','))


CPU times: user 8.01 ms, sys: 0 ns, total: 8.01 ms
Wall time: 7.92 ms


# Speed of writing a csv file having 1000 rows using `pandas package`

In [128]:
%%time
# Benchmark: build the header plus ITR identical rows in memory, then write
# them to disk through a pandas DataFrame.
ITR = 1000
row = "john smith,1132 Anywhere Lane Hoboken NJ, 07030,Jan 4"
header = "name,address,id, date joined"

# data_list[0] holds the header fields; data_list[1:] holds the data rows.
data_list = [header.split(',')]
data_list.extend(row.split(',') for _ in range(ITR))

# index=False stops pandas from prepending its row index as an extra unnamed
# column, so the file contains only the four columns named in `header`.
pd.DataFrame(data_list[1:], columns=data_list[0]).to_csv('employee_write_test-pandas.csv', index=False)

CPU times: user 25.6 ms, sys: 0 ns, total: 25.6 ms
Wall time: 24.5 ms


# Speed of writing a csv file having 50k rows using `csv package`

In [129]:
%%time
# Benchmark: write one header row followed by ITR identical data rows
# using csv.writer.
ITR = 50000
row = "john smith,1132 Anywhere Lane Hoboken NJ, 07030,Jan 4"
header = "name,address,id, date joined"

# newline='' is required by the csv docs for writer file objects; without it
# the writer's '\r\n' terminator turns into '\r\r\n' on platforms that
# translate '\n' on write.
# NOTE(review): the '.csv-csv' suffix looks like a typo for '-csv.csv' —
# confirm nothing relies on this name before renaming it.
with open('employee_write_test.csv-csv', 'w', newline='') as file:
    employee_writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    employee_writer.writerow(header.split(','))
    for _ in range(ITR):
        employee_writer.writerow(row.split(','))


CPU times: user 95.2 ms, sys: 8.06 ms, total: 103 ms
Wall time: 103 ms


# Speed of writing a csv file having 50k rows using `pandas package`

In [134]:
%%time
# Benchmark: build the header plus ITR identical rows in memory, then write
# them to disk through a pandas DataFrame.
ITR = 50000
row = "john smith,1132 Anywhere Lane Hoboken NJ, 07030,Jan 4"
header = "name,address,id, date joined"

# data_list[0] holds the header fields; data_list[1:] holds the data rows.
data_list = [header.split(',')]
data_list.extend(row.split(',') for _ in range(ITR))

# index=False stops pandas from prepending its row index as an extra unnamed
# column, so the file contains only the four columns named in `header`.
pd.DataFrame(data_list[1:], columns=data_list[0]).to_csv('employee_write_test-pandas.csv', index=False)

CPU times: user 202 ms, sys: 20 ms, total: 222 ms
Wall time: 222 ms


In [138]:
# Export the rows currently held in data_list to an Excel workbook: the first
# entry supplies the column names, the remaining entries are the records.
column_names, *records = data_list
pd.DataFrame(records, columns=column_names).to_excel("temp.xlsx")