# Imports

In [None]:
import pandas as pd
import os
import timeit
import sys

In [None]:
# Show the interpreter and pandas versions in use, one per line.
print(
    f'Python version: {sys.version}',
    f'Pandas version: {pd.__version__}',
    sep='\n',
)


# Global Variables

In [None]:
# Source CSV, given as an absolute path on drive D:.
input_csv_file = "D:/data/london_crime_by_lsoa.csv"
# Destinations for the converted copies. These are relative paths, so they
# resolve against the current working directory (inside a "data" folder).
output_feather_file = "data/london_crime_by_lsoa.feather"
output_parquet_file = "data/london_crime_by_lsoa.parquet"

# Read the CSV file

In [None]:
# Load the whole CSV into a DataFrame, bracketing the call with two
# timeit.default_timer() readings so that stop_csv - start_csv is the
# elapsed read time. Only the read_csv call sits between the readings.
# NOTE(review): the visible cells never report this duration - confirm
# whether it is used further down (e.g. as the baseline in Conclusions).
start_csv = timeit.default_timer()
df_csv = pd.read_csv(input_csv_file)
stop_csv = timeit.default_timer()


In [None]:
# On-disk size of the source CSV in bytes; used as the baseline when the
# size reductions are computed.
size_csv_file = os.stat(input_csv_file).st_size

# Conversion to Feather

In [None]:
# Write the DataFrame to a zstd-compressed Feather file.
# pandas does not create missing parent directories, and the output path
# points into a relative "data" folder, so make sure that folder exists.
feather_dir = os.path.dirname(output_feather_file)
if feather_dir:  # empty when the path has no directory component
    os.makedirs(feather_dir, exist_ok=True)
df_csv.to_feather(output_feather_file, compression='zstd')

In [None]:
# Read the Feather file back into a DataFrame, bracketing the call with two
# timeit.default_timer() readings; stop_feather - start_feather is the
# elapsed read time that the analysis cell prints in seconds.
start_feather = timeit.default_timer()
df_feather = pd.read_feather(output_feather_file)
stop_feather = timeit.default_timer()


## Analysis

In [None]:
# On-disk size of the Feather output in bytes.
size_feather_file = os.stat(output_feather_file).st_size

In [None]:
# Report the Feather results: size saving relative to the CSV (as a
# percentage) followed by the measured read time.
feather_saving_pct = 100 * (1 - size_feather_file / size_csv_file)
print(f'[INFO] A remarkable {feather_saving_pct:.2f}% reduction in file size.')

feather_read_seconds = stop_feather - start_feather
print(f'[INFO] The time taken to read the Feather file was {feather_read_seconds:.3f} seconds.')


# Conversion to Parquet

In [None]:
# Write the DataFrame to a gzip-compressed Parquet file.
# pandas does not create missing parent directories, and the output path
# points into a relative "data" folder, so make sure that folder exists.
parquet_dir = os.path.dirname(output_parquet_file)
if parquet_dir:  # empty when the path has no directory component
    os.makedirs(parquet_dir, exist_ok=True)
df_csv.to_parquet(output_parquet_file, compression="gzip")


In [None]:
# Read the Parquet file back into a DataFrame, bracketing the call with two
# timeit.default_timer() readings; stop_parquet - start_parquet is the
# elapsed read time that the analysis cell prints in seconds.
start_parquet = timeit.default_timer()
df_parquet = pd.read_parquet(output_parquet_file)
stop_parquet = timeit.default_timer()


## Analysis

In [None]:
# On-disk size of the Parquet output in bytes.
size_parquet_file = os.stat(output_parquet_file).st_size


In [None]:
# Report the Parquet results: size saving relative to the CSV (as a
# percentage) followed by the measured read time.
parquet_saving_pct = 100 * (1 - size_parquet_file / size_csv_file)
print(f'[INFO] A remarkable {parquet_saving_pct:.2f}% reduction in file size.')

parquet_read_seconds = stop_parquet - start_parquet
print(f'[INFO] The time taken to read the Parquet file was {parquet_read_seconds:.3f} seconds.')


# Conclusions