In [1]:
import json
import pickle
from faker import Faker
from dataclasses import dataclass, asdict
from datetime import date
import time
import transactions_pb2 as PBTransactions
import pandas as pd
import tkinter as tk
from pandastable import Table
import matplotlib.pyplot as plt



In [2]:
# Faker instance used by the data-generation cell to produce the synthetic
# transaction fields (ids, names, dates, amounts).
faker = Faker()
# Fixed seed so repeated runs are meant to yield the same fake data.
# NOTE(review): this is the class-level seed call made after the instance is
# created — presumably it also governs `faker` above; confirm in Faker's docs.
Faker.seed(12345)

In [3]:
# Record type used as the payload for every serialization format under test.
@dataclass
class Transactions():
    """One transfer record: an id, the two parties, a date and an amount.

    Offers converters to a JSON-ready dict and to the protobuf message
    declared in ``transactions_pb2``.
    """

    id: str
    sender: str
    receiver: str
    date: date
    amount: float

    def to_json(self) -> dict:
        """Return the fields as a dict with the date rendered as an ISO string.

        The date is only converted when it is a ``date`` instance; any other
        value is passed through unchanged.
        """
        payload = asdict(self)
        when = payload.get("date")

        if isinstance(when, date):
            payload["date"] = when.isoformat()

        return payload

    def to_protob(self) -> PBTransactions.Transactions:
        """Build the protobuf message that mirrors this record.

        The date is copied component by component into the message's nested
        ``date`` field (year, month, day).
        """
        when = self.date

        # Scalar fields go in through the constructor; the nested date
        # sub-message is filled in afterwards.
        message = PBTransactions.Transactions(
            id=self.id,
            sender=self.sender,
            receiver=self.receiver,
            amount=self.amount,
        )
        message.date.year = when.year
        message.date.month = when.month
        message.date.day = when.day

        return message

In [4]:
# Build 100,000 synthetic records in one pass. Keyword arguments are evaluated
# in the order written, so each record draws from Faker in the sequence
# id, sender, receiver, date, amount.
transactions: list[Transactions] = [
    Transactions(
        id=faker.uuid4(),
        sender=faker.name(),
        receiver=faker.name(),
        date=faker.date_object(),
        amount=faker.pyfloat(min_value=10.0, max_value=100000.0, right_digits=2),
    )
    for _ in range(100000)
]

# Empty protobuf container; it is populated when protobuf serialization is timed.
ProtobufList = PBTransactions.TransactionsList()

In [5]:
########### Serialization Time
# Every measurement uses time.perf_counter() (seconds) and is converted to
# milliseconds below. Each timed region includes converting the dataclass
# records into the format's input (to_json() dicts or protobuf messages).

# JSON
# NOTE(review): indent=2 pretty-prints the output, so the whitespace it adds is
# counted in size_json and in the timing; drop it if a compact-JSON comparison
# is wanted.
t0_json_ser = time.perf_counter()
json_data = json.dumps([t.to_json() for t in transactions], indent=2)
t1_json_ser = time.perf_counter()

# Pickle: serializes the same list of plain dicts that JSON receives.
t0_pickle_ser = time.perf_counter()
pickle_data = pickle.dumps([t.to_json() for t in transactions])
t1_pickle_ser = time.perf_counter()

# Protobuf: start from a fresh container. Extending a container created in an
# earlier cell makes this cell non-idempotent: every re-run would append
# another full copy of the records and inflate both the time and the size.
ProtobufList = PBTransactions.TransactionsList()
t0_proto_ser = time.perf_counter()
ProtobufList.transactions.extend(t.to_protob() for t in transactions)
proto_bytes = ProtobufList.SerializeToString()
t1_proto_ser = time.perf_counter()


time_json_ser = (t1_json_ser - t0_json_ser) * 1000
time_pickle_ser = (t1_pickle_ser - t0_pickle_ser) * 1000
time_proto_ser = (t1_proto_ser - t0_proto_ser) * 1000

# Sizes in bytes. JSON is a str, so it is measured after UTF-8 encoding.
size_json = len(json_data.encode("utf-8"))
size_pickle = len(pickle_data)
size_proto = len(proto_bytes)

########### DeSerialization Time
t0_json_des = time.perf_counter()
json_data_des = json.loads(json_data)
t1_json_des = time.perf_counter()

# Store the decoded objects under their own name (as json_data_des does) so
# that pickle_data keeps holding the serialized bytes after this cell runs.
t0_pickle_des = time.perf_counter()
pickle_data_des = pickle.loads(pickle_data)
t1_pickle_des = time.perf_counter()

# The timed region covers creating the empty message and parsing into it.
t0_proto_des = time.perf_counter()
new_transactions = PBTransactions.TransactionsList()
new_transactions.ParseFromString(proto_bytes)
t1_proto_des = time.perf_counter()

time_json_des = (t1_json_des - t0_json_des) * 1000
time_pickle_des = (t1_pickle_des - t0_pickle_des) * 1000
time_proto_des = (t1_proto_des - t0_proto_des) * 1000

In [6]:
# Summary table: one row per format, one column per measured quantity.
# Times are the millisecond values and sizes the byte counts computed in the
# benchmark cell.
format_labels = ["JSON", "Pickle", "Proto Buffer"]
measurements = {
    "Serialization Time": [time_json_ser, time_pickle_ser, time_proto_ser],
    "Deserialization Time": [time_json_des, time_pickle_des, time_proto_des],
    "Serialized file size": [size_json, size_pickle, size_proto],
}
df_data = pd.DataFrame(measurements, index=format_labels)

df_data

Unnamed: 0,Serialization Time,Deserialization Time,Serialized file size
JSON,553.2685,50.667208,17223984
Pickle,338.168875,28.601959,10755700
Proto Buffer,138.60825,10.969875,8853979


In [7]:
# Two side-by-side panels: timings on the left, payload size on the right.
fig, axes = plt.subplots(1, 2, figsize=(9, 3), sharex=False)
ax_time, ax_size = axes

# Left panel: grouped horizontal bars, one serialize/deserialize pair per format.
timing_columns = ["Serialization Time", "Deserialization Time"]
df_data[timing_columns].plot.barh(ax=ax_time)
ax_time.set(xlabel="Time (ms)", ylabel="", title="Serialization & Deserialization")

# Right panel: byte counts divided by 1024 for display.
size_kb = df_data["Serialized file size"] / 1024
size_kb.plot.barh(ax=ax_size)
ax_size.set(xlabel="Size (KB)", ylabel="", title="Serialized File Size")

plt.tight_layout()
plt.show()