In [73]:
#Measure execution time of the cell:
#%%time

#!pip install --upgrade pandas

import pandas as pd
import numpy as np
import os


def generate_random_data(year, month, day, num_rows):
    #np.random.seed(42)  # Set seed for reproducibility    
    #ids = np.random.randint(low=1, high=10**10, size=num_rows)
    ids = np.random.rand(num_rows)*10**10
    
    dates = pd.to_datetime(f"{year}-{month}-{day}")
    times_ms = pd.to_timedelta(np.random.rand(num_rows), unit='ms')
    times_secs = pd.to_timedelta(np.random.rand(num_rows)*59, unit='S')
    times_mins = pd.to_timedelta(np.random.rand(num_rows)*59, unit='T')
    times_hrs = pd.to_timedelta(np.random.rand(num_rows)*23, unit='H')
    datetime_col = dates + times_hrs + times_mins + times_secs + times_ms
    
    price = np.random.uniform(1700, 1900, size=num_rows)  # Random floats between 1700 and 1900
    
    quantity = np.random.rand(num_rows)*1000  # Random floats between 0 and 1000
        
    unix_timestamps_ms = datetime_col.astype(int) // 10**6  # Convert to Unix timestamp in milliseconds
    
    bools = np.random.choice([True, False], size=num_rows)  # Random Boolean values
    
    data = {
        'trade_id': ids,
        #'DateTime': datetime_col,
        'price': price,
        'quantity': quantity,
        'time': unix_timestamps_ms,
        'isSelling': bools
    }
    
    df = pd.DataFrame(data)
    df["trade_id"]=np.float64(round(df["trade_id"]))
    df["time"]=np.float64(round(df["time"]))
    return df


#Elegir un rango de fechas para generar los datasets.
dates = pd.date_range(start='2023-04-01', end='2023-05-31', freq='D')
dates.day

outdir = './orc_datasets/'
if not os.path.exists(outdir):
    os.mkdir(outdir)
    
for date in dates:
    #num_rows = 100_000
    num_rows=1_000_000 + np.random.randint(-100000, 100000)
    df = generate_random_data(date.year, date.month, date.day, num_rows)
    #df['time'] =df['time'].astype('datetime64[s]')
    #print(np.sort(df["DateTime"]))
    #print(df.loc[0]["trade_id"])
    print(df)
    df.to_orc("orc_datasets/"+str(date.year)+"-"+str(date.month)+"-"+str(date.day)+'-df.orc')  


            trade_id        price    quantity          time  isSelling
0       3.489715e+09  1881.504486  548.235817  1.680376e+12      False
1       6.937481e+09  1728.274646  432.372009  1.680390e+12       True
2       5.812665e+09  1808.778408  672.747991  1.680390e+12      False
3       6.906022e+09  1873.613605  401.342600  1.680347e+12      False
4       2.596823e+09  1794.248974  798.701522  1.680334e+12       True
...              ...          ...         ...           ...        ...
912974  6.458762e+09  1842.547705  804.586554  1.680348e+12       True
912975  1.917253e+09  1823.089578  865.056803  1.680374e+12      False
912976  5.911150e+09  1706.036359  304.107789  1.680358e+12      False
912977  8.058590e+09  1775.230274  486.580197  1.680334e+12       True
912978  3.774921e+09  1773.675699  326.211597  1.680381e+12       True

[912979 rows x 5 columns]
             trade_id        price    quantity          time  isSelling
0        6.738361e+09  1854.158362  764.604806  1

             trade_id        price    quantity          time  isSelling
0        4.208028e+09  1732.376809  457.711638  1.681253e+12      False
1        9.882328e+09  1754.443736  584.285730  1.681226e+12      False
2        6.432452e+08  1711.417746  128.968750  1.681222e+12       True
3        3.265199e+09  1807.166256  972.632030  1.681226e+12       True
4        6.091414e+09  1783.597509  648.746969  1.681181e+12       True
...               ...          ...         ...           ...        ...
1007487  5.668683e+09  1786.109615  372.418476  1.681246e+12       True
1007488  8.833647e+09  1740.022882  581.736234  1.681205e+12       True
1007489  8.279897e+09  1700.492177  322.209714  1.681254e+12      False
1007490  4.649705e+08  1859.015529  900.191197  1.681184e+12       True
1007491  9.072706e+09  1881.101187  573.936872  1.681229e+12       True

[1007492 rows x 5 columns]
            trade_id        price    quantity          time  isSelling
0       3.537815e+09  1896.457195  26

            trade_id        price    quantity          time  isSelling
0       4.346439e+09  1772.909695  917.908373  1.682071e+12       True
1       1.614098e+09  1716.358906  289.948921  1.682044e+12       True
2       9.692493e+09  1883.066693  833.157929  1.682053e+12       True
3       9.439688e+09  1879.651773  384.094150  1.682064e+12      False
4       4.405982e+09  1770.691230  929.710856  1.682096e+12       True
...              ...          ...         ...           ...        ...
951281  6.870419e+09  1776.775390  707.724329  1.682039e+12      False
951282  3.245396e+09  1756.184809  615.029400  1.682104e+12      False
951283  8.434971e+09  1705.942409  774.853788  1.682085e+12      False
951284  7.242973e+09  1708.490951  904.729393  1.682116e+12       True
951285  2.058021e+09  1845.159291  232.571468  1.682084e+12      False

[951286 rows x 5 columns]
             trade_id        price    quantity          time  isSelling
0        7.707033e+09  1821.117071  451.997108  1

            trade_id        price    quantity          time  isSelling
0       6.175961e+09  1806.831936  813.314420  1.682901e+12      False
1       5.557009e+09  1757.764694  493.414517  1.682952e+12      False
2       4.819655e+09  1897.352159  182.663315  1.682953e+12       True
3       2.415987e+09  1730.932033  816.553282  1.682917e+12      False
4       5.237803e+09  1764.374013  163.167296  1.682908e+12      False
...              ...          ...         ...           ...        ...
907666  9.521930e+08  1748.130106  156.312677  1.682941e+12       True
907667  4.856247e+09  1712.459146  730.998825  1.682943e+12       True
907668  9.880078e+09  1707.442195  374.232019  1.682925e+12       True
907669  3.461596e+09  1860.718825  758.935773  1.682980e+12      False
907670  2.875831e+09  1719.342740  871.096100  1.682920e+12       True

[907671 rows x 5 columns]
             trade_id        price    quantity          time  isSelling
0        2.734128e+09  1763.207382  754.575356  1

             trade_id        price    quantity          time  isSelling
0        8.105649e+09  1757.884413  650.119965  1.683828e+12      False
1        7.778786e+09  1701.907419  973.135036  1.683771e+12      False
2        4.668851e+08  1868.176185  679.813344  1.683844e+12      False
3        9.313374e+09  1871.924025  297.289555  1.683796e+12      False
4        2.011194e+09  1762.972161  968.645241  1.683772e+12      False
...               ...          ...         ...           ...        ...
1069576  1.031012e+09  1852.233176  650.872602  1.683805e+12       True
1069577  4.709279e+09  1857.646437  457.303977  1.683772e+12       True
1069578  2.895593e+09  1746.039255  318.185198  1.683839e+12      False
1069579  1.973451e+09  1879.519909  534.545675  1.683777e+12      False
1069580  9.807952e+09  1878.601485  666.033196  1.683775e+12       True

[1069581 rows x 5 columns]
             trade_id        price    quantity          time  isSelling
0        6.350664e+09  1736.081008  

            trade_id        price    quantity          time  isSelling
0       2.412698e+09  1801.945767  278.982801  1.684671e+12      False
1       1.094140e+09  1758.607387    2.479168  1.684672e+12       True
2       4.067800e+09  1827.088036  578.850648  1.684709e+12      False
3       7.472289e+09  1821.235277  562.624598  1.684656e+12      False
4       1.804032e+09  1826.647837  842.274760  1.684675e+12      False
...              ...          ...         ...           ...        ...
939160  5.465662e+09  1772.957677   40.234189  1.684660e+12       True
939161  6.185946e+09  1769.212301  769.296681  1.684637e+12      False
939162  3.875801e+09  1861.148634   53.158347  1.684681e+12       True
939163  9.459577e+09  1778.645368  382.783920  1.684641e+12       True
939164  3.127265e+09  1861.626138  434.967424  1.684636e+12       True

[939165 rows x 5 columns]
            trade_id        price    quantity          time  isSelling
0       3.895045e+09  1851.101096  767.041040  1.6

            trade_id        price    quantity          time  isSelling
0       4.090837e+09  1833.090358  663.433354  1.685574e+12      False
1       4.896470e+08  1720.220313  474.636715  1.685519e+12      False
2       8.657334e+09  1715.919059  174.243096  1.685499e+12       True
3       6.190060e+09  1875.693284  991.097460  1.685551e+12      False
4       5.910316e+09  1826.254437  154.961667  1.685499e+12       True
...              ...          ...         ...           ...        ...
984506  8.889948e+09  1770.181141  575.693152  1.685542e+12       True
984507  9.235778e+08  1855.471297  165.281768  1.685534e+12       True
984508  8.247927e+09  1837.862434  787.814779  1.685524e+12       True
984509  3.097158e+08  1860.372702  102.726928  1.685574e+12      False
984510  9.919328e+09  1837.422743   59.277216  1.685541e+12      False

[984511 rows x 5 columns]
