# Persisting Time Series Data to Databases

In [116]:
import pandas as pd
from sqlalchemy import create_engine
import pandas_datareader.data as web

engine = create_engine("postgresql://postgres:password@localhost:5432/postgres")

In [117]:
engine

Engine(postgresql://postgres:***@localhost:5432/postgres)

In [3]:
amzn_df_2020 = web.get_data_yahoo('AMZN', 
                                  start='2020-01-01', 
                                  end='2020-12-31')

In [4]:
amzn_df_2020.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,1898.01001,1864.150024,1875.0,1898.01001,4029000,1898.01001
2020-01-03,1886.199951,1864.5,1864.5,1874.969971,3764400,1874.969971
2020-01-06,1903.689941,1860.0,1860.0,1902.880005,4061800,1902.880005
2020-01-07,1913.890015,1892.040039,1904.5,1906.859985,4044900,1906.859985
2020-01-08,1911.0,1886.439941,1898.040039,1891.969971,3508000,1891.969971


In [5]:
amzn_df_2020.shape

(253, 6)

In [6]:
amzn_df_2020.to_sql('amazon',
                    engine,
                    if_exists='replace')

In [7]:
query = '''
SELECT EXISTS (
   SELECT FROM information_schema.tables 
   WHERE  table_schema = 'public'
   AND    table_name   = 'amazon'
   );
'''
engine.execute(query).fetchone()

(True,)

In [8]:
query = '''
select count(*) from amazon;
'''
engine.execute(query).fetchone()

(253,)

In [9]:
amzn_df_2021 = web.get_data_yahoo('AMZN', 
                                start='2021-01-01', 
                                end='2021-06-01')

In [10]:
amzn_df_2021.to_sql('amazon',
                    engine,
                    if_exists='append')

In [11]:
amzn_df_2021.shape

(103, 6)

In [12]:
query = '''
select count(*) from amazon;
'''
engine.execute(query).fetchone()

(356,)

### Writing to MySQL 

In [18]:
engine = create_engine("mysql+pymysql://root:password@localhost:3306/stocks")
amzn_df_2020.to_sql('amazon',
                    engine,
                    if_exists='replace')

query = '''
select count(*) from amazon;
'''
engine.execute(query).fetchone()

(253,)

In [19]:
amzn_df_2021.to_sql('amazon',
                    engine,
                    if_exists='append')

query = '''
select count(*) from amazon;
'''
engine.execute(query).fetchone()

(356,)

# Storing Data to MongoDB

In [120]:
import pandas as pd
from pymongo import MongoClient


In [121]:
client = MongoClient('mongodb://localhost:27017')

In [122]:
db = client['stocks']
collection = db['amazon']

In [123]:
amzn_df_2020

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,1898.010010,1864.150024,1875.000000,1898.010010,4029000,1898.010010
2020-01-03,1886.199951,1864.500000,1864.500000,1874.969971,3764400,1874.969971
2020-01-06,1903.689941,1860.000000,1860.000000,1902.880005,4061800,1902.880005
2020-01-07,1913.890015,1892.040039,1904.500000,1906.859985,4044900,1906.859985
2020-01-08,1911.000000,1886.439941,1898.040039,1891.969971,3508000,1891.969971
...,...,...,...,...,...,...
2020-12-24,3202.000000,3169.000000,3193.899902,3172.689941,1451900,3172.689941
2020-12-28,3304.000000,3172.689941,3194.000000,3283.959961,5686800,3283.959961
2020-12-29,3350.649902,3281.219971,3309.939941,3322.000000,4872900,3322.000000
2020-12-30,3342.100098,3282.469971,3341.000000,3285.850098,3209300,3285.850098


In [124]:
amzn_records = amzn_df_2020.reset_index().to_dict(orient='records')

In [125]:
len(amzn_records)

253

In [126]:
#amzn_df_2020.reset_index().to_dict()

In [127]:
amzn_records[0:1]

[{'Date': Timestamp('2020-01-02 00:00:00'),
  'High': 1898.010009765625,
  'Low': 1864.1500244140625,
  'Open': 1875.0,
  'Close': 1898.010009765625,
  'Volume': 4029000,
  'Adj Close': 1898.010009765625}]

In [128]:
collection.insert_many(amzn_records)

<pymongo.results.InsertManyResult at 0x7fc1563bf100>

In [129]:
client.list_database_names()

['admin', 'config', 'local', 'stocks']

In [130]:
db.list_collection_names()

['amazon']

In [131]:
collection.find_one()

{'_id': ObjectId('615c1b4179449e4481a5fd67'),
 'Date': datetime.datetime(2020, 1, 2, 0, 0),
 'High': 1898.010009765625,
 'Low': 1864.1500244140625,
 'Open': 1875.0,
 'Close': 1898.010009765625,
 'Volume': 4029000,
 'Adj Close': 1898.010009765625}

In [132]:
# filter documents that are greater than August 1, 2020
# and retrieve the first record
import datetime
collection.find_one({'Date': {'$gt': datetime.datetime(2020, 8,1)}})

{'_id': ObjectId('615c1b4179449e4481a5fdfa'),
 'Date': datetime.datetime(2020, 8, 3, 0, 0),
 'High': 3184.0,
 'Low': 3104.0,
 'Open': 3180.510009765625,
 'Close': 3111.889892578125,
 'Volume': 5074700,
 'Adj Close': 3111.889892578125}

In [133]:
collection.count_documents({})

253

### InsertOneResult

In [134]:
one_record = (amzn_df_2021.reset_index()
                          .iloc[0]
                          .to_dict())
one_record

{'Date': Timestamp('2021-01-04 00:00:00'),
 'High': 3272.0,
 'Low': 3144.02001953125,
 'Open': 3270.0,
 'Close': 3186.6298828125,
 'Volume': 4411400,
 'Adj Close': 3186.6298828125}

In [135]:
result_id = collection.insert_one(one_record)

In [136]:
result_id

<pymongo.results.InsertOneResult at 0x7fc155ee4800>

In [137]:
result_id.inserted_id

ObjectId('615c1b4a79449e4481a5fe64')

In [114]:
# list(collection.find({'Date': {'$gt': datetime.datetime(2020, 8,1)}}, {'Close': 1 }))

### MongoDB Time Series Collection

In [207]:
db.create_collection("weather", { "timeseries": { "timeField": "timestamp" ,  "granularity": "days"} })

In [149]:
db.create_collection("test")

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stocks'), 'test')

In [153]:
db.list_collection_names()

['amazon']

In [154]:
client.admin.command( { 'listCollections': 1.0 })

{'cursor': {'id': 0,
  'ns': 'admin.$cmd.listCollections',
  'firstBatch': [{'name': 'system.version',
    'type': 'collection',
    'options': {},
    'info': {'readOnly': False,
     'uuid': UUID('2cc2e626-54c6-4c91-8300-8fb4f76336c3')},
    'idIndex': {'v': 2, 'key': {'_id': 1}, 'name': '_id_'}}]},
 'ok': 1.0}

In [203]:
test = db.create_collection(name = "test", 
                         capped =  False,
                            timeseries = {"timeField": "Date", "granularity": "hours"})

In [204]:
a = db.list_collections()

In [205]:
for i in a:
    print(i['name'], i['type'])

test timeseries
system.buckets.test collection
system.views collection
amazon collection


In [202]:
db.drop_collection('test')

{'nIndexesWas': 0, 'ns': 'stocks.test', 'ok': 1.0}

In [208]:
test.insert_many(amzn_df_2021.reset_index().to_dict(orient='records'))

<pymongo.results.InsertManyResult at 0x7fc155062540>

In [215]:
test.count_documents({})

103

In [213]:
collection.count_documents({})

254

In [216]:
test.find_one({})

{'Date': datetime.datetime(2021, 1, 4, 0, 0),
 'Close': 3186.6298828125,
 'Volume': 4411400,
 'Low': 3144.02001953125,
 'High': 3272.0,
 'Adj Close': 3186.6298828125,
 'Open': 3270.0,
 '_id': ObjectId('615c208679449e4481a5fe65')}

In [217]:
collection.find_one({})

{'_id': ObjectId('615c1b4179449e4481a5fd67'),
 'Date': datetime.datetime(2020, 1, 2, 0, 0),
 'High': 1898.010009765625,
 'Low': 1864.1500244140625,
 'Open': 1875.0,
 'Close': 1898.010009765625,
 'Volume': 4029000,
 'Adj Close': 1898.010009765625}

In [None]:
test = db.create_collection(name = "test", 
                         capped =  False,
                            timeseries = {"timeField": "Date", "granularity": "hours"})

In [224]:
stocks = db.create_collection(
    "stocksts",
    capped = False,
       timeseries = {
          "timeField": "date",
          "metaField": "metadata",
          "granularity": "hours"

    }
)

In [230]:
stocks.insert_many([{
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,1),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,2),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,3),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,4),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,5),
   "close": 16
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,6),
   "close": 15
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,7),
   "close": 13
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,8),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,9),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,10),
   "close": 12
}])

<pymongo.results.InsertManyResult at 0x7fc157787240>

In [232]:
stocks.find_one({})

{'date': datetime.datetime(2021, 1, 1, 1, 0),
 'metadata': {'ticker': 'AMZN', 'type': 'price'},
 'close': 12,
 '_id': ObjectId('615c23a379449e4481a5fed6')}

In [233]:
collection.insert_many([{
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,1),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,2),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,3),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,4),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,5),
   "close": 16
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,6),
   "close": 15
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,7),
   "close": 13
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,8),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,9),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,10),
   "close": 12
}])

<pymongo.results.InsertManyResult at 0x7fc157751a40>

In [235]:
collection = db['new']

In [236]:
collection.insert_many([{
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,1),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,2),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,3),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,4),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,5),
   "close": 16
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,6),
   "close": 15
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,7),
   "close": 13
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,8),
   "close": 12
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,9),
   "close": 11
}, {
   "metadata": {"ticker": "AMZN", "type": "price"},
   "date": datetime.datetime(2021,1,1,10),
   "close": 12
}])

<pymongo.results.InsertManyResult at 0x7fc155085380>

In [237]:
collection.find_one()

{'_id': ObjectId('615c241079449e4481a5feea'),
 'metadata': {'ticker': 'AMZN', 'type': 'price'},
 'date': datetime.datetime(2021, 1, 1, 1, 0),
 'close': 12}

In [238]:
stocks.find_one()

{'date': datetime.datetime(2021, 1, 1, 1, 0),
 'metadata': {'ticker': 'AMZN', 'type': 'price'},
 'close': 12,
 '_id': ObjectId('615c23a379449e4481a5fed6')}

In [239]:
colls = db.list_collections()

In [240]:
colls.next()

{'name': 'stocksts',
 'type': 'timeseries',
 'options': {'timeseries': {'timeField': 'date',
   'metaField': 'metadata',
   'granularity': 'hours',
   'bucketMaxSpanSeconds': 2592000}},
 'info': {'readOnly': False}}

In [241]:
colls.next()

{'name': 'system.buckets.stocksts',
 'type': 'collection',
 'options': {'validator': {'$jsonSchema': {'bsonType': 'object',
    'required': ['_id', 'control', 'data'],
    'properties': {'_id': {'bsonType': 'objectId'},
     'control': {'bsonType': 'object',
      'required': ['version', 'min', 'max'],
      'properties': {'version': {'bsonType': 'number'},
       'min': {'bsonType': 'object',
        'required': ['date'],
        'properties': {'date': {'bsonType': 'date'}}},
       'max': {'bsonType': 'object',
        'required': ['date'],
        'properties': {'date': {'bsonType': 'date'}}},
       'closed': {'bsonType': 'bool'}}},
     'data': {'bsonType': 'object'},
     'meta': {}},
    'additionalProperties': False}},
  'clusteredIndex': True,
  'timeseries': {'timeField': 'date',
   'metaField': 'metadata',
   'granularity': 'hours',
   'bucketMaxSpanSeconds': 2592000}},
 'info': {'readOnly': False,
  'uuid': UUID('1b8e40df-e4ad-4f1a-be8c-c579b0be1ddd')}}

In [242]:
colls.next()

{'name': 'test',
 'type': 'timeseries',
 'options': {'timeseries': {'timeField': 'Date',
   'granularity': 'hours',
   'bucketMaxSpanSeconds': 2592000}},
 'info': {'readOnly': False}}

In [243]:
colls.next()

{'name': 'system.buckets.test',
 'type': 'collection',
 'options': {'validator': {'$jsonSchema': {'bsonType': 'object',
    'required': ['_id', 'control', 'data'],
    'properties': {'_id': {'bsonType': 'objectId'},
     'control': {'bsonType': 'object',
      'required': ['version', 'min', 'max'],
      'properties': {'version': {'bsonType': 'number'},
       'min': {'bsonType': 'object',
        'required': ['Date'],
        'properties': {'Date': {'bsonType': 'date'}}},
       'max': {'bsonType': 'object',
        'required': ['Date'],
        'properties': {'Date': {'bsonType': 'date'}}},
       'closed': {'bsonType': 'bool'}}},
     'data': {'bsonType': 'object'},
     'meta': {}},
    'additionalProperties': False}},
  'clusteredIndex': True,
  'timeseries': {'timeField': 'Date',
   'granularity': 'hours',
   'bucketMaxSpanSeconds': 2592000}},
 'info': {'readOnly': False,
  'uuid': UUID('9f53eee7-bb1a-49b5-8387-963c8fec650c')}}

In [244]:
colls.next()

{'name': 'system.views',
 'type': 'collection',
 'options': {},
 'info': {'readOnly': False,
  'uuid': UUID('a21f031b-7558-4004-9d12-634ca98930cf')},
 'idIndex': {'v': 2, 'key': {'_id': 1}, 'name': '_id_'}}

In [245]:
colls.next()

{'name': 'new',
 'type': 'collection',
 'options': {},
 'info': {'readOnly': False,
  'uuid': UUID('be566b28-df19-4578-98d7-d77c1ca3f893')},
 'idIndex': {'v': 2, 'key': {'_id': 1}, 'name': '_id_'}}

In [246]:
colls.next()

{'name': 'amazon',
 'type': 'collection',
 'options': {},
 'info': {'readOnly': False,
  'uuid': UUID('d7bdf669-be29-4a7b-afec-23e2a506432d')},
 'idIndex': {'v': 2, 'key': {'_id': 1}, 'name': '_id_'}}