In [44]:
import pymongo
import pandas as pd
import time
import collections
import numpy as np
import json
import itertools
import datetime

def getValidFields(includePrice = True):
    client = pymongo.MongoClient('localhost', 27017, maxPoolSize=100)
    db = client['Stocks']
    fields = db['Fields'].find(projection = {'_id': False})
    fields = list(pd.DataFrame(list(fields))['Field'].values)
    if not includePrice:
        fields.remove('Last Price')
    return fields

# Stupid but smart
def getCorrespondingStartDate(date):
    month = date.month
    year = date.year
    if month - 10 >= 0: 
        return datetime.date(year, 10, 1)
    if month - 7 >= 0:
        return datetime.date(year, 7, 1)
    if month - 4 >= 0:
        return datetime.date(year, 4, 1)
    return datetime.date(year, 1, 1)

# here's my plan. i iterate through features separately. so i can sort by $exists = True.
# helps avoid those annoying key errors, and the mathematical badness of setting nan = 0
# when you're trying to calculate an average. 
# it's NOT efficient, but we're only doing it once. right??
def getDataAnomQuarterly(start_date, end_date):
    client = pymongo.MongoClient('localhost', 27017, maxPoolSize=100)
    db = client['Stocks']
    curr_stock = db["AAPL US Equity"]

    fields = ['LAST_PRICE']
    df = pd.DataFrame([], columns=fields)
    
    curr_date = None
    curr_total = 0
    curr_count = 0
    
    for field in fields:
        for thing in curr_stock.find({field:{'$exists': True}}).sort([('Date', 1)]):
            print(thing)
            date = thing['Date']
            corr_date = getCorrespondingStartDate(date)
            
            if curr_date is None:
                curr_count += 1
                curr_date = corr_date
                curr_total = thing[field]
            elif (corr_date == curr_date):
                curr_count += 1
                curr_total += thing[field]   
            # If the dates don't match, we save the info before moving on. 
            else:
                df.at[curr_date, field] = curr_total/curr_count
                curr_date = corr_date
                curr_count = 1
                curr_total = thing[field]
            
    return df

In [45]:
dataDict = getDataAnomQuarterly(datetime.datetime(1995, 1, 1), datetime.datetime(1998, 1, 1))

{'_id': ObjectId('5be8d89f7269fc7c7a7bba76'), 'Date': datetime.datetime(1990, 1, 31, 0, 0), 'LAST_PRICE': 1.2143}
{'_id': ObjectId('5be8d89f7269fc7c7a7bba79'), 'Date': datetime.datetime(1990, 2, 28, 0, 0), 'LAST_PRICE': 1.2143}
{'_id': ObjectId('5be8d5147269fc7c7a5d10ca'), 'Date': datetime.datetime(1990, 3, 30, 0, 0), 'HISTORICAL_MARKET_CAP': 4961.6174, 'LAST_PRICE': 1.4375, 'EBITDA': 243.463}
{'_id': ObjectId('5be8d89f7269fc7c7a7bba7e'), 'Date': datetime.datetime(1990, 4, 30, 0, 0), 'LAST_PRICE': 1.4063}
{'_id': ObjectId('5be8d89f7269fc7c7a7bba81'), 'Date': datetime.datetime(1990, 5, 31, 0, 0), 'LAST_PRICE': 1.4732}
{'_id': ObjectId('5be8cdd77269fc7c7a3d0221'), 'Date': datetime.datetime(1990, 6, 29, 0, 0), 'BS_LT_BORROW': 0.0, 'HISTORICAL_MARKET_CAP': 5355.143, 'LAST_PRICE': 1.5982, 'EBITDA': 232.09900000000002}
{'_id': ObjectId('5be8d89f7269fc7c7a7bba86'), 'Date': datetime.datetime(1990, 7, 31, 0, 0), 'LAST_PRICE': 1.5}
{'_id': ObjectId('5be8d89f7269fc7c7a7bba89'), 'Date': datetime.d

In [43]:
print(dataDict)

           Date LAST_PRICE
1990-01-01  NaN     1.2887
1990-04-01  NaN    1.49257
1990-07-01  NaN     1.2857
1990-10-01  NaN    1.31547
1991-01-01  NaN    2.15177
1991-04-01  NaN    1.70833
1991-07-01  NaN    1.77087
1991-10-01  NaN     1.8899
1992-01-01  NaN    2.26787
1992-04-01  NaN     1.9985
1992-07-01  NaN    1.64137
1992-10-01  NaN    2.02083
1993-01-01  NaN     1.9524
1993-04-01  NaN    1.75447
1993-07-01  NaN     0.9241
1993-10-01  NaN    1.08927
1994-01-01  NaN    1.22023
1994-04-01  NaN     1.0208
1994-07-01  NaN    1.23287
1994-10-01  NaN     1.4189
1995-01-01  NaN    1.37053
1995-04-01  NaN      1.503
1995-07-01  NaN    1.49107
1995-10-01  NaN    1.26563
1996-01-01  NaN   0.948633
1996-04-01  NaN   0.851167
1996-07-01  NaN   0.814733
1996-10-01  NaN     0.8095
1997-01-01  NaN   0.608667
1997-04-01  NaN   0.569933
...         ...        ...
2011-01-01  NaN    49.5732
2011-04-01  NaN    49.2205
2011-07-01  NaN    55.0776
2011-10-01  NaN    56.7609
2012-01-01  NaN    76.1176
2