In [1]:
from mongoengine import connect, Document, StringField, IntField, DateTimeField

In [2]:
# Open the default MongoEngine connection to the local `adtech` database.
connect(db='adtech', host='localhost', port=27017)

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary(), uuidrepresentation=3, driver=DriverInfo(name='MongoEngine', version='0.29.1', platform=None))

In [21]:
# Define your models for the impressions and viewlog collections

class Impressions(Document):
    """MongoEngine model for documents in the ``impressions`` collection.

    Each field below maps to a key of the same name on the stored document.
    """

    impression_id = StringField()
    impression_time = DateTimeField()
    # NOTE(review): declared as a string here but as IntField on `viewlog` --
    # confirm which type the stored data actually uses.
    user_id = StringField()
    app_code = IntField()
    os_version = StringField()
    # Queried as 0/1 integers by the cells in this notebook (e.g. is_4G=0,
    # is_click=1, and $sum over is_click).
    is_4G = IntField()
    is_click = IntField()

    # `meta` has to be a class attribute for MongoEngine to read it. Defined at
    # module level it is just an unrelated global, and the model silently falls
    # back to the collection name derived from the class name.
    meta = {
        'collection': 'impressions'
    }

class viewlog(Document):
    """MongoEngine model for documents in the ``viewlog`` collection.

    Each field below maps to a key of the same name on the stored document.
    The lowercase class name is kept as-is because other cells reference it.
    """

    server_time = DateTimeField()
    device_type = StringField()
    session_id = IntField()
    user_id = IntField()
    item_id = IntField()

    # `meta` has to be a class attribute for MongoEngine to read it. Defined at
    # module level it is just an unrelated global, and the model silently falls
    # back to the collection name derived from the class name.
    meta = {
        'collection': 'viewlog'
    }

In [4]:
# Number of impression documents flagged as clicked (is_click == 1).
count_clicks = Impressions.objects.filter(is_click=1).count()
print(f'Total Clicks: {count_clicks}')

Total Clicks: 10862


In [5]:
# Collect every distinct value stored in the `os_version` field.
distinct_os_versions = Impressions.objects.distinct(field="os_version")
print(f"Distinct OS Versions: {distinct_os_versions}")

Distinct OS Versions: ['intermediate', 'latest', 'old']


In [6]:
# Total clicks per OS version, restricted to impressions with is_4G == 0.
# The queryset filter is applied first; the pipeline then sums is_click
# within each os_version group.
result = Impressions.objects.filter(is_4G=0).aggregate([
    {
        "$group": {
            "_id": "$os_version",
            "countClick": {"$sum": "$is_click"},
        }
    },
])
for doc in result:
    print(f"OS Version: {doc['_id']}, Clicks: {doc['countClick']}")

OS Version: old, Clicks: 1896
OS Version: latest, Clicks: 3201
OS Version: intermediate, Clicks: 1923


In [7]:
# Total clicks per app_code for app codes above 500, highest totals first.
result = Impressions.objects.filter(app_code__gt=500).aggregate([
    {
        "$group": {
            "_id": "$app_code",
            "countClick": {"$sum": "$is_click"},
        }
    },
    {
        "$sort": {"countClick": -1}
    },
])
for doc in result:
    print(f"App Code: {doc['_id']}, Clicks: {doc['countClick']}")


App Code: 508, Clicks: 132
App Code: 504, Clicks: 47
App Code: 509, Clicks: 27
App Code: 512, Clicks: 23
App Code: 507, Clicks: 14
App Code: 522, Clicks: 6
App Code: 503, Clicks: 5
App Code: 505, Clicks: 4
App Code: 514, Clicks: 4
App Code: 513, Clicks: 3
App Code: 521, Clicks: 2
App Code: 517, Clicks: 1
App Code: 519, Clicks: 1
App Code: 520, Clicks: 1
App Code: 518, Clicks: 0
App Code: 510, Clicks: 0
App Code: 516, Clicks: 0
App Code: 515, Clicks: 0
App Code: 502, Clicks: 0
App Code: 506, Clicks: 0


In [8]:
# Users whose impressions add up to exactly 10 clicks:
# sum is_click per user_id, then keep only the groups whose total equals 10.
result = Impressions.objects.aggregate([
    {"$group": {"_id": "$user_id", "CountOfClick": {"$sum": "$is_click"}}},
    {"$match": {"CountOfClick": 10}},
])
for doc in result:
    print(f"User ID: {doc['_id']}, Count of Clicks: {doc['CountOfClick']}")

User ID: 37747, Count of Clicks: 10
User ID: 90953, Count of Clicks: 10
User ID: 52737, Count of Clicks: 10
User ID: 64389, Count of Clicks: 10
User ID: 3364, Count of Clicks: 10


In [24]:
# Android view-log events with item_id > 132861, ordered by ascending user_id.
# Both conditions sit in one leading $match so documents are discarded before
# the sort stage rather than being filtered again after it; the selected
# documents are the same as with the filter split around the sort.
result = viewlog.objects.aggregate([
    {
        "$match": {
            "device_type": "android",
            "item_id": {"$gt": 132861}
        }
    },
    {
        "$sort": {
            "user_id": 1
        }
    }
])

# Each aggregation result is a plain dict, printed one per line.
for doc in result:
    print(doc)

{'_id': ObjectId('66f339bb0b5d4df30046ddd8'), 'server_time': datetime.datetime(2018, 11, 17, 21, 15), 'device_type': 'android', 'session_id': 699238, 'user_id': 111, 'item_id': 132864}
{'_id': ObjectId('66f339730b5d4df3002f3d78'), 'server_time': datetime.datetime(2018, 10, 19, 0, 5), 'device_type': 'android', 'session_id': 692656, 'user_id': 131, 'item_id': 132865}
{'_id': ObjectId('66f339830b5d4df30034a049'), 'server_time': datetime.datetime(2018, 10, 26, 5, 48), 'device_type': 'android', 'session_id': 103223, 'user_id': 5822, 'item_id': 132863}
{'_id': ObjectId('66f339770b5d4df30030af40'), 'server_time': datetime.datetime(2018, 10, 21, 1, 53), 'device_type': 'android', 'session_id': 182390, 'user_id': 10613, 'item_id': 132865}
{'_id': ObjectId('66f339fa0b5d4df3005aa751'), 'server_time': datetime.datetime(2018, 12, 10, 23, 20), 'device_type': 'android', 'session_id': 1026181, 'user_id': 10947, 'item_id': 132862}
{'_id': ObjectId('66f339740b5d4df3002f9476'), 'server_time': datetime.dat