In [37]:
from pymongo import MongoClient
import json
import os
from dotenv import load_dotenv
load_dotenv()

# Connection string: honour MONGO_URL (environment / .env file) and fall back
# to the local default when it is not set, instead of ignoring the variable.
mongo_url = os.getenv('MONGO_URL') or 'mongodb://localhost:27017/'

# `_id: None` puts every document in a single group, so each `$avg` is
# computed over the whole collection; `$project` then drops the `_id` field.
pipeline = [
    {
        '$group': {
            '_id': None,
            'avg_comments': {
                '$avg': '$content.comments_count'
            },
            'avg_response': {
                '$avg': '$content.resp_total'
            }
        }
    }, {
        '$project': {
            '_id': 0,
            'avg_response': 1,
            'avg_comments': 1,
        }
    }
]

# The context manager closes the client even if the aggregation raises.
with MongoClient(mongo_url) as client:
    result = client['mooc']['posts_mooc'].aggregate(pipeline)

    # Display the results
    for doc in result:
        print(f"nombre moyen de commentaires : {doc['avg_comments']}, nombre moyen de réponses : {doc['avg_response']}")

nombre moyen de commentaires : 2.273674062406648, nombre moyen de réponses : 1.429133965184803


In [35]:
from pymongo import MongoClient
import json

# Average comment / response counts per course, highest average response first.
pipeline = [
    {
        '$group': {
            '_id': "$content.course_id",
            'avg_comments': {
                '$avg': '$content.comments_count'
            },
            'avg_response': {
                '$avg': '$content.resp_total'
            }
        }
    }, {
        '$sort': {
            'avg_response': -1
        }
    }
]

# The context manager closes the client even if the aggregation raises.
with MongoClient('mongodb://localhost:27017/') as client:
    result = client['mooc']['posts_mooc'].aggregate(pipeline)

    # Display the results
    for doc in result:
        # The group key can come back as None (e.g. posts with no course id);
        # None rejects a width spec such as `:45`, so format its str() form.
        # Using a separate variable also avoids nesting the same quote type
        # inside the f-string, which only parses on Python 3.12+.
        course_id = str(doc['_id'])
        print(f"nombre moyen de commentaires pour {course_id:45} : {doc['avg_comments']}, nombre moyen de réponses : {doc['avg_response']}")

nombre moyen de commentaires pour course-v1:MITx+Launch.x_2+2T2016              : 72.1877729257642, nombre moyen de réponses : 63.877729257641924
nombre moyen de commentaires pour course-v1:GIPFTLVIP+137001+session01          : 20.642201834862384, nombre moyen de réponses : 14.81651376146789
nombre moyen de commentaires pour course-v1:grenoblealpes+92009+session01       : 21.82278481012658, nombre moyen de réponses : 9.594936708860759
nombre moyen de commentaires pour course-v1:grenoblealpes+92012+session01       : 24.058823529411764, nombre moyen de réponses : 9.588235294117647
nombre moyen de commentaires pour course-v1:USPC+37003+session02                : 14.625, nombre moyen de réponses : 8.125
nombre moyen de commentaires pour course-v1:umontpellier+08003+session02        : 13.160714285714286, nombre moyen de réponses : 7.321428571428571
nombre moyen de commentaires pour course-v1:USPC+37007+session01                : 18.105263157894736, nombre moyen de réponses : 6.7368421052631

In [15]:
from pymongo import MongoClient

# Pipeline yielding one output document per distinct `content.course_id`,
# each carrying the number of posts (`taille`) found for that course.
course_sizes_pipeline = [
    {'$unwind': {'path': '$content.course_id'}},
    {'$project': {'course_id': '$content.course_id'}},
    {'$group': {'_id': '$course_id', 'taille': {'$count': {}}}},
    {'$project': {'_id': 0, 'course_id': '$_id', 'taille': '$taille'}},
]

client = MongoClient('mongodb://localhost:27017/')
result = client['mooc']['posts_mooc'].aggregate(course_sizes_pipeline)

# One result row per course, so counting the rows counts the courses.
docs = sum(1 for _ in result)
print(f"Total cours: {docs}")
client.close()

Total cours: 230


In [7]:
# Earliest / latest creation and update timestamps over the whole collection
# (`_id: None` groups every document together).
date_bounds_pipeline = [
    {
        '$group': {
            '_id': None,
            'min_date': {'$min': '$content.created_at'},
            'max_date': {'$max': '$content.created_at'},
            "min_update": {'$min': '$content.updated_at'},
            "max_update": {'$max': '$content.updated_at'},
        }
    },
    {
        '$project': {
            '_id': 0,
            'min_date': 1,
            'max_date': 1,
            'min_update': 1,
            'max_update': 1,
        }
    },
]

# (label printed, field read) pairs, in display order.
report_lines = (
    ("date min:", 'min_date'),
    ("date max:", 'max_date'),
    ("date min update:", 'min_update'),
    ("date max update:", 'max_update'),
)

client = MongoClient('mongodb://localhost:27017/')
result = client['mooc']['posts_mooc'].aggregate(date_bounds_pipeline)

for doc in result:
    for label, field in report_lines:
        print(label, doc[field])

client.close()

date min: 2014-02-03T07:55:29Z
date max: 2022-06-28T08:17:43Z
date min update: 2014-02-03T07:55:29Z
date max update: 2022-06-28T08:17:43Z


In [3]:
# Number of posts per username, most prolific first.
posts_per_user_pipeline = [
    {'$group': {'_id': '$content.username', 'count': {'$sum': 1}}},
    {'$sort': {'count': -1}},
]

client = MongoClient('mongodb://localhost:27017/')
result = client['mooc']['posts_mooc'].aggregate(posts_per_user_pipeline)

# username -> post count, in the order the server returned the rows.
users = {doc['_id']: doc['count'] for doc in result}
print("Total utilisateurs:", len(users))

# Ten highest counts, as a dict.
top_ten = sorted(users.items(), key=lambda pair: pair[1], reverse=True)[:10]
print(dict(top_ten))
client.close()


Total utilisateurs: 36243
{None: 4758, 'pigret': 132, 'nabla09': 131, 'jphzapata': 98, 'mhuten': 96, 'JPBAUJOT': 76, 'trx337': 73, 'MS48': 72, 'Thierry_DellaTres': 69, 'FrancoiseS': 67}


In [None]:
# Largest `content.comments_count` in the collection. `$ifNull` substitutes 0
# when the field is null or absent before the single-group `$max`.
pipeline = [
    {
        '$project': {
            'comments_count': {
                '$ifNull': ['$content.comments_count', 0]
            }
        }
    }, {
        '$group': {
            '_id': None,
            'max_comments_count': {
                '$max': '$comments_count'
            }
        }
    }, {
        '$project': {
            '_id': 0,
            'max_comments_count': 1
        }
    }
]

# The original cell never closed its client; the context manager releases
# the connection when the block ends, including on error.
with MongoClient('mongodb://localhost:27017/') as client:
    result = client['mooc']['posts_mooc'].aggregate(pipeline)
    for doc in result:
        print("max comments count:", doc['max_comments_count'])

max comments count: 4578


In [17]:
from pymongo import MongoClient

# Requires the PyMongo package.
# https://api.mongodb.com/python/current

# First / last creation and update timestamps for each course.
pipeline = [
    {
        '$group': {
            '_id': '$content.course_id',
            'min_create': {
                '$min': '$content.created_at'
            },
            'max_create': {
                '$max': '$content.created_at'
            },
            'min_update': {
                '$min': '$content.updated_at'
            },
            'max_update': {
                '$max': '$content.updated_at'
            }
        }
    }
]

# Fields printed on each row, in display order.
date_fields = ('min_create', 'max_create', 'min_update', 'max_update')

# The context manager closes the client, which the original cell never did.
with MongoClient('mongodb://localhost:27017/') as client:
    result = client['mooc']['posts_mooc'].aggregate(pipeline)

    for doc in result:
        # str() keeps the row printable when the group key or a date is None:
        # None rejects the `:80` width spec and has no .split().
        course_id = str(doc['_id'])
        # Keep only the part before "T" (the date of an ISO-like timestamp).
        days = [str(doc[field]).split('T')[0] for field in date_fields]
        # No same-quote nesting inside the f-string, so this parses before
        # Python 3.12 as well.
        print(f"{course_id:80} : {' '.join(days)}")

course-v1:education-et-numerique+127001+session01                                : 2017-05-16 2017-07-10 2017-05-16 2017-07-10
course-v1:itii+119002+session02                                                  : 2017-09-18 2017-12-18 2017-09-18 2017-12-18
course-v1:CNAM+01030+session01                                                   : 2018-03-08 2018-05-14 2018-03-08 2018-05-14
course-v1:inria+41003+session03                                                  : 2017-05-11 2017-07-01 2017-05-11 2017-07-01
course-v1:ensae+53001+session02                                                  : 2017-10-10 2017-10-27 2017-10-10 2017-10-27
course-v1:MinesTelecom+04006+session08                                           : 2017-08-23 2018-10-27 2017-08-23 2018-10-27
course-v1:SciencesPo+05009+session01                                             : 2017-10-23 2018-01-08 2017-10-23 2018-01-08
course-v1:ENSCachan+20014+session02                                              : 2017-09-29 2017-12-20 2017-0

In [39]:
from pymongo import MongoClient

# Requires the PyMongo package.
# https://api.mongodb.com/python/current

# Threads that have neither a `children` nor an `endorsed_responses` field.
# Named `query_filter` so the builtin `filter` is not shadowed for the rest
# of the kernel session.
query_filter = {
    '$and': [
        {
            'content.children': {
                '$exists': False
            }
        }, {
            'content.endorsed_responses': {
                '$exists': False
            }
        }
    ]
}

# The context manager closes the client, which the original cell never did.
with MongoClient('mongodb://localhost:27017/') as client:
    result = client['G0']['threads'].find(
        filter=query_filter
    )

    # One row per thread: course id, creation day, update day, author and
    # the first 20 characters of the title.
    for doc in result:
        print(f"{doc['content']['course_id']:80} : {doc['content']['created_at'].split('T')[0]} {doc['content']['updated_at'].split('T')[0]} {doc['content']['username']} {doc['content']['title'][:20]}")

In [45]:
from pymongo import MongoClient

# Requires the PyMongo package.
# https://api.mongodb.com/python/current

# Connect to the MongoDB server on this machine.
client = MongoClient('mongodb://localhost:27017/')

# Empty filter: every document matches.
filter = {}

# Restrict the returned `content` sub-document to these five fields.
project = {
    f'content.{field}': 1
    for field in ('id', 'course_id', 'username', 'children', 'depth')
}

# Cursor over the whole collection with the projection applied.
result = client['mooc']['posts_mooc'].find(filter, project)

def stevefunk(content):
    """Print a message and, recursively, every message under its "children".

    One line is printed per message: depth, id, course id (padded to 80
    columns) and username. Missing keys fall back to "?" for the depth and to
    an empty string for the other fields. Children are visited in list order,
    each one immediately followed by its own children.

    Args:
        content: dict for one message; nested messages are read from its
            "children" list.
    """
    replies = content.get("children", [])
    # print() joins its arguments with single spaces, giving
    # "<depth> <id> <padded course id> <username>".
    print(
        content.get("depth", "?"),
        content.get("id", ""),
        f"{content.get('course_id', ''):80}",
        content.get("username", ""),
    )
    for reply in replies:
        stevefunk(reply)

# Print every thread as an indented-by-depth listing, one separator line per
# top-level document.
try:
    for doc in result:
        content = doc["content"]
        print("---------------------------------------------------------------------------------------------------------------------------------------------")
        stevefunk(content)
finally:
    # This dump walks the whole collection and is typically interrupted by
    # hand; close the client either way instead of leaking the connection.
    client.close()

---------------------------------------------------------------------------------------------------------------------------------------------
? 52ef4b71ab137b00720007d4 CNAM/01002/Trimestre_1_2014                                                      qb
0 52ef5f60919cec5e32000962 CNAM/01002/Trimestre_1_2014                                                      MountacirAmar
---------------------------------------------------------------------------------------------------------------------------------------------
? 52ef4d79b4907d2e23000996 CNAM/01002/Trimestre_1_2014                                                      fidji
0 52ef73945c4baf9e610008a8 CNAM/01002/Trimestre_1_2014                                                      ElenaThomas
1 52efa3b5cfc81d7e410009a3 CNAM/01002/Trimestre_1_2014                                                      BEJA
0 52ef74b13c9f0ce003000937 CNAM/01002/Trimestre_1_2014                                                      ElenaThomas
----------------

KeyboardInterrupt: 

In [None]:
from pymongo import MongoClient

# Requires the PyMongo package.
# https://api.mongodb.com/python/current

# Connect to the MongoDB server on this machine.
client = MongoClient('mongodb://localhost:27017/')

# Match every document; request the `content` sub-document.
filter, project = {}, {'content': 1}

# Cursor over all of `mooc.posts_mooc`.
result = client['mooc']['posts_mooc'].find(filter, project)

def stevefunk(content):
    """Copy a message and, recursively, all of its replies into `mooc.messages`.

    For each visited message this prints one line (depth, id, course id padded
    to 30 columns, username), sets `content['_id']` to the message id, and
    inserts the message unless a document with that `_id` already exists.
    The inserted document is `content` itself, so it still contains its
    nested reply lists.

    Replies are read from three lists: "children", "endorsed_responses" and
    "non_endorsed_responses". The original code looked up the misspelled keys
    "endorsed_reponses" / "non_endorsed_reponses", so the last two lists were
    never traversed.

    Args:
        content: dict for one message. It is mutated (`_id` is added).

    Uses the module-level `client`.
    """
    username = content.get("username", "")
    courseid = content.get("course_id", "")
    message_id = content.get("id", "")
    depth = content.get("depth", "?")
    print(f"{depth} {message_id} {courseid:30} {username}", flush=True)

    # Reuse the message id as the primary key so re-runs do not duplicate it.
    content['_id'] = message_id
    messages = client['mooc']['messages']
    if messages.find_one({'_id': message_id}) is None:
        messages.insert_one(content)

    # `or []` also covers a key that is present but set to None.
    for key in ("children", "endorsed_responses", "non_endorsed_responses"):
        for reply in content.get(key) or []:
            stevefunk(reply)

# Flatten every top-level document of the cursor into `mooc.messages`,
# printing a separator line before each one.
try:
    for doc in result:
        content = doc["content"]
        print("-" * 100, flush=True)
        stevefunk(content)
finally:
    # The original cell never closed its client; release the connection
    # whether the copy finishes, fails or is interrupted.
    client.close()

----------------------------------------------------------------------------------------------------
? 52ef4b71ab137b00720007d4 CNAM/01002/Trimestre_1_2014    qb
0 52ef5f60919cec5e32000962 CNAM/01002/Trimestre_1_2014    MountacirAmar
----------------------------------------------------------------------------------------------------
? 52ef4d79b4907d2e23000996 CNAM/01002/Trimestre_1_2014    fidji
0 52ef73945c4baf9e610008a8 CNAM/01002/Trimestre_1_2014    ElenaThomas
1 52efa3b5cfc81d7e410009a3 CNAM/01002/Trimestre_1_2014    BEJA
0 52ef74b13c9f0ce003000937 CNAM/01002/Trimestre_1_2014    ElenaThomas
----------------------------------------------------------------------------------------------------
? 52ef4f99344caaf903000158 CNAM/01002/Trimestre_1_2014    ambruleaux
0 52efba725c4baf9e61000928 CNAM/01002/Trimestre_1_2014    danielgv
----------------------------------------------------------------------------------------------------
? 52ef50b5cfc81d7e4100090e CNAM/01002/Trimestre_1_2014    Er

In [1]:
from pymongo import MongoClient

# Requires the PyMongo package.
# https://api.mongodb.com/python/current

# Connect to the MongoDB server on this machine.
client = MongoClient('mongodb://localhost:27017/')

# Match every document; request the `content` sub-document.
filter, project = {}, {'content': 1}

# Cursor over all of `mooc.data`.
result = client['mooc']['data'].find(filter, project)

def stevefunk(content, parent_id=None):
    """Copy a message and, recursively, all of its replies into `mooc.documents`.

    Each visited message gets `_id` set to its own id and is inserted unless a
    document with that `_id` already exists. Messages whose "depth" is 1 also
    get a "parent_id" field holding the id of the message they were found
    under. The inserted document is `content` itself, so it still contains
    its nested reply lists.

    Replies are read from three lists: "children", "endorsed_responses" and
    "non_endorsed_responses". The original code looked up the misspelled keys
    "endorsed_reponses" / "non_endorsed_reponses", so the last two lists were
    never traversed.

    Args:
        content: dict for one message. It is mutated (`_id`, and possibly
            `parent_id`, are added).
        parent_id: id of the enclosing message; None for a top-level call.

    Uses the module-level `client`.
    """
    message_id = content.get("id", "")

    # Reuse the message id as the primary key so re-runs do not duplicate it.
    content['_id'] = message_id
    if content.get("depth", 0) == 1:
        content["parent_id"] = parent_id

    documents = client['mooc']['documents']
    if documents.find_one({'_id': message_id}) is None:
        documents.insert_one(content)

    # `or []` also covers a key that is present but set to None.
    for key in ("children", "endorsed_responses", "non_endorsed_responses"):
        for reply in content.get(key) or []:
            stevefunk(reply, parent_id=message_id)

# Flatten every top-level document of the cursor into `mooc.documents`.
try:
    for doc in result:
        content = doc["content"]
        stevefunk(content)
finally:
    # The original cell never closed its client; release the connection
    # whether the copy finishes, fails or is interrupted.
    client.close()
print("done")

done
