# Connection to database

In [2]:
from pymongo import MongoClient

# Connect to the MongoDB server on the local machine (default port).
client = MongoClient(host="127.0.0.1", port=27017)

# Every query in this notebook runs against the "qr" database.
db = client["qr"]

# Schools analytics

## Data request

In [44]:
# Count how many education entries mention each school, most frequent first.
school_count_pipeline = [
    # Emit one document per entry of the `education` array.
    {"$unwind": "$education"},
    # Tally the entries that share the same school name.
    {"$group": {"_id": "$education.school", "count": {"$sum": 1}}},
    # Largest counts first.
    {"$sort": {"count": -1}},
]
cursor = db.quant_researchers.aggregate(school_count_pipeline)

data = list(cursor)

## Displaying data

In [45]:

import plotly.express as px
# Plot the 20 most frequent schools.
top_schools = data[:20]
fig = px.bar(top_schools, x="_id", y="count", labels={"_id": "Schools"})
# Tick labels are hidden because the school names are too long to fit;
# hovering over a bar still reveals the name.
fig.update_xaxes(showticklabels=False)
fig.show()


# Degree analytics

## Data request

In [46]:
# Count how many education entries mention each degree title, most frequent first.
degree_count_pipeline = [
    # Emit one document per entry of the `education` array.
    {"$unwind": "$education"},
    # Tally the entries that share the same degree title.
    {"$group": {"_id": "$education.degree", "count": {"$sum": 1}}},
    # Largest counts first.
    {"$sort": {"count": -1}},
]
cursor = db.quant_researchers.aggregate(degree_count_pipeline)

data = list(cursor)

## Displaying data

In [51]:
import plotly.express as px
# Plot the 20 most frequent degree titles.
top_degrees = data[:20]
fig = px.bar(top_degrees, x="_id", y="count", labels={"_id": "Training"})
# Tick labels are hidden because the degree titles are too long to fit;
# hovering over a bar still reveals the title.
fig.update_xaxes(showticklabels=False)
fig.show()

# Masters

In [77]:
# Count education entries per degree title, restricted to master's-level degrees.
#
# The pattern is a raw string: in a normal string literal "\." is an invalid
# escape sequence, which Python flags with a warning and plans to reject.
master_degree_pattern = r"Master|M\.Sc|MSc"

cursor = db.quant_researchers.aggregate([
    # Emit one document per entry of the `education` array.
    {"$unwind": "$education"},
    # Keep entries whose degree mentions "Master", "M.Sc" or "MSc" (case-insensitive).
    {"$match": {"education.degree": {"$regex": master_degree_pattern, "$options": "i"}}},
    # Tally the entries that share the same degree title.
    {"$group": {"_id": "$education.degree", "count": {"$sum": 1}}},
    # Largest counts first.
    {"$sort": {"count": -1}},
])
data = list(cursor)

In [80]:
import plotly.express as px
# Plot the 40 most frequent master's degree titles.
top_masters = data[:40]
fig = px.bar(top_masters, x="_id", y="count", labels={"_id": "Master"})
# Tick labels are hidden because the degree titles are too long to fit;
# hovering over a bar still reveals the title.
fig.update_xaxes(showticklabels=False)
fig.show()

In [98]:
# Count master's-level education entries per (school, degree title) pair.
#
# The pattern is a raw string: in a normal string literal "\." is an invalid
# escape sequence, which Python flags with a warning and plans to reject.
master_degree_pattern = r"Master|M\.Sc|MSc"

cursor = db.quant_researchers.aggregate([
    # Emit one document per entry of the `education` array.
    {"$unwind": "$education"},
    # Keep entries whose degree mentions "Master", "M.Sc" or "MSc" (case-insensitive).
    {"$match": {"education.degree": {"$regex": master_degree_pattern, "$options": "i"}}},
    # Tally the entries that share both the school and the degree title.
    {
        "$group": {
            "_id": {"school": "$education.school", "degree": "$education.degree"},
            "count": {"$sum": 1},
        }
    },
    # Largest counts first.
    {"$sort": {"count": -1}},
])
# Keep the raw groups under their own name so `data` only ever holds the
# flattened, plot-ready records.
grouped_masters = list(cursor)
data = [
    {
        "Master": f"{group['_id']['school']} | {group['_id']['degree']}",
        "count": group["count"],
    }
    for group in grouped_masters
]

In [99]:
import plotly.express as px
# Plot the 40 most frequent "school | degree" combinations.
top_school_masters = data[:40]
fig = px.bar(top_school_masters, x="Master", y="count")
# Tick labels are hidden because the combined labels are too long to fit;
# hovering over a bar still reveals the school and degree.
fig.update_xaxes(showticklabels=False)
fig.show()

# Search by school

In [18]:
# Count the education entries whose school name contains "Polytech Nantes",
# grouped by the exact school name as written in the profile.
polytech_nantes_pipeline = [
    # Emit one document per entry of the `education` array.
    {"$unwind": "$education"},
    # Case-insensitive match on the school name.
    {"$match": {"education.school": {"$regex": "Polytech Nantes", "$options": "i"}}},
    # One group per distinct spelling of the school name.
    {"$group": {"_id": {"school": "$education.school"}, "count": {"$sum": 1}}},
]
cursor = db.quant_researchers.aggregate(polytech_nantes_pipeline)

data = list(cursor)

# People who went to Polytech

In [37]:
# Fetch the full documents of everyone with at least one school whose name
# starts with "Polytech " (case-insensitive).
polytech_filter = {"education.school": {"$regex": "^Polytech ", "$options": "i"}}
cursor = db.quant_researchers.aggregate([{"$match": polytech_filter}])

data = list(cursor)

In [36]:
# Display the matched documents. The rendered output includes personal
# profile URLs, so clear or redact it before sharing the notebook.
data

[{'_id': ObjectId('65cbc705630f2b2cf0e77742'),
  'profile_url': 'https://www.linkedin.com/in/vincentfuentes',
  'education': [{'school': 'Polytech Lille',
    'degree': "Master's of Science, Statistics and Software Engineering"}]},
 {'_id': ObjectId('65cbd394630f2b2cf0e7786a'),
  'profile_url': 'https://www.linkedin.com/in/zhejiayu-ma',
  'education': [{'school': "Université Côte d'Azur",
    'degree': 'Doctor of Philosophy - PhD, Computer Science'},
   {'school': 'Polytech Nantes',
    'degree': "Diplôme d'ingénieur, Informatique"},
   {'school': 'South China University of Technology',
    'degree': 'Licence, Informatique'}]},
 {'_id': ObjectId('65cbd880630f2b2cf0e778df'),
  'profile_url': 'https://www.linkedin.com/in/vinetlouis',
  'education': [{'school': 'Polytech Nantes',
    'degree': 'Master Cultures numériques, Études pluridisciplinaires'},
   {'school': 'Sorbonne Université',
    'degree': 'Master 2 (M2), Philosophie : Conseil éditorial et gestion des contenus plurimédias'},
 

In [94]:
# Catalogue of the master's programs to look for in the researchers' profiles.
#
# Each entry has four keys:
#   - "degree":       display name of the program (used as the chart label)
#   - "school":       display name of the institution (shown on hover)
#   - "school_regex": pattern tested against a profile's school name
#   - "degree_regex": pattern tested against a profile's degree title
#
# The patterns list French and English spellings and common acronyms because
# profiles are free text. Keep them free of stray whitespace and invisible
# characters: a single extra character silently prevents any match.
degrees = [
    {
        "degree": "Parcours Probabilités et Finance",
        "school": "Institut Polytechnique de Paris",
        "school_regex": "Sorbonne|Polytechnique|UPMC|Pierre.*Marie.*Curie",
        "degree_regex": "Probabilit.*Finance|El Karoui"
    },
    {
        "degree": "Modélisation aléatoire, finance et data sciences",
        "school": "Université Paris Cité",
        "school_regex": "Paris.*Cité|Diderot|ENSAE|CentraleSup(e|é)lec|Sorbonne|Polytechnique",
        "degree_regex": "Laure (É|E)lie|M2MO|(Random Mod.*|Mod.* Al(é|e)atoire).*Finance.*Data Science"
    },
    {
        "degree": "Ingénierie Statistique et Financière",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Ing(é|e)nierie Statistique.*Financi(e|è)re|Statistical.*Financial Engineering"
    },
    {
        "degree": "Research in Finance",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Re(search|cherche) (i|e)n Finance"
    },
    {
        "degree": "Mathématiques de l'Assurance de l'Economie et de la Finance",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        # No leading space: the degree title may start directly with "Math...".
        "degree_regex": "Math.*(As|In)surance.*Econom.*Finance"
    },
    {
        "degree": "Ingénierie économique et financière",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Ingénierie économique et financière|Economic and financial engineering"
    },
    {
        "degree": "Mathématiques, Apprentissage, Sciences et Humanités",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Math.*(Apprentissage|Learning).*Sciences.*Human.*"
    },
    {
        "degree": "Quantitative Economic Analysis",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Quantitative Economic Analysis|Analyse quantitative (é|e)conomique"
    },
    {
        "degree": "Financial Markets",
        "school": "Université Paris Dauphine-PSL",
        "school_regex": "Dauphine|PSL",
        "degree_regex": "Financial Markets|Marchés Financiers"
    },
    {
        # Label and pattern end right after "Finance" (no invisible trailing characters).
        "degree": "Modélisation et Méthodes Mathématiques en Economie et Finance",
        "school": "Université Panthéon - Sorbonne",
        "school_regex": "Panthéon.*Sorbonne",
        "degree_regex": "MMMEF|Mod.*(Méth.*Math.*|Math.*Meth.*).*Econom.*Finance"
    },
    {
        "degree": "Modélisations statistiques économiques et financières",
        "school": "Université Panthéon - Sorbonne",
        "school_regex": "Panthéon.*Sorbonne",
        "degree_regex": "MOSEF|Modélisation.*statistique.*économique.*financière|Economic.*financial statistical modeling"
    },
    {
        "degree": "Ingénierie du Risque : Finance et Assurance",
        "school": "Université Panthéon - Sorbonne",
        "school_regex": "Panthéon.*Sorbonne|Afia",
        "degree_regex": "Ing(é|e)nierie du Risque.*Finance.*Assurance|IRFA|Risk Engineering.*Finance.*Insurance|Ingénierie Mathématique de la Finance|Math.*Eng.*Finance"
    },
    {
        # Display label only: written out in plain words, not as a pattern.
        "degree": "Monnaie, Banque, Finance, Assurance (Parcours techniques financières et bancaires)",
        "school": "Université Panthéon - Assas",
        "school_regex": "Panthéon.*Assas",
        "degree_regex": "Mon.*Ban.*Finance|Technique.*Financ.*Bancaires|Financ.*Bank.*Tech.*"
    },
    {
        "degree": "Ingénierie financière et modèles aléatoires",
        "school": "Sorbonne Université Sciences",
        "school_regex": "Sorbonne|CFA.*Science",
        "degree_regex": "IFMA|Ingénierie mathématique|Math.*Engineering|Ingénierie financière.*modèles aléatoires|Financial engineering.*random models"
    },
    {
        "degree": "Probabilités et Modèles Aléatoires",
        "school": "Sorbonne Université Sciences",
        "school_regex": "Sorbonne",
        "degree_regex": "Probabilit.*Modèles Aléatoires|Probability.*Random Models"
    },
    {
        "degree": "Ingénierie Statistique et Informatique de la Finance, de l'Assurance et du Risque",
        "school": "Université Paris Cité",
        "school_regex": "Paris.*Cité",
        "degree_regex": "Ing.*Stat.*Informatique.*Finance.*Assurance.*Risque|Stat.*Computer Engineering.*Finance.*Insurance.*Risk|ISIFAR"
    },
    {
        "degree": "Statistique du risque",
        "school": "Université de Nanterre",
        "school_regex": "Nanterre",
        "degree_regex": "Stat.*ris.*|ISEFAR"
    },
    {
        "degree": "DATA SCIENCE : SANTÉ, ASSURANCE ET FINANCE",
        "school": "Université Paris-Saclay",
        "school_regex": "Paris.*Saclay",
        "degree_regex": "(SANTÉ|Health).*(AS|In)SURANCE.*FINANCE"
    },
    {
        "degree": "Finance quantitative",
        "school": "Université Paris-Saclay",
        "school_regex": "Paris.*Saclay",
        "degree_regex": "M2QF|FINANCE QUANTITATIVE|Quant.*Finance"
    },
    {
        "degree": "Mathématiques de l'aléatoire",
        "school": "Université Paris-Saclay",
        "school_regex": "Paris.*Saclay",
        # "Math.*" (any text after "Math"), not "Math*." (optional h's plus one character).
        "degree_regex": "Math.*(Random|Al(é|e)a).*"
    },
    {
        "degree": "Mathématiques de la finance et des données",
        "school": "Université Gustave Eiffel & UPEC",
        "school_regex": "Gustave.*Eiffel|UPEC|Paris.*Est|(E|É)cole.*Ponts",
        "degree_regex": "Lamberton|MFD|Math.*Finance.*D(ata|onnées)"
    },
    {
        "degree": "Mathématiques et informatique",
        "school": "UPEC",
        "school_regex": "UPEC|Paris.*Est",
        "degree_regex": "Math.*(INFORMATIQUE|Computer)"
    },
    {
        "degree": "Probabilités et Statistiques des nouvelles données",
        "school": "Université Gustave Eiffel & UPEC",
        "school_regex": "Gustave.*Eiffel|UPEC|Paris.*Est",
        "degree_regex": "Proba.*Stat.*(nouvelles données|new data)"
    },
    {
        "degree": "Mathématiques Appliquées à l'Ingénierie Financière",
        "school": "CY Cergy Paris Université",
        "school_regex": "CY|Cergy",
        "degree_regex": "Math.*Appli.*(Financial Engineering|Ing.*Financ.*)|MAIF"
    },
    {
        "degree": "Mathematics of Randomness",
        "school": "Institut Polytechnique de Paris",
        "school_regex": "Polytechnique|ENSAE|ENSTA|T(é|e)l(é|e)com.*Paris",
        "degree_regex": "Mathematics of Randomness|Mathématiques de l'aléatoire"
    },
    {
        "degree": "Statistics, Finance and Actuarial Science",
        "school": "Institut Polytechnique de Paris",
        "school_regex": "Polytechnique|ENSAE|ENSTA|T(é|e)l(é|e)com.*Paris",
        "degree_regex": "Stat.*Finance.*Actuaria.*"
    },
    {
        "degree": "Ingénierie Financière et Modélisation",
        "school": "Université Sorbonne Paris Nord",
        "school_regex": "Sorbonne.*Nord|UFR de sciences économiques et gestion|SEG",
        "degree_regex": "Ingénierie Financière et Modélisation|Financial Engineering and Modeling"
    },
    {
        "degree": "Ingénierie des Risques Financiers",
        "school": "Institut de science financière et d'assurances",
        "school_regex": "ISFA|Institut de science financière et d'assurances",
        "degree_regex": "Ingénierie des Risques Financiers|Financial Risk Engineering|IRF"
    },
    {
        "degree": "Finance Quantitative",
        "school": "Grenoble IAE",
        "school_regex": "Grenoble IAE|INP|UGA|Grenoble.*Alpes",
        "degree_regex": "Finance Quantitative|Quant.*Finance"
    },
    {
        "degree": "Mathématiques, finance computationnelle, actuariat",
        "school": "Université de Lille",
        "school_regex": "Universit.*Lille|Lille",
        "degree_regex": "Math.*(finance computationnelle|comput.*finance).*actuaria.*"
    },
    {
        "degree": "Finance quantitative",
        "school": "IAE Lille",
        "school_regex": "Universit.*Lille|IAE Lille|Lille",
        "degree_regex": "Finance Quantitative|Quant.*Finance"
    },
    {
        "degree": "Informatique et Mathématiques Appliquées à la Finance et à l'Assurance",
        "school": "Université Côte d'Azur",
        "school_regex": "Nice|Azur",
        "degree_regex": "IFAMA|Informatique.*Mathématiques Appliquées à la Finance et à l'Assurance|Computer Science and Mathematics Applied to Finance and Insurance"
    },
    {
        "degree": "M. Sc. Finance mathématique et computationnelle",
        "school": "Université de Montréal",
        "school_regex": "Montréal",
        "degree_regex": "Finance mathématique et computationnelle|Math.*comput.*Finance"
    },
    {
        "degree": "MCom (Quantitative Finance) Research",
        "school": "University of Johannesburg",
        "school_regex": "Johannesburg",
        "degree_regex": "Quant.*Finance"
    },
    {
        "degree": "Quantitative Finance",
        "school": "Wirtschafts Universität",
        "school_regex": "Wirtschafts|WU.*Vienna",
        "degree_regex": "Quant.*Finance"
    },
    {
        "degree": "M. Sc. Finance",
        "school": "Universität Ulm",
        "school_regex": "Universität Ulm|University of Ulm",
        "degree_regex": "Finance"
    },
    {
        "degree": "Financial mathematics",
        "school": "Université du Luxembourg",
        "school_regex": "Universit.*Luxembourg",
        "degree_regex": "Financ.*math|Math.*Finan"
    },
    {
        "degree": "M. Sc. in Quantitative Finance",
        "school": "University of Zurich",
        "school_regex": "University.*Zurich",
        "degree_regex": "Quant.*Finance"
    },
    {
        "degree": "M. Sc. in Quantitative finance",
        "school": "Erasmus University Rotterdam",
        "school_regex": "Rotterdam",
        "degree_regex": "Quant.*Finance"
    },
    {
        "degree": "Mathematical Finance and Actuarial Science",
        "school": "Technical University of Munich",
        "school_regex": "Munich",
        "degree_regex": "Math.*Finance.*Actu.*Science"
    },
    {
        "degree": "Stochastics and Financial Mathematics",
        "school": "University of Amsterdam",
        "school_regex": "Amsterdam",
        "degree_regex": "Stochas.*(Math.*Finan.*|Finan.*Math.*)"
    },
    {
        "degree": "Quantitative finance",
        "school": "University of Bologna",
        "school_regex": "Bologna|Bologne",
        "degree_regex": "Quant.*Finance"
    },
    {
        "degree": "Quantitative finance",
        "school": "Vienna University of Economics and Business",
        "school_regex": "Vienna",
        "degree_regex": "Quant.*Finance"
    }
]

# Every spotted master

In [95]:

# For every catalogued program, store under degree["count"] the number of
# researcher documents that list it.
for degree in degrees:
    # $elemMatch requires ONE education entry to satisfy both patterns.
    # Two separate "education.school" / "education.degree" conditions would
    # also accept a profile whose school matches in one entry and whose
    # degree matches in another, over-counting the program.
    #
    # Matching is case-insensitive, like the other regex queries in this
    # notebook; the catalogue patterns mix upper- and lower-case spellings.
    # NOTE(review): this also loosens short acronyms in the catalogue
    # (e.g. "CY", "UGA") -- tighten those patterns if counts look inflated.
    same_entry_filter = {
        "education": {
            "$elemMatch": {
                "school": {"$regex": degree["school_regex"], "$options": "i"},
                "degree": {"$regex": degree["degree_regex"], "$options": "i"},
            }
        }
    }
    # count_documents returns 0 when nothing matches, so no special case is
    # needed for programs that nobody lists.
    degree["count"] = db.quant_researchers.count_documents(same_entry_filter)

In [97]:
import plotly.express as px

# Order the catalogued programs from most to least frequently listed.
sorted_degrees = list(degrees)
sorted_degrees.sort(key=lambda entry: entry["count"], reverse=True)

fig = px.bar(sorted_degrees, x="degree", y="count", hover_data=["school"])
# Tick labels are hidden because the program names are too long to fit;
# hovering over a bar reveals the program and its school.
fig.update_xaxes(showticklabels=False)
fig.show()