# Working with Distinct Values, $elemMatch, and Regex

## An exceptional laureate

In [None]:
from pymongo import MongoClient

# Connect using MongoClient's default connection settings.
client = MongoClient()
# Handle to the "nobel" database; subscript access is equivalent to
# attribute access (client.nobel).
db = client["nobel"]

In [None]:
# "prizes.2" addresses index 2 of the "prizes" array, so requiring it to
# exist selects a laureate with at least three prizes.
has_third_prize = {"prizes.2": {"$exists": True}}
db.laureates.find_one(has_third_prize)

## Using .distinct()

In [None]:
# Every distinct value of the "gender" field across the collection.
laureates = db.laureates
laureates.distinct("gender")

- A convenience method for a common aggregation (like `count_documents`)
- We will not cover custom aggregations in this lesson, but the `aggregate` method is powerful.
- `distinct` aggregation is efficient if there is a collection *index* on the field
- We will learn how to create an index later in this lesson
- No index needed here: collection fits in memory, has ≲ 1,000 documents

## All prize categories vs. those of laureates with multiple prizes

In [None]:
# Dot notation reaches into each element of the "prizes" array and
# collects the distinct "category" values.
db["laureates"].distinct("prizes.category")

In [None]:
# Restrict to laureates whose "prizes" array has an element at index 1
# (i.e. at least two prizes), then list the categories found among them.
has_second_prize = {"prizes.1": {"$exists": True}}
db.laureates.distinct("prizes.category", has_second_prize)

## Enter $elemMatch

In [None]:
# Exact-match form: this only matches when a prize sub-document is equal
# to the given document as a whole (same fields, same order, no extras).
exact_prize = {"category": "physics", "share": "1"}
db.laureates.count_documents({"prizes": exact_prize})

In [None]:
# Dotted-field form: each condition is checked against the array
# independently, so different prizes may satisfy different conditions.
physics_and_unshared = {"prizes.category": "physics", "prizes.share": "1"}
db.laureates.count_documents(physics_and_unshared)

In [None]:
# $elemMatch requires a single prize sub-document to satisfy every
# condition at once.
unshared_physics = {"category": "physics", "share": "1"}
db.laureates.count_documents(
    {"prizes": {"$elemMatch": unshared_physics}}
)

In [None]:
# Same single-element match, additionally bounded by year.
# NOTE(review): the bound is the string "1945", so this presumably relies
# on years being stored as strings -- confirm against the data.
early_unshared_physics = {
    "category": "physics",
    "share": "1",
    "year": {"$lt": "1945"},
}
db.laureates.count_documents(
    {"prizes": {"$elemMatch": early_unshared_physics}}
)

## Finding a substring with $regex

In [None]:
# Fetch one laureate whose first name is exactly "Marie".
marie_query = {"firstname": "Marie"}
db.laureates.find_one(marie_query)

In [None]:
# Birth countries containing the substring "Poland" (case-sensitive, since
# no regex options are given).
contains_poland = {"bornCountry": {"$regex": "Poland"}}
case_sensitive = db.laureates.distinct("bornCountry", contains_poland)

In [None]:
# Same search with a lowercase pattern; the "i" option ignores case.
poland_any_case = {"bornCountry": {"$regex": "poland", "$options": "i"}}
case_insensitive = db.laureates.distinct("bornCountry", poland_any_case)

# Both variants are expected to produce the same set of countries.
assert set(case_sensitive) == set(case_insensitive)

In [None]:
from bson.regex import Regex

# A bson Regex bundles the pattern and its flags ("i" = ignore case) into
# a single value usable directly in the filter.
poland_regex = Regex("poland", "i")
db.laureates.distinct("bornCountry", {"bornCountry": poland_regex})

In [None]:
import re

# A compiled pattern from the standard-library re module is passed as the
# filter value in place of a bson Regex.
poland_pattern = re.compile("poland", re.I)
db.laureates.distinct("bornCountry", {"bornCountry": poland_pattern})

## Beginning and ending (and escaping)

In [None]:
from bson.regex import Regex

# "^" anchors the match to the beginning of the field value.
starts_with_poland = Regex("^Poland")
db.laureates.distinct("bornCountry", {"bornCountry": starts_with_poland})

In [None]:
# Countries whose name begins with the literal text "Poland (now".
# The pattern is a raw string so that `\(` reaches the regex engine as an
# escaped literal parenthesis; in a normal string literal `\(` is an
# invalid escape sequence and triggers a warning on modern Python.
db.laureates.distinct(
    "bornCountry",
    {"bornCountry": Regex(r"^Poland \(now")})

In [None]:
# Countries whose name ends with the literal text "now Poland)".
# "$" anchors the match to the end of the value. The pattern is a raw
# string so that `\)` reaches the regex engine as an escaped literal
# parenthesis; in a normal string literal `\)` is an invalid escape
# sequence and triggers a warning on modern Python.
db.laureates.distinct(
    "bornCountry",
    {"bornCountry": Regex(r"now Poland\)$")})