What the final data must look like
```
user_data = {
    "userID": user,
    "pre_test_score": pre_score,
    "post_test_score": post_score,
    "RPL": realized_learning_gain,
    "ALG": absolute_learning_gain,
    "topic_id": topic_id,
    "topic_title": topic,
    "session_duration": time_elapsed,
    "clicks": [],
}
```

In [335]:
import os
import sys

import bson

from tqdm.auto import tqdm


data_path = "../../data/searchx_chiir2020/"
sys.path.append("../")


def g_path(x=None, y=data_path):
    if x:
        return os.path.join(y, x)
    return y

## Load data

In [135]:
original_users = {
    "5caf6c27b480520012eb741b",
    "5c864b1bd9fe700001023b72",
    "5d27b8fee5af0b00157f41a6",
    "5a97081189de8200013eea9b",
    "5a69fb84eeef45000138d276",
    "5c9162220791df0001366aa6",
    "5d26f5eb7aa0b0000138b4ba",
    "5d0a993c6e9965001aeafe67",
    "5b0c5ae5641b1200010ee49d",
    "5b09bd6ae9270900013b82be",
    "5d269f7442562200018babe6",
    "5c3fb34812dd430001d4b4d5",
    "59bb884a3c45a10001ccc4f7",
    "5d5ba6430ffa48001b458d64",
    "5c676d436f250200012a04e8",
    "58d0632c2fc72000011f8c57",
    "5d3a3b28376aea001a66782d",
    "5d235fa4c9b2ee00190ed9af",
    "57843b986475d400015a8e45",
    "55f244e632af740012170458",
    "5c9e5a042b3c77001544e975",
    "59f8868e05d1d60001f1464d",
    "5b82d4fda284bc000193094c",
    "5c54b2e0cf1cd900018cf6a6",
    "5952a1dc27f8fb00016213bb",
    "54a7c951fdf99b7cd759d23e",
    "5bf1de1556a321000106fae0",
    "5c1a32c91f191b0001ebe392",
    "5bcdf10432e46700016f8f80",
    "5d91a7c336017c00192a6755",
    "5bddad5e3dad0a0001e2d79b",
    "5cacf324ea201200013d7807",
    "5b421b9ac2e3810001763252",
    "595022904df2c40001bf85df",
    "5d404bc74db54a0001fecb92",
    "5d8d0decf345720017634bc0",
    "5977e3867412f8000194e1fe",
    "5cf1b0f475b61a0001d00408",
    "5b0b1b5a9663ee0001456cec",
    "5993020a5f49420001caf86b",
    "5d114bdc009d5e001ac94487",
    "58fd0de6d66f6400016dc0fd",
    "5d4fe8e3f20feb00190b643c",
    "59308b35deea010001017c0e",
    "5d30eee5e7ca2e001bd4091b",
    "5d971ac046a93c0017005bf1",
    "5b0f27321e55760001b9a8a5",
    "5d8e3a178b0c840017736451",
    "5c742bf8ecf82a0015630ec1",
    "56cb839cedf8da000c6df35b",
    "5b587dc6efcd5c0001cebd03",
    "5d34437867a8ff0019dc0403",
    "5c28ef690091e40001ca5e99",
    "5b6c3818f458f500013774a3",
    "5877f454127d7700018b1bdf",
    "57533f7c483c14000662f391",
    "5be89ff4d153f500019aa86f",
    "5d63bfff79f8dc0015af6de3",
    "5c5cad3e4ee81200018eafe2",
    "5cd0cdf9731ab00013a73906",
    "5c3f00de1580e9000122ecb5",
}

In [280]:
a = bson.decode_all(open(g_path("pages.bson"), "rb").read())

In [287]:
[x for x in a if "what is industrial" in x]

[]

In [137]:
import base64
import gzip
import json

from collections import OrderedDict
from datetime import datetime

import numpy as np
import pandas as pd

from tqdm.auto import tqdm


def p_date(x: str) -> datetime:
    """Short hand for datetime.fromisoformat(x)"""
    if isinstance(x, datetime):
        return x
    return datetime.fromisoformat(x)


all_logs = bson.decode_all(open(g_path("logs.bson"), "rb").read())
clean_logs = [x for x in all_logs if x["userId"] in original_users]
users = original_users

pre_tests = {
    **{x["sessionId"]: x["meta"] for x in clean_logs if x["event"] == "SURVEY_REGISTER_RESULTS"},
    **{x["userId"]: x["meta"] for x in clean_logs if x["event"] == "SURVEY_REGISTER_RESULTS"},
    **{x["meta"]["data"]["userId"]: x["meta"] for x in clean_logs if x["event"] == "SURVEY_REGISTER_RESULTS"},
}

## Metrics
from Simple_log.ipynb

In [256]:
def get_all_logs(user: str) -> list[dict]:
    """Get all logs from one user"""
    user_logs = [x for x in clean_logs if x["userId"] == user]

    return sorted(user_logs, key=lambda x: x["date"])


def get_topic_title(user_logs: list[dict]) -> str:
    """Gets the title of the topic assigned to user"""
    return [x for x in user_logs if x["event"] == "SURVEY_INTERMEDIATE_TEST_RESULTS"][0]["task"]["data"]["topic"][
        "title"
    ]


def get_topic_id(user_logs: list[dict]) -> str:
    """Gets the id of the topic the user was assigned to"""
    for x in user_logs:
        if x["event"] == "SEARCHRESULT_VIEW_URL":
            return x["task"]["data"]["topic"]["id"]


# def get_topic_terms(p: str) -> list[str]:
#     """Gets a list of the terms for the VKS for a given topic id"""
#     return vocab[p]["terms"]


def get_session(user_logs: list[dict]) -> str:
    """Get what type of experiment the user was in"""
    event = [x for x in user_logs if x["event"] == "SEARCHRESULT_VIEW_URL"][0]
    return event["meta"]["session"]


def get_session_duration(user_logs: list[dict]) -> int:
    """Gets how long, in seconds, a user session lasted, from start to end"""
    sorted_logs = sorted(user_logs, key=lambda x: datetime.strptime(x["date"], "%Y-%m-%d %H:%M:%S"))
    end_time = p_date(sorted_logs[-1]["date"])
    start_time = p_date(sorted_logs[0]["date"])
    return (end_time - start_time).seconds


def rebuild_serp(user_log: list[dict], start_event: int) -> dict[str, int]:
    """Rebuilts a SERP, with documents in each position, for a given query
    Returns dict with URL and its position in the SERP.
    CAVEAT: If URL appears twice in the serp (may happen due to filtering process), only first occurence counts."""
    n_events = len(user_log) - start_event
    for idx, e in enumerate(user_log[start_event + 1 :], start=start_event):
        if e["event"] == "SEARCH_QUERY":
            n_events = idx
            break

    query_events = sorted(user_log[start_event : start_event + n_events], key=lambda x: x["date"])
    query_events = [x for x in query_events if "position" in x["meta"]]

    for k in query_events:
        # naive. Filter later.
        k["true_position"] = (k["meta"]["page"] - 1) * 10 + k["meta"]["position"]

    query_events = sorted(query_events, key=lambda x: x["true_position"])
    # Try to rebuild the SERP
    serp = {}
    docs_so_far = 0
    offset = 0
    for ix, e in enumerate(query_events):
        meta = e["meta"]
        url = meta["url"]
        if url in serp:
            continue
        serp[url] = e["true_position"]

    ranking = sorted(serp.items(), key=lambda x: x[1])

    return serp


def get_video_id(user_log, topic_id):
    return [x for x in user_log[0]["task"]["data"]["topics"] if x["id"] == topic_id][0]["youtube"]


def get_topic_terms(user_log, topic):
    return [x for x in user_log[0]["task"]["data"]["topics"] if x["id"] == topic_id][0]["terms"]


def get_ALGs(user_log, topic_id):
    intermid_dict = {}
    score_mapping = {1: 0, 2: 0, 3: 1, 4: 2}
    topic_terms = get_topic_terms(user_log, topic)
    pretest_results = [r for r in user_log if r["event"] == "SURVEY_PRE_TEST_RESULTS"][0]
    intermediate_results = [r for r in user_log if r["event"] == "SURVEY_INTERMEDIATE_TEST_RESULTS"]
    try:
        posttest_results = [r for r in user_log if r["event"] == "SURVEY_POST_TEST_RESULTS"][0]
    except IndexError:
        posttest_results = False

    score = 0
    scores = []

    qids = ["Q-" + str(topic_id) + "-" + str(q) for q in topic_terms]
    for qid in qids:
        score += score_mapping[int(pretest_results["meta"]["data"][qid])]
    intermid_dict[0] = score

    for t in intermediate_results:
        s_id = t["meta"]["session"]
        score = 0
        test = t["meta"]["data"]
        for qid in qids:
            if qid in test:
                score += score_mapping[int(test[qid])]
        intermid_dict[s_id] = score

        if s_id == 3:
            if posttest_results:
                score = 0
                for qid in qids:
                    if qid in posttest_results["meta"]["data"]:
                        score += score_mapping[int(posttest_results["meta"]["data"][qid])]
                intermid_dict["4"] = score
            else:
                intermid_dict["4"] = 0
    for i in range(5):
        if i not in intermid_dict.keys():
            intermid_dict[i] = 0

    # accumulate
    scores = []
    acc = 0
    for i in range(5):
        acc += intermid_dict[i]
        scores.append(acc)
    return scores


def MLG(user_log, topic_id):
    topic_terms = get_topic_terms(user_log, topic_id)
    pretest_results = [r for r in user_log if r["event"] == "SURVEY_PRE_TEST_RESULTS"][0]
    score = 0
    score_mapping = {1: 0, 2: 0, 3: 1, 4: 2}

    for question in topic_terms:
        qid = "Q-" + str(topic_id) + "-" + str(question)
        pre_score = score_mapping[int(pretest_results["meta"]["data"][qid])]
        score += 2 - pre_score
    return score / 10


def get_RPLs(user_log, topic_id):
    _ALG = get_ALGs(user_log, topic_id)
    _MLG = MLG(user_log, topic_id)
    return [x / _MLG for x in _ALG]

## Extract Data

In [374]:
ALGs

[2, 2, 2, 2, 2]

In [378]:
[e for e in user_log if e['event'] == "SURVEY_INTERMEDIATE_TEST_RESULTS"][0]["meta"]["session"]

'1'

In [379]:
video_urls = {}
dataset = []
topics = set()
docs_topic = {}
for count, user in tqdm(enumerate(users), total=len(users)):
    user_log = get_all_logs(user)  # All user events
    topic = get_topic_title(user_log)  # User topic
    topic_id = get_topic_id(user_log)  # User topic id
    video_urls[topic_id] = get_video_id(user_log, topic_id)
    RPLs = get_RPLs(user_log, topic_id)
    ALGs = get_ALGs(user_log, topic_id)

    pre_score, post_score = ALGs[0], ALGs[-1]
    user_data = {
        "userID": user,
        "pre_test_score": pre_score,
        "post_test_score": post_score,
        "RPL": RPLs,
        "ALG": ALGs,
        "topic_id": topic_id,
        "topic_title": topic,
        "clicks": [],
        "tests_events":{1: -1, 2: -1, 3:-1, 4:-1}
    }

    current_query = None
    queries = 0
    query_data = None

    # Iterate over log and find docs and clicks
    tests = 0
    for e_idx, e in enumerate(user_log):
        meta = e["meta"]
        if e['event'] == "SURVEY_INTERMEDIATE_TEST_RESULTS": # Intermediate test!
            s_id = e["meta"]["session"]
            user_data['tests_events'][s_id]=len(user_data["clicks"]) # How many clicks so far
        
        if e["event"] == "SEARCH_QUERY": # new query!
            current_query = meta["query"]

        elif e["event"] == "SEARCHRESULT_CLICK_URL":  # A click!
            docs_topic[meta['url']] = topic
            user_data["clicks"].append({"query_text": current_query, "url": meta["url"]})
    dataset.append(user_data)
json.dump(dataset, open("../../data/nirmal_logs.json", "w"))

  0%|          | 0/61 [00:00<?, ?it/s]

## Get "Golden" data
Mostly from their video's transcripts, manually.

In [366]:
docs_target = {
    "10": "The key technology needed to make biobased products is industrial biotechnology. I think it is good in this unit to carefully describe in a nutshell where it stands for, because perceptions may vary. How does it fit in the context of bioplastics, genetically modified organisms, applications in health, and to biological large scale processes. First let us address biotechnology: We can define biotechnology as the use of biological systems to make a valuable product. Biotechnology is very old, it was used long before people knew about microorganisms. Beer, cheese and bread are all examples of classical biotechnology. But also fruits and vegetables that farmers have improved over the years by breeding can be considered biotechnology. But how do we define “a biological system”? The system is defined as microorganisms or their derivatives, such as enzymes. So microorganisms form the heart of biotechnology. In industrial biotechnology we make industrially relevant chemicals on a large scale with biological systems. These could be products you would normally not associate with biotechnology, like plastics, paint, medicines or fuels. In a nutshell, you have roughly two kinds of industrially useful microorganisms, bacteria and fungi. Bacteria are extremely useful and are widespread. They can be found in your food, such as yoghurt, in your gut, where they are responsible for digestion of food, but also in wastewater treatment. Industrially, they are used to produce insulin. Fungi are the second class of microorganisms, they are the molds that appear on deteriorating bread. Industrially, they have been used, for example, to produce penicillin. Enzymes are produced by micro-organisms as well and they themselves catalyze their own reaction inside or outside the cell. As such, they are also biological systems. Enzymes can be used to improve bread, make cheese, or remove stains more effectively when you wash your clothes. They can also be applied as catalysts in organic synthesis. If we take a closer look at bacteria, you can see they are actually relatively simple micro-organisms. They are prokaryotic, so there is a single cell with a cell membrane in which all processes happen. Fungi are more complex micro-organisms, they are eukaryotes and processes happen in different compartments within the cell. Fungi and bacteria are living organisms because they can grow on feed, and multiply themselves. Microorganisms can grow on and in any biological ecosystem on earth. They can also be trained to affect certain conversions, especially the ones that we find useful in an industrial setting. In order to allow for growth, and to serve as source of carbons, sugars are a suitable form of biomass, that has been commonly applied in industrial biotechnology. Thus for our applications, the organisms are given a form of biomass as feed. Then the microbial conversion of organic material into substances takes place in a process which is called fermentation. The products that result are usually simple molecules such as ethanol, lactic acid, succinic acid or propanediol. In a real-life industrial biotechnology process, many more intermediate steps are needed to link biomass to microorganisms and product. Sugars from biomass need to be released prior to fermentation, and a number of downstream processing steps are needed to obtain purified product. The resulting compounds require additional modification by either chemical or biochemical steps to make the final product. So don’t expect a plastic cup to come out immediately! Several scientific breakthroughs led to the development of industrial biotechnology as we know it today. A number of them occurred right here in Delft the place I come from, and I would like to share some of this with you. Delft owes its rich history in microbiology to the microscope. We all find it very normal that we can visualize micro-organisms, but the microscope was actually invented here in Delft by Antonie van Leeuwenhoek. He was the first one to observe microorganisms, which since then have been used by industrial partners in the Netherlands to make penicillin and spirits. Following the footsteps of van Leeuwenhoek we have some famous professors in microbiology who have discovered a number of very important facts. Professor Beijerink, shown here on the picture, was the first professor in microbiology in the Netherlands and he discovered the Tobacco Mosaic Virus. Van Iterson was the pupil of Beijerink and he developed the mathematical approach to plant growth, named phyllotaxis. Next in line is Kluyver, who discovered the unity in biochemistry. This means that every mechanism on biochemical level is identical. Then professor Kuenen, who is still alive today, he discovered the Anammox bacterium, which had a great impact on nitrogen removal in wastewater treatment processes worldwide. The foundation of many of the microorganisms that are used today was laid right here in Delft. And you can imagine that top performing microorganisms are of vital importance to develop a ‘top' industrial bioprocess. Join us next unit for more 'Technology for biobased products'. ",
    "7": "We've already learned that cellular respiration can be broken down into roughly three phases. The first is glycolysis, which literally means the breaking down of glucose. And then this can occur with or without oxygen. If we don't have oxygen, then we go over to fermentation. We'll talk about that in the future. Go over to fermentation and in humans it produces lactic acid. In other types of organisms it might produce alcohol or ethanol. But if we have oxygen-- and for the most part we're going to assume that we can proceed forward with oxygen-- if there is oxygen, then we could proceed forward to the Krebs cycle. Sometimes called the citric acid cycle because it deals with citric acid. The same thing that's in orange juice or lemons. And then from there we proceed to the electron transport chain. And we learned in the first overview video of cellular respiration that this is where the bulk of the ATP is actually produced. Although it uses raw materials that came out of these phases up here. Now what I want to do in this video is just focus on glycolysis. And this is kind of-- it's sometimes a challenging task because you can really get stuck in the weeds. And I'll show you the weeds in a little bit, and the actual mechanism. And it can be very daunting. But what I want to do is simplify it for you so you can have the big take-aways. And then we can appreciate, and then maybe when we look at the weeds of glycolysis we can make a little bit more sense of it. So glycolysis, or really cellular respiration, it starts off with glucose. And glucose, we know its formula. It's C6H12O6. And I could draw its whole structure; it would take a little time. But I'm just going to focus on the carbon backbone. So it is a ring, or can be a ring. But I'm just going to draw it as six carbons in a row. Now there's two kind of important phases of glycolysis that are good to know. One, I call the investment phase. And the investment phase actually uses two ATPs. So you know, the whole purpose of cellular respiration is to generate ATPs, but right from the get-go I actually have to use two ATPs. But I use two ATPs and then I'm essentially going to break up the glucose into two 3-carbon compounds right here that actually also have a phosphate group on them. The phosphate groups are coming from those ATPs. They also have a phosphate group on them and this is often called-- well, there's a lot of names for it. Sometimes it's called PGAL. You really don't have to know this. Or phosphoglyceraldehyde, really challenging my spelling skills right here. That's not that important to know. All you have to know is in this first phase you use two ATPs. That's why I call it the investment phase. If we use a business analogy, investment phase. And then each of these two PGAL molecules can then go into the payoff phase. So in the payoff phase, each of these PGALs turn into pyruvate. Which is another 3-carbon, but it's reconfigured. But the process of it going to pyruvate-- and let me write pyruvate in blue, because this is something that, at least it's good to know the word. And I'll show you the structure in a second. Pyruvate. Sometimes it's called pyruvic acid. Same thing. And that's essentially the end product of glycolysis. So you start off with glucose in the investment phase. You end up in this phosphoglyceraldehyde, which essentially you broke up your glucose and you put a phosphate on either end of it. And then those each independently go through the payoff phase. So you end up with two molecules of pyruvate for every molecule of glucose you started off with. Now you're saying, hey, Sal, there was a payoff phase, what was our payoff? Well our payoff, we got, for each-- let me write this down as a payoff phase. This is our payoff phase. And I apologize for the white background. I did it because, the mechanism I'm showing you, I copy-and-pasted it from Wikipedia, and they had a white background so I just ran with the white background for this video. But I, personally at least, like the black background a lot better. But this is the payoff phase right here. And so when we go from the phosphoglyceraldehyde to the pyruvate or the pyruvic acid, we produce two things. Or I guess we could say we produce three things. We produce, each of these PGALs to pyruvates produce two ATPs. So I'm going to produce two ATPs there, I'm going to produce two ATPs there. And then they each produce an NADH. And I'll do it in a darker color. NADH. And of course they're not producing the whole molecule in a vacuum. Essentially what they're doing is they're starting with the raw material of an NAD plus-- so they start off with an NAD plus-- and they essentially reduce it by adding a hydrogen. Remember, we learned a couple of videos ago that you could view reduction as a gain in hydrogen. So the NAD gets reduced to NADH. And then later on, these NADHs are used in electron transport chain to actually produce ATPs. So the big take-away here, if I were to write the reaction that we get for glycolysis, is that you start off with a glucose. And you need some NAD plus. And actually, for every mole of glucose, you're going to need two NAD plusses. You're going to need two ATPs. So I'm just writing all the ingredients that we need to start off with. And then you're going to need-- well, let me say, these guys are going to be ADPs before we turn them to ATPs. So I'll write plus four ADPs. And then, after performing glycolysis-- and let me write it here. Let me write also-- sorry that was ADPs. Let me just rewrite that part right there. Four ADPs. And then you maybe need two phosphate groups. Because we're going to need four phosphate groups. Plus four-- I'll just call them, sometimes they're written like that. But maybe I'll write it like this. Four phosphate groups. And then once you perform glycolysis, you have two pyruvates, you have two NADHs. The NAD has been reduced. It gained a hydrogen. RIG. OIL RIG. Reduction is gain an electron. But in the biological sense, we think of it gaining the hydrogen. Because hydrogen is very non-electronegative, so you're hogging its electrons. You've gained its electrons. So two NADHs and then plus these two ATPs get used in the investment phase. That's why I kind of wrote them a little separately. So these two get used. So then you're left with two ADPs. And then these guys, essentially, get turned into ATPs. So plus four ATPs. I guess we didn't need four. We only needed a net of two phosphate groups. Because two jump off of here. And then we need a total of two more to get four jumping on there. But the big picture is, you start with a glucose, you end up with two pyruvates. You use up two ATPs. You get four ATPs. So you have a net of two ATPs formed. Let me write that very big. Net, what you get out of glycolysis, is two ATPs. You get two NADHs that can each later be used in the electron transport chain to produce three ATPs. You get two NADHs and you get two pyruvates, which are going to be re-engineered into acetyl-CoAs that are going to be the raw materials for the Krebs cycle. But these are the outputs of glycolysis. So now that we have that big picture, let's actually look at the mechanism. Because this is a little bit more daunting when you see it here. But we'll see the same themes that I just talked about. We're starting with a glucose right there. It is a six chain. It's in a circle, in a ring. One, two, three, four, five, six carbons. I could write it like that, just to make a huge oversimplification. It goes through a few steps. I use an ATP here. So let me do that in a color. Let me do it in orange whenever I use an ATP. I use one ATP there. I use one ATP there. And just like I told you, they have a slightly different name for it. But this is the phosphoglyceraldehyde right here. They call it glyceraldehyde 3-phosphate. It's the exact same molecule. But as you can see, just when I drew it very roughly before, you've got one, two three carbons there. And it also has a phosphate group on it. The phosphate group's actually attached to the oxygen. But for just for simplification I draw the phosphate group just like that. And I showed that right here. This was the phosphoglyceraldehyde right here. This is the actual structure up here. But I think sometimes when you look at the structure it's easy to miss the big picture. And there are two of these. They kind of say that you can go back and forth with this, with this other kind of isomer of this. But the important thing is that you have two of these compounds that are now 3-carbon compounds. Glucose has been split. And now we're ready to enter the payoff phase. Remember you have two of these compounds right here. That's why, when they drew this mechanism, they wrote times two right there. Because the glucose has been split into two of these molecules. So each of the molecules are now going to do this right here. And for each of the glyceraldehyde 3-phosphates, or PGALs, or phosphoglyceraldehyde, we can look at the mechanism and say, OK look here, there's going to be an ADP turning into an ATP there. So this is plus one ATP. And then we see it again happening here on our way to pyruvate. On our way to pyruvate right, there then we have another plus one ATP. So for each of the PGALs, or the phosphoglyceraldehydes that were produced, we're producing two ATPs in the payoff phase. Now there were two of these. So total for one glucose, we're going to produce four ATPs in the payoff phase. So in the payoff phase, four ATPs. In the investment phase we used one, two ATPs. So total net ATPs directly generated from glycolysis is two ATPs. Four, gross produced. But we had to invest two in the investment phase. And then the NADs and the NADHs, we see right here. For each phosphoglyceraldehyde, or glyceraldehyde 3-phosphates or PGALs or whatever you want to call them, at this stage right here you see that we are reducing NAD plus to NADH. So this happens once for each of these compounds. And obviously there are two of these. Glucose got split into two of these guys. So two NADHs are going to be produced. And later these are going to be used in the electron transport chain to actually each produce three ATPs. And then finally, when everything is said and done, we're left with the pyruvates. And it's nice, at least that they made it nice and big. We can take a look at what a pyruvate looks like. And just as promised, we can look at all the oxygen bonds and all that. But it's a 3-carbon structure. It has a 3-carbon backbone. So the end result is that the carbon, that the glucose got split in half. It got oxidized. Some of the hydrogens got stripped off of it. As you can see there's only three hydrogens here. We started off with 12 hydrogens in glucose. And now it has its carbons bonding more strongly with oxygen. So it's essentially having its electrons stolen by the oxygens, or hogged by the oxygens. So carbon has gotten oxidized in this process. There's going to be more oxidation left to do. And in the process we were able to generate two net ATPs and two NADHs that can later be used to produce ATPs. Anyway, hopefully you found that helpful.",
    "4": "Transcriber: Andrea McDonough Reviewer: Jessica Ruby In all times and places in our history, human beings have wondered, 'Where did we come from? What's our place in the world? What happens to us after we die?' Religions are systems of belief that have developed and evolved over time in response to these and other eternal mysteries, driven by the feeling that some questions can only be answered by faith and based on an intuition that there is something greater than ourselves, a higher power we must answer to, or some source we all spring from and to which we must return. Hinduism means the religions of India. It's not a single religion but rather a variety of related beliefs and spiritual practices. It dates back five millennia to the time of Krishna, a man of such virtue that he became known as an avatar of Vishnu, an incarnation of the god in human form. He taught that all life follows karma, the law of cause and effect, and our job is to do our duty, or dharma, according to our place in society without worrying how things turn out. When we die, we are reincarnated into a new body. If we followed our dharma and did our proper duty in our past life, we get good karma, which sends our soul upward in the social scale. Our rebirth into the next life is thus determined by what we do in this one. The wheel of rebirths is called samsara. It's possible for a very holy person to lead a life with enough good karma to escape the wheel. This escape is called moksha. Hinduism teaches that everything is one. The whole universe is one transcendent reality called Brahman, and there's just one Brahman but many gods within it, and their roles, aspects, and forms differ according to various traditions. Brahma is the creator, Vishnu is the preserver who sometimes takes on human form, and Shiva is the transformer, or Lord of the Dance. Durga is the fiercely protective divine mother. Ganesha has an elephant head and is the wise patron of success. Hinduism is the third largest religion in the world. And although most Hindus live in India, they can be found on every continent, one billion strong. Now, let's travel west, across deserts and mountains to the fertile crescent about 4,000 years ago. Judaism began with God calling Abraham and Sarah to leave Mesopotamia and migrate to the land of Canaan. In return for their faith in the one true God, a revolutionary concept in the polytheistic world of that time, they would have land and many descendants. From this promise came the land of Israel and the chosen people, but staying in that land and keeping those people together was going to be very difficult. The Israelites were enslaved in Egypt, but God freed them with the help of the prophet Moses, who received the Ten Commandments and later hundreds more. They conquered the Promised Land, but could only keep it for a few hundred years. Israel sits at a crossroads through which many armies marched over the centuries. And in the year 70, the Romans destroyed the temple in their capital, Jerusalem. So, the religion transformed itself from a temple religion with sacrifices and priests to a religion of the book. Because of this, Judaism is a faith of symbolism, reverence, and deep meanings tied to the literature of its history. The many sacred scriptures make up the Hebrew bible, or Tanakh, and hundreds of written discussions and interpretations are contained in an expansive compendium of deeper meanings, called the Talmud. Jews find rich, symbolic meaning in daily life. At the Passover meal, every item on the menu symbolizes an aspect of the escape from slavery. The importance of growing up is emphasized when young people reach the age of bar and bat mitzvah, ceremonies during which they assume responsibility for their actions and celebrate the weaving of their own lives into the faith, history, and texts of the Jewish people. There are 14 million Jews in the world today, 6 million in Israel, which became independent following the horrors of genocide in World War II, and 5 million in the United States. But now let's go back 2500 years and return to India where Buddhism began with a young prince named Siddhartha. On the night he was conceived, his mother, Queen Maya, is said to have been visited in her sleep by a white elephant who entered her side. Ten months later, Prince Siddartha was born into a life of luxury. Venturing forth from his sheltered existence as a young man, he witnessed the human suffering that had been hidden from him and immediately set out to investigate its sources. Why must people endure suffering? Must we reincarnate through hundreds of lives? At first he thought the problem was attachment to material things, so he gave up his possessions. He became a wandering beggar, which he discovered certainly made him no happier. Then he overheard a music teacher telling a student, 'Don't tighten the string too much, it will break. But don't let it go too slack, or it will not sound.' In a flash, he realized that looking for answers at the extremes was a mistake. The middle way between luxury and poverty seemed wisest. And while meditating under a bodhi tree, the rest of the answer came to him. All of life abounds with suffering. It's caused by selfish craving for one's own fulfillment at the expense of others. Following an eight-step plan can teach us to reduce that craving, and thus reduce the suffering. On that day, Siddhartha became the Buddha, the enlightened one. Not the only one, but the first one. The Buddhist plan is called the Eightfold Path, and though it is not easy to follow, it has pointed the way for millions to enlightenment, which is what Buddhahood means, a state of compassion, insight, peace, and steadfastness. From the time he got up from under that tree to the moment of his death as an old man, the Buddha taught people how to become enlightened: right speech, right goals, a mind focused on what is real, and a heart focused on loving others. Many Buddhists believe in God or gods, but actions are more important than beliefs. There are nearly a billion Buddhists in the world today, mostly in East, Southeast, and South Asia. 2,000 years ago in Judaism's Promised Land, Christianity was born. Just as Hindus called Krishna 'God in Human Form,' Christians say the same thing about Jesus, and Christianity grew out of Judaism just as Buddhism grew out of Hinduism. The angel Gabriel was sent by the God of Abraham to ask a young woman named Mary to become the mother of his son. The son was Jesus, raised as a carpenter by Mary and her husband Joseph, until he turned 30, when he began his public career as the living word of God. Less interested in religiousness than in justice and mercy, Jesus healed the sick in order to draw crowds and then taught them about his heavenly father -- affectionate, forgiving, and attentive. Then, he would invite everyone to a common table to illustrate his Kingdom of God, outcasts, sinners, and saints all eating together. He had only three years before his unconventional wisdom got him into trouble. His enemies had him arrested, and he was executed by Rome in the standard means by which rabble-rousers were put to death, crucifixion. But shortly after he was buried, women found his tomb empty and quickly spread word, convinced that he had been raised from the dead. The first Christians described his resurrected appearances, inspiring confidence that his message was true. The message: love one another as I have loved you. Christians celebrate the birth of Jesus in December at Christmas, and his suffering, death, and resurrection during Holy Week in the spring. In the ceremony of baptism, a washing away of sin and welcoming into the Christian community, recall Jesus's own baptism when he left his life as a carpenter. In the rite of Communion, Christians eat the bread and drink the wine blessed as the body and blood of Jesus, recalling Jesus's last supper. There are two billion Christians worldwide, representing almost a third of the world's people. Islam began 1400 years ago with a man of great virtue, meditating in a mountain cave in the Arabian desert. The man was Muhammad. He was visited by a divine messenger, again the angel Gabriel, in Arabic, Jibril, delivering to him the words of Allah, the one God of Abraham. In the next few years, more and more messages came, and he memorized and taught them. The verses he recited were full of wise sayings, beautiful rhymes, and mysterious metaphors. But Muhammad was a merchant, not a poet. Many agreed the verses were indeed the words of God, and these believers became the first Muslims. The word Muslim means one who surrenders, meaning a person who submits to the will of God. A Muslim's five most important duties are called the Five Pillars: Shahada, Muslims declare publicly, there is no other God but Allah, and Muhammad is his final prophet; Salat, they pray five times a day facing Mecca; Zakat, every Muslim is required to give 2 or 3% of their net worth to the poor; Sawm, they fast during daylight hours for the lunar month of Ramadan to strengthen their willpower and their reliance on God; and Hajj, once in a lifetime, every Muslim who is able must make a pilgrimage to the holy city of Mecca, rehearsing for the time when they will stand before God to be judged worthy or unworthy of eternal life with Him. The words of God, revealed to the prophet over 23 years, are collected in the Quran, which literally translates into 'the recitation.' Muslims believe it to be the only holy book free of human corruption. It's also considered by many to be the finest work of literature in the Arabic language. Islam is the world's second largest religion, practiced by over one and a half billion Muslims around the globe. Religion has been an aspect of culture for as long as it has existed, and there are countless variations of its practice. But common to all religions is an appeal for meaning beyond the empty vanities and lowly realities of existence, beyond sin, suffering, and death, beyond fear, and beyond ourselves.",
    "8": "What do we know from the Urban Water cycle? How do drinking water supply, wastewater discharge and treatment, and the groundwater and surface water systems interact? Welcome to 'Introduction to the Urban Water Cycle', my name is Luuk Rietveld, professor in Drinking water and Urban Water cycle Technology. After this lecture you will be able to understand the key elements of the urban water cycle; the global drivers for urban water management as well as the importance of water reuse within the urban environment. The Urban Water Cycle consists of technologies needed for drinking water intake, treatment and distribution as well as wastewater collection, treatment and discharge. To produce drinking water, water is extracted from the underground, so-called groundwater, or from rivers or lakes, the surface water. This source water isn't drinkable and must be treated. The treatment plant consists of different processes depending on the quality of the source and the requirements for drinking water. Afterwards, the water is transported to the city and distributed via a piped network to households, commercial buildings, public entities and small industries. After usage the water is collected in a sewer system and transported to a wastewater treatment plant, where the water quality is improved for discharge into the receiving water bodies. These surface waters can then, again, be used as a source for drinking water production. What was the main driver for developing the Urban Water Cycle? The centralised sanitation service finds its origin after the Cholera outbreak in London during 1849, when John Snow discovered that there was a clear relationship between Cholera and the use of a contaminated water well. He proved his theory by removing the pump handle from the well which resulted in a dramatic reduction of the spread of the cholera epidemic. Another example of the benefit of centralised drinking water supply is the number of people dying from Typhoid Fever in the Netherlands. This number decreased drastically with increasing number of people connected to the centralised drinking water supply system. In the 70s almost all people were connected and no outbreaks were found anymore. The only new threat is the occurrence of Legionella bacteria in warm water installations. This is especially hazardous for the Netherlands, as here, chlorine isn't used for safety disinfection during distribution. Special designs and cleaning procedures have been developed to diminish the risk of recontamination within the distribution network. The importance of water supply and proper sanitation is reflected in the words of former UN Secretary-General Kofi Annan: 'No single measure would do more to reduce disease and save lives in the developing world than bringing safe water and adequate sanitation to all'. Do we all have sufficient drinking water? While in countries like the Netherlands, the water problems are more or less solved, world-wide there is a mountain to climb. In many countries in the world water scarcity is a problem. This can be due to physical causes, such as the reduced amount of rainfall, recharging the groundwater aquifers and the rivers; or due to economic factors, meaning that there is no financial capacity to construct and maintain adequate water supply systems. Water scarcity leads to water stress, meaning that the demand for safe drinking water is higher than the availability of water resources with sufficient capacity. Water stress can result in insufficient supply, causing unmet water demands or over-extraction, leading to the depletion of groundwater and surface water resources. In the meantime water demand is increasing. With increasing population, urbanisation and industrialisation, more water has to be supplied to cities. These growing demands and demographic changes are also impacting the drainage system and the pollution of the urban environment. The growing water demand requires greater capacity from water sources. These water sources are mostly not found in the vicinities of cities. A clear example is the water supply for Johannesburg and Pretoria, which sources their water from the Vaal dam. The Lesotho Highlands project, approximately 500km away, entails several dams and tunnels in the mountains, to enforce the Vaal river and Vaal dam in order to secure the water supply to this urbanised area. Water scarcity isn,t only a quantity issue, but also a quality one. When sources are too polluted to be a reliable source, water availability is under stress. Therefore, major efforts are made to improve the surface water quality. In the past the main focus was to remove organic matter from the wastewater, but more recently advanced nutrient removal is applied to avoid eutrophication of the receiving water bodies. Future emphasis will be laid on the removal of Endocrine disrupting compounds such as pesticides and pharmaceuticals to avoid the accumulation of persistent organics in the environment. With the increased efforts in wastewater treatment, the effluent of the wastewater treatment plant sometimes obtains a better quality than the receiving water bodies. In these cases, the question arises whether the effluent isn't a better source for drinking and industrial water supply than some surface water bodies. Several examples exist. The most famous one is in Windhoek, Namibia, where about 25% of the domestic water supply consists of reclaimed wastewater. The effluent of the wastewater treatment plant is extensively treated with ozone, coagulation, flotation, activated carbon filtration, membrane filtration and chlorination. The product can compete with the water obtained from the original source about 300 km away from the city. Another example is the water supply for Dow Chemical in Terneuzen, the Netherlands. The industrial water supply is fed by the wastewater treatment plant of the city of Terneuzen. It appears to be more economical to treat the effluent than to desalinate the salt water flowing in front of the industrial area. In this way the urban water cycle is closed. Thank you for your attention and I hope to see you again for the next lecture on 'the design aspects of drinking water treatment'!",
    "2": "So let's get started. All of classical information, can be written down in terms of classical bits. As a result, all of classical computing, communication and cryptographic systems, work with classical bits. As you might imagine, quantum cryptography, instead works with quantum bits, or in short qubits. Qubits are rather different than classical bits, for example we'll see that they cannot be copied. So what precisely are these qubits ? Before we start with qubits let's briefly go back to classical bits. A classical bit is just zero or one. But it will be useful to write these classical bits slightly differently. We will associate both of these bits with a vector. For zero I will pick this vector, and for the one I'll take this one. These two vectors are orthogonal so we can think that there is an axis which I’ll call alpha, with the vector zero, and another axis beta with the vector one. So far so good, we haven't really done anything there, we've just written the bits in another way. So how about these qubits? If I draw these bits as vectors, it's now somehow intuitive that we may not just have orthogonal vectors. But we may also consider a vector in the middle. This funny notation is how people in quantum information like to write vectors. We can of course express it in terms of the coordinates alpha and beta. So written in terms of the original classical bits, I can express the vector as alpha times zero plus beta times one. We also say that such qubit can be a superposition of zero and one. So a qubit has a little of the zero and a littleof the one bit in it at the same time. You are maybe wondering if any vector can be a qubit, for example could this vector also be a qubit? We will make a demand: namely that the length of the qubit vector is still the same as the length of the classical vectors. For classical bits, we have taken vectors which have exactly length one. So we demand that our qubits also correspond to the vectors that have length exactly one. And if alpha and beta are real numbers, as I've drawn them here, that just means that the square of alpha plus the square of beta equals one. So this is a qubit. This is a rather mathematical description, but you might be wondering what do qubits really look like in the real world? What does this mathematical expression model? There are of course a lot of possible experimental implementations of qubits. We will actually not be concerned with them in this lecture series, but it's good to have a quick look to get an intuition about what superposition really means. When we consider a maze, and we were to associate 0 with left and 1 with right, then classically I can send a particle either to the left or to the right. However quantumly, we can do left and right at the same time. This means that the particle would be in a superposition of being on the left and on the right. Another example is a physical implementation in which we represent bits by energy levels in an atom. We can say that the ground state, or the lower one, is called zero, and the higher one is called one. A classical bit means that my systems will be in one of these two states, so either in the ground state, or in the excited state. A qubit can be in a superposition between zero and one that is effectively in the ground and the excited state at the same time. To construct the qubits, we started from the classical bits as vectors. And these vectors have a special name, namely we will call them the standard basis, or sometimes also the computational basis. So our qubit looks like this, and the question is what are these numbers alpha and beta? We'll call these numbers amplitudes, and even though I've drawn them on the real number sphere, they can in general be complex numbers. A qubit is thus some element of a complex vector space of dimension two. Let's see how we can express the demand that our vectors should have length one. We have this very peculiar way of writing vectors, in terms of kets and bras. So this is what we will call a ket, and if I turn it around it becomes a bra. So what does this mean ? We take the vector and we compute its conjugate transpose. The star will be our notation for conjugate, and the big T will be our notation for transpose. Okay, so a bra is nothing else than a short way of writing the conjugate transpose. Let me actually do that. [Calculation] It will be convenient for us to look at the inner product between two vectors. In terms of the vectors: the ket and the bra, giving us the bracket, we can now write this quite simply, where of course we'll be lazy and omit the double middle stripe and just write one. To give an example, this means that if I take the inner product [Calculation] I can also write this as alpha squared plus beta squared. So the condition that the vector has length one can now be written in terms of the inner product. So what we’ll get, is that the inner product of the vector with itself needs to be one. So in terms of the alpha and the beta, this means that the sum of alpha squared and beta squared is one. So this is a qubit. Let's have a look at an example. I will draw a real example, so alpha and beta will be real numbers. let's remember that these vectors define the standard basis. The state that I want to consider, the qubit that I’m going to consider, is the one which is just in the middle between zero and one. We often write this state as ket plus state, which is an equal superposition between zero and one Let's now check that's a valid qubit. It's obviously a vector, so this is a good start. So what we want, is we want to check that the length of the vector is one. So let's do that! [Calculation] Of course another example of a qubit would be another direction, for example this one.",
    "3": "Wherever you get your drinking water from, the tap, a bottle or a source; your water will not just contain pure H2O. But what is in your water? And, more importantly, what should definitely not be in there? Welcome to the Drinking Water Treatment lecture about Water Quality Aspects. My name is Doris van Halem and I’m an Assistant Professor in Drinking Water. Today I’m your lecturer and I will introduce you to five groups of water quality parameters that are used to assure safe drinking water supply: first, organisms in drinking water, Health-related, chemical parameters, Aesthetics of drinking water and Operational parameters. well the first group of key compounds in water consists of living creatures, which you most often cannot see with the naked eye. In nature, water contains all kinds of organisms and they are present in large numbers. Also in drinking water many different organisms are found. Generally speaking, these organisms can be divided, based on their size, into four groups: higher organisms, protozoa, bacteria, and viruses. Higher organisms have a size between 0.05 and 10mm. And just to give you an idea, a kubic meter of water may contain over 50,000 of these organisms, which gives the impression of a large zoo and seems unsuitable for drinking and to be avoided by vegetarians. However, fortunately a characteristic of most of these higher organisms is that they are harmless to human health. Their presence in drinking water is only aggravating if they are detectable by the naked eye. An overview of higher organisms as typically found in drinking water is depicted in the slide. Also smaller micro-organisms, such as bacteria, can be abundantly found in water – and the majority of them does not effect your health. However, there are also micro-organisms that are harmful to human health, the so called pathogenic micro-organisms. Pathogenic micro-organisms, or pathogens in short, are not present in water by nature, but they enter the water through feces and urine from humans and animals. Pathogens have difficulty to survive in natural water, because the temperature of water is lower than body temperature. An important source of pathogens in surface water is the continuous supply of un-treated or not fully treated wastewater from domestic and bio-industry. Pathogenic micro-organisms can cause different diseases, which may become epidemic rather quickly in water. They can be divided into three main groups, each with their own individual characteristics: protozoa, which are single cell animals and can cause diarrhea and stomach complaints; bacteria, responsible for typhoid fever and cholera; And virusses, the smallest organism of the three groups, and responsible for hepatitis and polio. The size of bacteria, viruses and different particulate matter in water determines to a great extend their removal potential by treatment processes. There are analytical techniques to determine what pathogens are in your water, but for safety reasons, it is preferred to measure indicator organisms and not the real disease-causing organisms. For bacteria, coliforms or E.coli are used as indicator organisms, they can also be found in human and animal feaces, but can be more safely cultured and counted as colony forming units per volume. For virus indicators, mostly bacteriophages are used which form plaque forming units per volume. This picture shows an example of a plated sample for E.coli counts, each dot represents a colony forming unit. In the guidelines of the World Health Organisation it is stated that when testing a 100mL sample no E.coli may be found. Well, lets get to the second group of parameters important for safe drinking water which consists of dissolved compounds, both inorganic and organic. A subdivision can also be made based on the concentrations present in the water, as macro-pollutants for concentrations over 1 mg/L and micropollutants for concentrations below 1 mg/L. The long list of health related, chemical constituents can be divided roughly into: Metals and related substances (such as, lead, mercury, arsenic) Organic micro-pollutants (for example, pesticides and hormones) Disinfection byproducts (such as, bromate or trihalomethanes) And other compounds (which include nitrate and fluoride) Guidelines for the maximum allowable concentration exist for these substances based on considerations, such as, health impact, detection limit of measuring device or the available removal technologies. The impact of a substance on human health depends strongly on the specific constituent, making dose-response and epidemiological studies crucial. Most countries have their own guidelines for these parameters, there are also international guidelines, for example of the World Health Organization and the European Union. In EU guidelines a maximum allowable health risk of 10-5 is used for the formulation of the guidelines. In other words, it is accepted that no more than 1 in 100,000 persons can develop an illness for each contaminant. well let’s briefly go through the different chemical contaminants that can threat safe drinking water supply. Metals and related substances include well-known heavy metals, such as mercury and lead, but also the metalloid arsenic. These contaminants originate from different sources, including natural release into the water from sediments, through mining activities, and industrial or domestic discharges. These metals and related substances pose a serious threat to human health as they are often carcinogenic and may cause illnesses to the bladder, lungs and other organs. Another group of chemical compounds that may be present in source water are organic micro-pollutants, shortly called OMPs. This group includes contaminants such as pesticides pharmaceuticals human care products drugs and gasoline. Depending on the dose these compounds may be genotoxic and endocrine disrupting. A particular concern is the potential mixture of many different of these organic micropollutants, such a cocktail of substances, as it is difficult to detect and to remove. Disinfection by-products are originally not present in the source water, but enter the water during a treatment process for disinfection. Disinfectants react with natural organic matter or bromide in the water, resulting in carcinogenic by-products. Trihalomethanes, NDMA and bromate are examples of DBPs which are produced by chlorine, chloramine and ozone, respectively. In the group “other compounds” I want to mention nitrate and fluoride specifically. Nitrate enters the water in areas with agriculture and is particularly of concern for babies and pregnant women, as nitrate is transformed to nitrite in the body. Fluoride can be naturally present in groundwater and at elevated levels is also threat to human health, as it may cause brittle bones are tooth decay. Now you have been introduced to the most important parameters that can have an impact on human health. These microbial and chemical parameters are crucial for the safety of drinking water. However, also the consumer comfort and perception is a key component to take into account. Therefore the third group of water quality parameters is “aesthetics of drinking water”. The water should not only be safe, but also be perceived as safe. Odour and taste are subjective parameters which cannot be detected by any device. The acceptable taste and odour of water are determined by using consumer panels, which determine whether they detect any unpleasant flavor or smell to the water. Colour and turbidity can be measured in the water. As well as the constituents influencing the aesthetics of drinking water, such as iron and sulphate. Also, undissolved matter or suspended solids can cause discoloration or turbidity to the water. Undissolved matter consists of large or small particles which have not been dissolved in the water. This is a different group of contaminants than the earlier mentioned chemical substances, as these are generally dissolved in water. A distinction based on size is made between suspended and colloidal matter. The diameter and specific gravity of particles are important for removing them from the water. Colloidal particles have are very small and have a specific gravity similar to water. Colloidal particles generally have a negative electric charge and their electrostatic repulsion makes them difficult to remove. Operational water quality parameters are very important for drinking water supply companies. The objective of these parameters is to assure that the water quality in maintained post-treatment. So, the water quality should not change because of storage in reservoirs, or distribution and use within households, such as heating. A distribution network can be an important source of contaminants, as organisms may grow in it. Important parameters contributing to growth in the distribution network are the organisms themselves, but also nutrients, temperature, bicarbonate for buffer capacity and dissolved oxygen. Apart from growth during distribution, also dissolution of pipelines should be prevented, so aggressive water should be prevented. For this reason saturation index, hardness, and pH. but also, hardness of water, as calcium and magnesium ions, is important as it may cause scaling in heating installations, a film on tea, and less effective use of detergents. Now you know the main water quality parameters that you need to pay attention to when designing a water treatment plant. Also, you have learned about their effect on human health, consumer demands and operation. The next step will be to learn more about how you can remove these constituents from the water. We have come to the end of this lecture, so I want to thank you for watching. And please use the discussion board for your questions.",
    "6": "If you've had surgery, you might remember starting to count backwards from ten, nine, eight, and then waking up with the surgery already over before you even got to five. And it might seem like you were asleep, but you weren't. You were under anesthesia, which is much more complicated. You were unconscious, but you also couldn't move, form memories, or, hopefully, feel pain. Without being able to block all those processes at once, many surgeries would be way too traumatic to perform. Ancient medical texts from Egypt, Asia and the Middle East all describe early anesthetics containing things like opium poppy, mandrake fruit, and alcohol. Today, anesthesiologists often combine regional, inhalational and intravenous agents to get the right balance for a surgery. Regional anesthesia blocks pain signals from a specific part of the body from getting to the brain. Pain and other messages travel through the nervous system as electrical impulses. Regional anesthetics work by setting up an electrical barricade. They bind to the proteins in neurons' cell membranes that let charged particles in and out, and lock out positively charged particles. One compound that does this is cocaine, whose painkilling effects were discovered by accident when an ophthalmology intern got some on his tongue. It's still occasionally used as an anesthetic, but many of the more common regional anesthetics have a similar chemical structure and work the same way. But for major surgeries where you need to be unconscious, you'll want something that acts on the entire nervous system, including the brain. That's what inhalational anesthetics do. In Western medicine, diethyl ether was the first common one. It was best known as a recreational drug until doctors started to realize that people sometimes didn't notice injuries they received under the influence. In the 1840s, they started sedating patients with ether during dental extractions and surgeries. Nitrous oxide became popular in the decades that followed and is still used today. although ether derivatives, like sevoflurane, are more common. Inhalational anesthesia is usually supplemented with intravenous anesthesia, which was developed in the 1870s. Common intravenous agents include sedatives, like propofol, which induce unconsciousness, and opioids, like fentanyl, which reduce pain. These general anesthetics also seem to work by affecting electrical signals in the nervous system. Normally, the brain's electrical signals are a chaotic chorus as different parts of the brain communicate with each other. That connectivity keeps you awake and aware. But as someone becomes anesthetized, those signals become calmer and more organized, suggesting that different parts of the brain aren't talking to each other anymore. There's a lot we still don't know about exactly how this happens. Several common anesthetics bind to the GABA-A receptor in the brain's neurons. They hold the gateway open, letting negatively charged particles flow into the cell. Negative charge builds up and acts like a log jam, keeping the neuron from transmitting electrical signals. The nervous system has lots of these gated channels, controlling pathways for movement, memory, and consciousness. Most anesthetics probably act on more than one, and they don't act on just the nervous system. Many anesthetics also affect the heart, lungs, and other vital organs. Just like early anesthetics, which included familiar poisons like hemlock and aconite, modern drugs can have serious side effects. So an anesthesiologist has to mix just the right balance of drugs to create all the features of anesthesia, while carefully monitoring the patient's vital signs, and adjusting the drug mixture as needed. Anesthesia is complicated, but figuring out how to use it allowed for the development of new and better surgical techniques. Surgeons could learn how to routinely and safely perform C-sections, reopen blocked arteries, replace damaged livers and kidneys, and many other life-saving operations. And each year, new anesthesia techniques are developed that will ensure more and more patients survive the trauma of surgery.",
    "5": "Welcome, in this clip we will focus on sedimentary rocks. Movements in the Earth have created mountains. Erosion will start and the mountains will produce sediments which we call terrigenous sediments. This is the source of our sedimentary system. The sediments will be transported by torrents through the mountains, will reach large open valleys and will eventually enter the sea where they will accumulate. This is the source-to-sink system, a key concept to understand sedimentary rocks and the resources they contain. Sedimentary rocks are of crucial importance for society and, therefore, for this course. The largest cities on Earth are on sedimentary rocks. Mankind as a whole depends on the responsible use of the subsurface. Water for agriculture and drinking, geothermal energy, an increasing number of subsurface infrastructures, hydrocarbons, mining and storage of CO2 are all examples of the increasing use we make of the subsurface . The source-o-sink system works somewhat like the economy. The source is comparable to the factory where the goods are produced. These are the sediments in our case. The sediments are then transported by different means end eventually will reach the market which, in our sedimentary system, is called the sink. Let us look first at how sediments are produced. Expanding and contracting ice, rain and wind are all major agents breaking mountain rocks and producing sediments. In addition, rain water, often acid, chemically attacks less stable minerals, such as feldspars producing clay minerals. Vertical movements in the source area are of great importance in sedimentary systems. If the source area experiences strong vertical movement, relief is rejuvenated and sediment production can continue for a long time. If, on the contrary, vertical movements decrease or even end, the source area will become less productive eventually producing no sediments at all. Vertical movements, rock types and climate in the source area are the key factors controlling the amount and type of sediments produced. Eroded sediments are taken over by water or wind and transported to the sea. In the mountains they will be carried by irregularly flowing torrents and creeks; in the foothills and in the wide gentle plains, rivers will be wider, often meandering and will regularly flood the surrounding plains. Eventually, the rivers will reach the sea, often forming large deltas and delivering the sediments to their final sink, the marine domain. A key concept in the analysis of sedimentary succession is that of accommodation space which is the vertical dimension of the space where sediments can be deposited and preserved. In the marine case, this corresponds to the height of the water column between sea level and sea floor. In the case shown in the picture, the accommodation space could be around 10m. If nothing else changes, sediments delivered to the sea will gradually fill up the accommodation space and the maximum sediment thickness will be comparable to that of the accommodation space, that is 10 meters. In the slide the remaining accommodation space is 4 meters. However, sedimentary successions in the world can be kilometers thick. We are clearly missing something important. The existence of thick sedimentary successions roughly deposited at the same water depth, requires an increase of accommodation space through time. This is achieved by letting the lower boundary of the accommodation zone descend through time. The downward movement of the surface of the Earth, which is called subsidence, is a process of key importance in Geology and in our MOOC. Sediments eroded from the mountains are not the only type of sediments you will find in the sea. Away from deltas and in areas where the waters are clean, carbonate sediments can develop. These are sediments created by organisms and, therefore, are necessarily linked to life. This is the domain of coral reefs, of large mud flats covered by algae and of clean white sands of shells and biological grains.",
    "1": "Hi, my name is Stefaan Pommé and together with you I want to explore radioactivity. Radioactive decay is a spontaneous, stochastic process whereby an unstable nucleus transforms into another nucleus. Different types of ionizing radiation can be emmitted, such as helium nuclei, called alpha particles, high-energy photons, called gamma-rays, and electrons or positrons, called beta particles. Radioactivity is omnipresent in nature. In soil, we find for example uranium-238, which decays to a series of other unstable nuclides. This decay chain stops when stable lead is reached. Each radionuclide has its own half-life by which half of the atoms are expected to decay. Unlike with an hour glass, it is not so that all atoms are decayed after two half-lives, The number of surviving atoms follows an exponential curve. Radioactivity is used as a clock in geochronology. We know that Earth is 4.5 billion years old because all short-lived nuclides have decayed away and only nuclides with long half-lives have survived with time. On the chart of nuclides, we can see the different types of radioactive decay. The black dots represent the 'valley of stability'. In the purple region, nuclides are in excess of protons and decay by beta-plus emission or electron capture to reduce the proton number. In orange are the nuclides with too many neutrons; they can convert a neutron into a proton by beta minus decay. The heavy nuclides, in yellow and green, can go on a nuclear diet by emitting a heavy alpha particle or take a crash diet by splitting into two fragments. Beta minus decay is the only spontaneous process in which a nucleus can cure from a neutron excess, by transforming a neutron into a proton and emitting an electron and an anti-neutrino from inside the nucleus. The mass number stays the same, so it is an isobaric process. The anti-neutrino cannot be detected, but we know it takes away on average 2 thirds of the released energy, leaving the beta particle with the remaining fraction, which can be any value between zero and the Q-value. In an energy diagram, we start from an unstable nucleus with high energy, jump to a lower energy state in the daughter product, and possibly go further down by gamma transitions. Whereas the exitation energy is usually expelled through emmission of gamma rays, sometimes the energy is converted to kick out an orbital electron, which is then called an conversion electron. Most excited levels in a nucleus are very short-lived but some live long enough to be called meta-stable states and their decay is called isomeric transition. Beta plus decay is somewhat opposite to beta minus decay: it converts a proton into a neutron, with emission of a positron and a neutrino. The positron is the antimatter counterpart of an electron and has a positive charge. There is also a difference with respect to the energy diagram: beta plus decay requires 1 MeV for the creation of an electron-positron pair in the nucleus, so that less energy is available to the positron and neutrino. This is what happens with the emitted positron. it meets an electron and together they annihilate and matter is converted into the pure energy in the shape of two 511 keV photons which fly away in opposite directions. Nuclear fireworks, which has a nice application in PET-scanners for medical imaging. Beta plus decay has a competitor, namely electron capture decay. The nucleas steals away an electron from one of the inner atomic shells and then converts a proton into a neutron and emits a neutrino. After electron capture, the atom has some rearranging to do. If a K-shell electron has been captured, an electron of a higher shell will take its place and an x-ray is emitted, or an Auger electron. This results in one or two gaps in higher shells which in turn have to be filled. Notice how atoms and nuclei deexcite is similar ways. When electrons rearrange in atoms, de keV exitation energy is taken away by x-rays or Auger electrons. When nucleons rearrange, gamma rays in the MeV range are emmitted or conversion electrons. Alpha decay is a sport played by the heavy weight class of nuclides. Excessive mass is lost by emission of a helium nucleus, which a kinetic energy of 4 to 8 MeV. In classical physics, an alpha particle cannot escape from the nucleus, because of the Coulomb barrier. In quantum mechanicsis it is possible to tunnel through the barrier. Even heavier particle clusters can escape, but that happens very rarely. Then what to think of spontaneous fission of a nucleus? This was believed energetically impossible at its discovery, but later it was realised that the potential barrier could be overcome if a big nucleus would stretch like a water drop into a dumbbell shape and then break apart. In reactors, fission is induced by neutrons which give the nucleus that extra binding energy it needs to pass the barrier. And with this bombshell we have reached the end of this decadent course. Thank you for your attention and don't lose your excitement!",
    "9": "Depression is the leading cause of disability in the world. In the United States, close to 10% of adults struggle with depression. But because it's a mental illness, it can be a lot harder to understand than, say, high cholesterol. One major source of confusion is the difference between having depression and just feeling depressed. Almost everyone feels down from time to time. Getting a bad grade, losing a job, having an argument, even a rainy day can bring on feelings of sadness. Sometimes there's no trigger at all. It just pops up out of the blue. Then circumstances change, and those sad feelings disappear. Clinical depression is different. It's a medical disorder, and it won't go away just because you want it to. It lingers for at least two consecutive weeks, and significantly interferes with one's ability to work, play, or love. Depression can have a lot of different symptoms: a low mood, loss of interest in things you'd normally enjoy, changes in appetite, feeling worthless or excessively guilty, sleeping either too much or too little, poor concentration, restlessness or slowness, loss of energy, or recurrent thoughts of suicide. If you have at least five of those symptoms, according to psychiatric guidelines, you qualify for a diagnosis of depression. And it's not just behavioral symptoms. Depression has physical manifestations inside the brain. First of all, there are changes that could be seen with the naked eye and X-ray vision. These include smaller frontal lobes and hippocampal volumes. On a more microscale, depression is associated with a few things: the abnormal transmission or depletion of certain neurotransmitters, especially serotonin, norepinephrine, and dopamine, blunted circadian rhythms, or specific changes in the REM and slow-wave parts of your sleep cycle, and hormone abnormalities, such as high cortisol and deregulation of thyroid hormones. But neuroscientists still don't have a complete picture of what causes depression. It seems to have to do with a complex interaction between genes and environment, but we don't have a diagnostic tool that can accurately predict where or when it will show up. And because depression symptoms are intangible, it's hard to know who might look fine but is actually struggling. According to the National Institute of Mental Health, it takes the average person suffering with a mental illness over ten years to ask for help. But there are very effective treatments. Medications and therapy complement each other to boost brain chemicals. In extreme cases, electroconvulsive therapy, which is like a controlled seizure in the patient's brain, is also very helpful. Other promising treatments, like transcranial magnetic stimulation, are being investigated, too. So, if you know someone struggling with depression, encourage them, gently, to seek out some of these options. You might even offer to help with specific tasks, like looking up therapists in the area, or making a list of questions to ask a doctor. To someone with depression, these first steps can seem insurmountable. If they feel guilty or ashamed, point out that depression is a medical condition, just like asthma or diabetes. It's not a weakness or a personality trait, and they shouldn't expect themselves to just get over it anymore than they could will themselves to get over a broken arm. If you haven't experienced depression yourself, avoid comparing it to times you've felt down. Comparing what they're experiencing to normal, temporary feelings of sadness can make them feel guilty for struggling. Even just talking about depression openly can help. For example, research shows that asking someone about suicidal thoughts actually reduces their suicide risk. Open conversations about mental illness help erode stigma and make it easier for people to ask for help. And the more patients seek treatment, the more scientists will learn about depression, and the better the treatments will get.",
}

with open("../../data/nirmal_targets.csv", 'w') as outf:
    for u, v in docs_target.items():
        outf.write(f"{u}\t{v}\n")

## Get URL contents at the time.

In [354]:
%reload_ext autoreload
%autoreload 2
from wayback_api import get_from_wayback
urls_to_fetch = set()
for u in dataset:
    for clicks in u['clicks']:
        urls_to_fetch.add(clicks['url'])

In [357]:
docs_content = {}

In [367]:
len(docs_content)

383

In [None]:
g

In [358]:
timestamp = "20191022"
for u in tqdm(urls_to_fetch):
    if u in docs_content:
        continue
    content = get_from_wayback(u, timestamp)
    if len(content.strip()) == 0:
        continue
    docs_content[u] = content

  0%|          | 0/538 [00:00<?, ?it/s]

	 Could not get anyhing good from page...
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	 Could not get anyhing good from page...
	Not on archive! Trying today....
	 Could not get anyhing good from page...
	 Could not get anyhing good from page...
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive! Trying today....
	Not on archive!

In [364]:
with open("../../data/clicked_docs_nirmal.tsv", 'w') as outf:
    for u in docs_content:
        topic = docs_topic[u]
        clean_text = docs_content[u].replace("\n", " ").replace("\t", " ")
        outf.write(f"{u}\t{topic}\t{clean_text}\n")