In [1]:
import os
import sys
sys.path.append("../")

import pandas as pd
from tqdm import tqdm

from langchain.document_loaders import PyPDFLoader

from utils.arxiv_utils import get_inspire_hep_papers, extract_arxiv_ids, download_arxiv_source, remove_latex_preamble
from utils.db_utils import update_dataframe, delete_files_except_extensions, get_filenames_with_extensions, scrape_website_text

%load_ext autoreload
%autoreload 2

In [15]:
victim = "MarieCurie"#"Oppie"
#victim_inspire_ID = #"J.Robert.Oppenheimer.1"
#cutoff_year = None

In [16]:


pdf_dir = f'../data/{victim}/papers/'  # directory to store PDFs
db_dir = f'../data/{victim}/db/'  # directory to store database CSVs
txt_dir = f'../data/{victim}/interviews/'  # directory to store interview transcripts

try:
    os.makedirs(pdf_dir)
except FileExistsError:
    print(f"Directory '{pdf_dir}' already exists")

try:
    os.makedirs(db_dir)
except FileExistsError:
    print(f"Directory '{db_dir}' already exists")

try:
    os.makedirs(txt_dir)
except FileExistsError:
    print(f"Directory '{txt_dir}' already exists")

Directory '../data/MarieCurie/papers/' already exists
Directory '../data/MarieCurie/db/' already exists
Directory '../data/MarieCurie/interviews/' already exists


## 1. Papers

In [3]:
# Get papers from INSPIRE-HEP, with a year cutoff
papers = get_inspire_hep_papers(victim_inspire_ID,year_cutoff= 1930) 
print(papers)

# Extract arXiv IDs from papers
arxiv_ids = extract_arxiv_ids(papers) 

# Download papers (sources if available, otherwise PDFs)
[download_arxiv_source(arxiv_id, output_dir=pdf_dir) for arxiv_id in tqdm(arxiv_ids)];  

[{'links': {'bibtex': 'https://inspirehep.net/api/literature/47479?format=bibtex', 'latex-eu': 'https://inspirehep.net/api/literature/47479?format=latex-eu', 'latex-us': 'https://inspirehep.net/api/literature/47479?format=latex-us', 'json': 'https://inspirehep.net/api/literature/47479?format=json', 'cv': 'https://inspirehep.net/api/literature/47479?format=cv', 'citations': 'https://inspirehep.net/api/literature/?q=refersto%3Arecid%3A47479'}, 'id': '47479', 'created': '2008-09-22T00:00:00+00:00', 'metadata': {'control_number': 47479}, 'updated': '2023-03-07T06:53:03.817337+00:00'}, {'links': {'bibtex': 'https://inspirehep.net/api/literature/46695?format=bibtex', 'latex-eu': 'https://inspirehep.net/api/literature/46695?format=latex-eu', 'latex-us': 'https://inspirehep.net/api/literature/46695?format=latex-us', 'json': 'https://inspirehep.net/api/literature/46695?format=json', 'cv': 'https://inspirehep.net/api/literature/46695?format=cv', 'citations': 'https://inspirehep.net/api/literatur

0it [00:00, ?it/s]


In [4]:
# Delete all files except PDFs and TeX files; load relevant files list
delete_files_except_extensions(pdf_dir, ['.pdf', '.tex'])

In [4]:
filenames = get_filenames_with_extensions(pdf_dir, ['.tex', '.pdf'])

In [17]:
# Get text
source_type = []
text = []

for i, file in enumerate(tqdm(filenames)):
    try:
        if os.path.splitext(file)[-1] == '.pdf':
            loader = PyPDFLoader("{}/{}".format(pdf_dir, file))
            pages = loader.load_and_split()
            text.append(''.join([page.page_content for page in pages]))
            source_type.append("paper")
        elif os.path.splitext(file)[-1] == '.tex':
            with open("{}/{}".format(pdf_dir, file), 'r', encoding='iso-8859-1') as f:
                text.append(remove_latex_preamble(f.read()))
                source_type.append("paper")
        print("Loaded file {}".format(file))
    except:
        print("Error with file {}".format(file))

  0%|                                                                                                                          | 0/6 [00:00<?, ?it/s]

Loaded file CR1898_p1101.pdf


 67%|████████████████████████████████████████████████████████████████████████████                                      | 4/6 [00:01<00:00,  3.68it/s]Ignoring wrong pointing object 2 65536 (offset 0)
Ignoring wrong pointing object 9 65536 (offset 0)
Ignoring wrong pointing object 15 65536 (offset 0)
Ignoring wrong pointing object 21 65536 (offset 0)
Ignoring wrong pointing object 27 65536 (offset 0)
Ignoring wrong pointing object 33 65536 (offset 0)
Ignoring wrong pointing object 39 65536 (offset 0)
Ignoring wrong pointing object 45 65536 (offset 0)
Ignoring wrong pointing object 51 65536 (offset 0)
Ignoring wrong pointing object 57 65536 (offset 0)
Ignoring wrong pointing object 63 65536 (offset 0)
Ignoring wrong pointing object 69 65536 (offset 0)
Ignoring wrong pointing object 75 65536 (offset 0)
Ignoring wrong pointing object 81 65536 (offset 0)
Ignoring wrong pointing object 87 65536 (offset 0)
Ignoring wrong pointing object 93 65536 (offset 0)
Ignoring wrong pointing object 99 655

Loaded file marie_curie.pdf
Loaded file radium-and-radioactivity-1904.pdf
Loaded file CO14_2p074.pdf


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.90it/s]

Loaded file Curie_et_al_RevModPhys1931.pdf
Loaded file yjbm00205-0023.pdf





In [18]:
data = [source_type, text]

# Transpose the data to have the inner lists as rows
transposed_data = list(map(list, zip(*data)))

# Column names for the DataFrame
columns = ['source_type','text']

# Create the DataFrame
df = pd.DataFrame(transposed_data, columns=columns)

In [19]:
# Update dataframe
df = update_dataframe('{}/df_text.csv'.format(db_dir), df)
df.to_csv('{}/df_text.csv'.format(db_dir), index=False)

## 2. YouTube videos

In [9]:
# from tqdm import tqdm
# import whisper
# import pytube
# from pathlib import Path
# import subprocess
# import numpy as np

In [10]:
# videos_dir = "../data/videos/"

# try:
#     os.makedirs(videos_dir)
# except FileExistsError:
#     print(f"Directory '{videos_dir}' already exists")

In [11]:
# # Get whisper model; download weights if necessary
# whisper_model = whisper.load_model("tiny.en").to('cpu')
# options = whisper.DecodingOptions(language="en", without_timestamps=True)

# url = "https://www.youtube.com/watch?v=dqxdPNzBY0I"
# pytube_vid = pytube.YouTube(url)
# video_path_local = Path(videos_dir).resolve() / (pytube_vid.video_id+".mp4")
# pytube_vid.streams.filter(type="audio", mime_type="audio/mp4", abr="48kbps").first().download(output_path=video_path_local.parent, filename=video_path_local.name)
# video_path_local = video_path_local.with_suffix(".wav")
# result  = subprocess.run(["ffmpeg", "-i", str(video_path_local.with_suffix(".mp4")), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(video_path_local)])
# transcription = whisper.transcribe(whisper_model, str(video_path_local))

## 3. Interviews

In [12]:
filenames = get_filenames_with_extensions(txt_dir, ['.txt'])

In [13]:
source_type = []
text = []

for i, file in enumerate(tqdm(filenames)):
    try:
        with open("{}/{}".format(txt_dir, file), 'r') as f:
            text.append(f.read())
            source_type.append("interview")
    except:
        print("Error with file {}".format(file))

0it [00:00, ?it/s]


In [21]:
# Example text chunks
text_chunks = [
    "Warsaw was then under Russian domination, and one of the worst aspects of this control was the oppression exerted on the school and the child. The private schools directed by Poles were closely watched by the police and overburdened with the necessity of teaching the Russian language even to children so young that they could scarcely speak their native Polish. Nevertheless, since the teachers were nearly all of Polish nationality, they endeavored in every possible way to mitigate the difficulties resulting from the national persecution. These schools, however, could not legally give diplomas, which were obtainable only in the schools of the government. These schools, entirely Russian, were directly opposed to the Polish national spirit. All instruction was given in Russian, by Russian professors, who, being hostile to the Polish nation, treated their pupils as enemies. Men of moral and intellectual distinction could scarcely agree to teach in schools where an alien attitude was forced upon them. So what the pupils were taught was of questionable value, and the moral atmosphere was altogether unbearable. Constantly held in suspicion and spied upon, the children knew that a single conversation in Polish, or an imprudent word, might seriously harm, not only themselves, but also their families. Amidst these hostilities, they lost all the joy of life, and precocious feelings of distrust and indignation weighed upon their childhood. On the other side, this abnormal situation resulted in exciting the patriotic feeling of Polish youths to the highest degree.(from Autobiographical Notes pp. 158-159.)",
    "I was only fifteen when I finished my high-school studies, always having held first rank in my class. The fatigue of growth and study compelled me to take almost a year’s rest in the country. I then returned to my father in Warsaw, hoping to teach in the free schools. But family circumstances obliged me to change my decision. My father, now aged and tired, needed rest; his fortune was very modest. So I resolved to accept a position as governess for several children. Thus, when scarcely seventeen, I left my father’s house to begin an independent life. That going away remains one of the most vivid memories of my youth. My heart was heavy as I climbed into the railway car. It was to carry me for several hours, away from those I loved. And after the railway journey I must drive for five hours longer. What experience was awaiting me? So I questioned as I sat close to the car window looking out across the wide plains.(from Autobiographical Notes p. 163.)",
    "I continued my efforts to educate myself. This was no easy task under the Russian government of Warsaw; yet I found more opportunities than in the country. To my great joy, I was able, for the first time in my life, to find access to a laboratory: a small municipal physical laboratory directed by one of my cousins. I found little time to work there, except in the evenings and on Sundays, and was generally left to myself. I tried out various experiments described in treatises on physics and chemistry, and the results were sometimes unexpected. At times I would be encouraged by a little unhoped-for success, at others I would be in the deepest despair because of accidents and failures resulting from my inexperience. But on the whole, though I was taught that the way of progress is neither swift nor easy, this first trial confirmed in me the taste for experimental research in the fields of physics and chemistry. Other means of instruction came to me through my being one of an enthusiastic group of young men and women of Warsaw, who united in a common desire to study, and whose activities were at the same time social and patriotic. It was one of those groups of Polish youths who believed that the hope of their country lay in a great effort to develop the intellectual and moral strength of the nation, and that such an effort would lead to a better national situation. The nearest purpose was to work at one’s own instruction and to provide means of instruction for workmen and peasants. In accordance with this program we agreed among ourselves to give evening courses, each one teaching what he knew best. There is no need to say that this was a secret organization, which made everything extremely difficult. There were in our group very devoted young people who, as I still believe today, could do truly useful work. I have a bright remembrance of the sympathetic intellectual and social companionship which I enjoyed at that time. Truly the means of action were poor and the results obtained could not be considerable; yet I still believe that the ideas which inspired us then are the only way to real social progress. You cannot hope to build a better world without improving the individuals. To that end each of us must work for his own improvement, and at the same time share a general responsibility for all humanity, our particular duty being to aid those to whom we think we can be most useful. (from Autobiographical Notes pp. 167-168.)",
    "It would be impossible to tell of all the good these years brought to me. Undistracted by any outside occupation, I was entirely absorbed in the joy of learning and understanding. Yet, all the while, my living conditions were far from easy, my own funds being small and my family not having the means to aid me as they would have liked to do. However, my situation was not exceptional; it was the familiar experience of many of the Polish students whom I knew. The room I lived in was in a garret, very cold in winter, for it was insufficiently heated by a small stove which often lacked coal. During a particularly rigorous winter, it was not unusual for the water to freeze in the basin in the night; to be able to sleep I was obliged to pile all my clothes on the bedcovers. In the same room I prepared my meals with the aid of an alcohol lamp and a few kitchen utensils. These meals were often reduced to bread with a cup of chocolate, eggs or fruit. I had no help in housekeeping and I myself carried the little coal I used up the six flights. This life, painful from certain points of view, had, for all that, a real charm for me. It gave me a very precious sense of liberty and independence. Unknown in Paris, I was lost in the great city, but the feeling of living there alone, taking care of myself without any aid, did not at all depress me. If sometimes I felt lonesome, my usual state of mind was one of calm and great moral satisfaction. All my mind was centered on my studies, which, especially at the beginning, were difficult. In fact, I was insufficiently prepared to follow the physical science course at the Sorbonne, for, despite all my efforts, I had not succeeded in acquiring in Poland a preparation as complete as that of the French students following the same course. So I was obliged to supply this deficiency, especially in mathematics. I divided my time between courses, experimental work, and study in the library. In the evening I worked in my room, sometimes very late into the night. All that I saw and learned that was new delighted me. It was like a new world opened to me, the world of science, which I was at last permitted to know in all liberty.(from Autobiographical Notes pp. 170-171.)"
    "I met Pierre Curie for the first time in the spring of the year 1894.... A Polish physicist whom I knew, and who was a great admirer of Pierre Curie, one day invited us together to spend the evening with himself and his wife. As I entered the room, Pierre Curie was standing in the recess of a French window opening on a balcony. He seemed to me very young, though he was at that time thirty-five years old. I was struck by the open expression of his face and by the slight suggestion of detachment in his whole attitude. His speech, rather slow and deliberate, his simplicity, and his smile, at once grave and youthful, inspired confidence. We began a conversation which soon became friendly. It first concerned certain scientific matters about which I was very glad to be able to ask his opinion. Then we discussed certain social and humanitarian subjects which interested us both. There was, between his conceptions and mine, despite the difference between our native countries, a surprising kinship, no doubt attributable to a certain likeness in the moral atmosphere in which we were both raised by our families. We met again at the Physics Society and in the laboratory. Then he asked if he might call upon me.... Pierre Curie came to see me, and showed a simple and sincere sympathy with my student life. Soon he caught the habit of speaking to me of his dream of an existence consecrated entirely to scientific research, and he asked me to share that life. It was not, however, easy for me to make such a decision, for it meant separation from my country and my family, and the renouncement of certain social projects that were dear to me. Having grown up in an atmosphere of patriotism kept alive by the oppression of Poland, I wished, like many other young people of my country, to contribute my effort toward the conservation of our national spirit.... During the year 1894 Pierre Curie wrote me letters that seem to me admirable in their form. No one of them was very long, for he had the habit of concise expression, but all were written in a spirit of sincerity and with an evident anxiety to make the one he desired as a companion know him as he was.... It is appropriate to quote here a few lines which express how he looked on the possibility of our marriage: “We have promised each other (is it not true?) to have, the one for the other, at least a great affection. Provided that you do not change your mind! For there are no promises which hold; these are things that do not admit of compulsion. “It would, nevertheless, be a beautiful thing in which I hardly dare believe, to pass through life together hypnotized in our dreams: your dream for your country; our dream for humanity; our dream for science. Of all these dreams, I believe the last, alone, is legitimate. I mean to say by this that we are powerless to change the social order. Even if this were not true we should not know what to do.... From the point of view of science, on the contrary, we can pretend to accomplish something. The territory here is more solid and obvious, and however small it is, it is truly in our possession.” One can understand, from this letter, that for Pierre Curie there was only one way of looking at the future. He had dedicated his life to his dream of science: he felt the need of a companion who could live his dream with him (from Pierre Curie pp. 72-77.)",
    "It became a serious problem how to take care of our little Irène and of our home without giving up my scientific work. Such a renunciation would have been very painful to me, and my husband would not even think of it; he used to say that he had got a wife made expressly for him to share all his preoccupations. Neither of us would contemplate abandoning what was so precious to both. Of course we had to have a servant, but I personally saw to all the details of the child’s care. While I was in the laboratory, she was in the care of her grandfather, who loved her tenderly and whose own life was made brighter by her. So the close union of our family enabled me to meet my obligations. Things were particularly difficult only in case of more exceptional events, such as a child’s illness, when sleepless nights interrupted the normal course of life. It can be easily understood that there was no place in our life for worldly relations. We saw but a few friends, scientific workers, like ourselves, with whom we talked in our home or in our garden, while I did some sewing for my little girl. We also maintained affectionate relations with my husband’s brother and his family. But I was separated from all my relatives, as my sister had left Paris with her husband to live in Poland. It was under this mode of quiet living, organized according to our desires, that we achieved the great work of our lives, work begun about the end of 1897 and lasting for many years. (from Autobiographical Notes pp. 179-180.)",
    "My experiments proved that the radiation of uranium compounds can be measured with precision under determined conditions, and that this radiation is an atomic property of the element of uranium. Its intensity is proportional to the quantity of uranium contained in the compound, and depends neither on conditions of chemical combination, nor on external circumstances, such as light or temperature. I undertook next to discover if there were other elements possessing the same property, and with this aim I examined all the elements then known, either in their pure state or in compounds. I found that among these bodies, thorium compounds are the only ones which emit rays similar to those of uranium. The radiation of thorium has an intensity of the same order as that of uranium, and is, as in the case of uranium, an atomic property of the element.... During the course of my research, I had had occasion to examine not only simple compounds, salts and oxides, but also a great number of minerals. Certain ones proved radioactive; these were those containing uranium and thorium; but their radioactivity seemed abnormal, for it was much greater than the amount I had found in uranium and thorium had led me to expect. This abnormality greatly surprised us. When I had assured myself that it was not due to an error in the experiment, it became necessary to find an explanation. I then made the hypothesis that the ores uranium and thorium contain in small quantity a substance much more strongly radioactive than either uranium or thorium. This substance could not be one of the known elements, because these had already been examined; it must, therefore, be a new chemical element. I had a passionate desire to verify this hypothesis as rapidly as possible. And Pierre Curie, keenly interested in the question, abandoned his work on crystals (provisionally, he thought) to join me in the search for this unknown substance. We chose, for our work, the ore pitchblende, a uranium ore, which in its pure state is about four times more active than oxide of uranium. Since the composition of this ore was known through very careful chemical analysis, we could expect to find, at a maximum, 1 per cent of new substance. The result of our experiment proved that there were in reality new radioactive elements in pitchblende, but that their proportion did not reach even a millionth per cent!(from Pierre Curie pp. 96-98.)",
    "The School of Physics could give us no suitable premises, but for lack of anything better, the Director permitted us to use an abandoned shed which had been in service as a dissecting room of the School of Medicine. Its glass roof did not afford complete shelter against rain; the heat was suffocating in summer, and the bitter cold of winter was only a little lessened by the iron stove, except in its immediate vicinity. There was no question of obtaining the needed proper apparatus in common use by chemists. We simply had some old pine-wood tables with furnaces and gas burners. We had to use the adjoining yard for those of our chemical operations that involved producing irritating gases; even then the gas often filled our shed. With this equipment we entered on our exhausting work. Yet it was in this miserable old shed that we passed the best and happiest years of our life, devoting our entire days to our work. Often I had to prepare our lunch in the shed, so as not to interrupt some particularly important operation. Sometimes I had to spend a whole day mixing a boiling mass with a heavy iron rod nearly as large as myself. I would be broken with fatigue at the day’s end. Other days, on the contrary, the work would be a most minute and delicate fractional crystallization, in the effort to concentrate the radium. I was then annoyed by the floating dust of iron and coal from which I could not protect my precious products. But I shall never be able to express the joy of the untroubled quietness of this atmosphere of research and the excitement of actual progress with the confident hope of still better results. The feeling of discouragement that sometimes came after some unsuccessful toil did not last long and gave way to renewed activity. We had happy moments devoted to a quiet discussion of our work, walking around our shed. One of our joys was to go into our workroom at night; we then perceived on all sides the feebly luminous silhouettes of the bottles or capsules containing our products. It was really a lovely sight and one always new to us. The glowing tubes looked like faint, fairy lights. (from Autobiographical Notes pp. 186-187.)",
    "The first experiments on the biological properties of radium were successfully made in France with samples from our laboratory, while my husband was living. The results were, at once, encouraging, so that the new branch of medical science, called radiumtherapy (in France, Curietherapy), developed rapidly, first in France and later in other countries. To supply the radium wanted for this purpose, a radium-producing industry was established. The first plant was created in France and worked very successfully, but afterwards manufactures were founded in other countries, the most important of which are now in America, where great quantities of radium ore, named “carnotite,” are available. The radiumtherapy and the radium production developed conjointly, and the results were more and more important for the treatment of several diseases, and particularly of cancer. As a consequence of this, several institutes have been founded, in the large cities, for the application of the new therapy. Some of these institutes own several grams of radium, the commercial price of the gram being now about $70,000, the cost of production depending on the very small proportion of radium in the ore. It may be easily understood how deeply I appreciated the privilege of realizing that our discovery had become a benefit to mankind, not only through its great scientific importance, but also by its power of efficient action against human suffering and terrible disease. This was indeed a splendid reward for our years of hard toil. (from Autobiographical Notes pp. 199-200.)",
    "In 1903 I finished my doctor's thesis and obtained the degree. At the end of the same year the Nobel prize was awarded jointly to Becquerel, my husband and me for the discovery of radioactivity and new radioactive elements. This event greatly increased the publicity of our work. For some time there was no more peace. Visitors and demands for lectures and articles interrupted every day....The fatigue resulting from the effort exceeding our forces, imposed by the unsatisfactory conditions of our labor, was augmented by the invasion of publicity. The overturn of our voluntary isolation was a cause of real suffering for us and had all the effect of disaster. It was serious trouble brought into the organization of our life, and I have already explained how indispensable was our freedom from external distraction, in order to maintain our family life and our scientific activity. Of course, people who contribute to that kind of trouble generally mean it kindly. It is only that they do not realize the conditions of the problem.(from Autobiographical Notes pp. 190-191.)",
    "In 1906 just as we were definitely giving up the old shed laboratory where we had been so happy, there came the dreadful catastrophe which took my husband away from me and left me alone to bring up our children and, at the same time, to continue our work of research. It is impossible for me to express the profoundness and importance of the crisis brought into my life by the loss of the one who had been my closest companion and best friend. Crushed by the blow, I did not feel able to face the future. I could not forget, however, what my husband used sometimes to say, that, even deprived of him, I ought to continue my work. The death of my husband, coming immediately after the general knowledge of the discoveries with which his name is associated, was felt by the public, and especially by the scientific circles, to be a national misfortune. It was largely under the influence of this emotion that the Faculty of Sciences of Paris decided to offer me the chair, as professor, which my husband had occupied only one year and a half in the Sorbonne. It was an exceptional decision, as up to then no woman had held such a position.... The honor that now came to me was deeply painful under the cruel circumstances of its coming. (from Autobiographical Notes pp. 191-192.)",
    "We put you into the coffin Saturday morning, and I held your head up for this move. We kissed your cold face for the last time. Then a few periwinkles from the garden on the coffin and the little picture of me that you called “the good little student” and that you loved. It is the picture that must go with you into the grave, the picture of her who had the happiness of pleasing you enough so that you did not hesitate to offer to share your life with her, even when you had seen her only a few times. You often told me that this was the only occasion in your life when you acted without hesitation, with the absolute conviction that you were doing well. My Pierre, I think you were not wrong. We were made to live together, and our union had to be. Your coffin was closed and I could see you no more. I didn’t allow them to cover it with the horrible black cloth. I covered it with flowers and I sat beside it.... They filled the grave and put sheaves of flowers on it. Everything is over, Pierre is sleeping his last sleep beneath the earth; it is the end of everything, everything, everything. I am working in the laboratory all day long, it is all I can do; I am better off there than anywhere else. I conceive of nothing any more that could give me personal joy, except perhaps scientific work–and even there, no, because if I succeeded with it, I would not endure you not to know it. (from Madame Curie p. 249.)",
    "The dominant duty imposed on everyone at that time was to help the country in whatever way possible during the extreme crisis that it faced. No general instructions to this were given to the members of the University. It was left to each to take his own initiative and means of action.... During the rapid succession of events in August 1914, it was clearly proved that the preparation for defense was insufficient. Public feeling was especially aroused by the realization of the grave failings which appeared in the organization of the Health Service. My own attention was particularly drawn to this situation, and I soon found a field of activity which, once entered upon, absorbed the greatest part of my time and efforts until the end of the war, and even for some time thereafter.... It is well known that the X-rays offer surgeons and doctors extremely useful means for the examination of the sick and wounded.... However, at the beginning of the war, the Military Board of Health had no organization of radiology, while the civil organization was also but little developed. Radiologic installations existed in only a small number of important hospitals, and there were only a few specialists in the large cities. The numerous new hospitals that were established all over France in the first months of the war had, as a rule, no installation for the use of X-rays. To meet this need I first gathered together all the apparatus I could find in the laboratories and stores. With this equipment I established in August and September, 1914, several stations of radiology, the operation of which was assured by volunteer helpers to whom I gave instruction. These stations rendered great service during the battle of the Marne. But as they could not satisfy the needs of all the hospitals of the Paris region, I fitted up, with the help of the Red Cross, a radiologic car. It was simply a touring motor-car, arranged for the transport of a complete radiologic apparatus, together with a dynamo that was worked by the engine of the car, and furnished the electric current necessary for the production of the rays. This car could come at the call of any of the hospitals, large or small, in the surroundings of Paris. Cases of urgent need were frequent, for these hospitals had to take care of the wounded who could not be transported to more distant places. (from Autobiographical Notes pp. 208-211.)",
    "The story of radiology in war offers a striking example of the unsuspected amplitude that the application of purely scientific discoveries can take under certain conditions. X rays had had only a limited usefulness up to the time of the war. The great catastrophe which was let loose upon humanity, accumulating its victims in terrifying numbers, brought up by reaction the ardent desire to save everything that could be saved and to exploit every means of sparing and protecting human life. At once there appeared an effort to make the X ray yield its maximum of service. What had seemed difficult became easy and received an immediate solution. The material and the personnel were multiplied as if by enchantment. All those who did not understand gave in or accepted; those who did not know learned; those who had been indifferent became devoted. Thus the scientific discovery achieved the conquest of its natural field of action. A similar evolution took place in radiumtherapy, or the medical application of radiations emitted by the radio elements. What are we to conclude from this unhoped-for development shared between the new radiations revealed to us by science at the end of the nineteenth century? It seems that they must make our confidence in disinterested research more alive and increase our reverence and admiration for it. (from Madame Curie p. 306.)",
    "As you have seen, fortune favors us at this moment; but these favors of fortune do not come without many worries. We have never been less tranquil than at this moment. There are days when we scarcely have time to breathe. And to think that we dreamed of living in the wild, quite removed from human beings! (Letters from Pierre Curie to his friend E. Gouy, 20 March 1902)",
    "I have wanted to write to you for a long time; excuse me if I have not done so. The cause is the stupid life which I lead at present. You have seen this sudden infatuation for radium, which has resulted for us in all the advantages of a moment of popularity. We have been pursued by journalists and photographers from all countries of the world; they have gone even so far as to report the conversation between my daughter and her nurse, and to describe the black- and-white cat that lives with us.... Further, we have had a great many appeals for money.... Finally, the collectors of autographs, snobs, society people, and even at times, scientists, have come to see us—in our magnificent and tranquil quarters in the laboratory—and every evening there has been a voluminous correspondence to send off. With such a state of things I feel myself invaded by a kind of stupor. And yet all this turmoil will not perhaps have been in vain, if it results in my getting a chair and a laboratory. (22 January 1904)",
    "We have regretted so much being deprived of your visit this year, but hope to see you in October. If we do not make an effort from time to time, we end by losing touch with our best and most congenial friends, and in keeping company with others for the simple reason that it is easy to meet them. We continue to lead the same life of people who are extremely occupied, without being able to accomplish anything interesting. It is now more than a year since I have been able to engage in any research, and I have no moment to myself. Clearly I have not yet discovered a means to defend ourselves against this frittering away of our time which is nevertheless extremely necessary. Intellectually, it is a question of life or death. (25 July 1905)",
    "I am neither very well, nor very ill; but I am easily fatigued, and I have left but very little capacity for work. My wife, on the contrary, leads a very active life, between her children, the School at Sèvres, and the laboratory. She does not lose a minute, and occupies herself more regularly than I can with the direction of the laboratory in which she passes the greater part of the day. (7 November 1905)",
    "Paris, Monday, 31 August 1914 (At this time the German Army was threatening Paris). Dear Irène, I’ve just received your sweet letter of Saturday and I wanted so much to hug you that I almost cried. This morning I was able to make my way to the train station where Fernand and Margaret were to leave—and I didn’t manage to see them. I wonder if they’ve left. Things are not going very well, and we all have a heavy heart and disturbed spirit. We need great courage and I hope we will not lack it. We must keep the firm hope that after these bad days, good times will return. It’s in that hope that I lock you in my heart, my beloved daughters. Mé [Mom] (from Correspondance pp. 129, 158.)",
    "Poperinghe, 24 January 1915 (Near Dunkirk). Dear Irène, After various wanderings, we’ve arrived here, but we can’t make an attempt at working until we’ve made some modifications at the hospital. They want to build a shelter for the car and a partition to create the radiology room in a big ward. That all holds up the work, but it’s difficult to do otherwise. In Dunkirk, German planes dropped some bombs that killed a few people, but the populace is scarcely frightened. At Poperinghe too these accidents happen, but less often. We hear the guns grumbling almost constantly. It’s not raining, a bit of frost. We were welcomed at the hospital with extreme cordiality, I have a nice room and they give me a fire in a stove at the side. I’m better off than at Furnes, I’ll eat at the hospital. With a hug, Mé (from Correspondance pp. 129, 158.)",
    "Prague, Sunday, 14 June 1925. Dear Irène, I got your letter of June 5 which arrived in Warsaw the 12th. I find that [delay] excessive... I arrived in Prague this morning and will leave tomorrow evening for Jachymow. I’m bewildered by the life I’m leading and incapable of telling you anything intelligent. I ask myself, what fundamental vice is there in the organization of humanity that makes this sort of agitation, to a certain degree, necessary? Mrs. Meloney [the American journalist who encouraged Marie to expose herself to the press] would call it, “Dignifying science.” And what’s undeniable is the sincerity of everyone who does these things and their conviction that they are necessary. Here I’m in a magnificent apartment, bedroom, sitting-room and bathroom, overlooking the river bordered by hills, and full of flowers they gave me at the train station—mostly roses since it’s their season. Unfortunately it’s gray, and I’m afraid it will rain.... With hugs, Mé (from Correspondance p. 255.)",
    "Constantly held in suspicion and spied upon, the children knew that a single conversation in Polish, or an imprudent word, might seriously harm, not only themselves, but also their families.(Marie Curie)"
    "I easily learned mathematics and physics, as far as these sciences were taken in consideration in the school. I found in this ready help from my father, who loved science....Unhappily, he had no laboratory and could not perform experiments. (Marie Curie)",
    "It was one of those groups of Polish youths who believed that the hope of their country lay in a great effort to develop the intellectual and moral strength of the nation....we agreed among ourselves to give evening courses, each one teaching what he knew best. (Marie Curie)",
]
text = text_chunks
source_type = ["interview" for t in text_chunks]

In [22]:
data = [source_type, text]

# Transpose the data to have the inner lists as rows
transposed_data = list(map(list, zip(*data)))

# Column names for the DataFrame
columns = ['source_type','text']

# Create the DataFrame
df = pd.DataFrame(transposed_data, columns=columns)

# Update dataframe
df = update_dataframe('{}/df_text.csv'.format(db_dir), df)
df.to_csv('{}/df_text.csv'.format(db_dir), index=False)

## 4. Website/CV

In [15]:
websites = ["https://en.wikipedia.org/wiki/J._Robert_Oppenheimer", 
            "https://en.wikipedia.org/wiki/Manhattan_Project", 
            "https://en.wikipedia.org/wiki/Oppenheimer_security_hearing", 
            "https://en.wikipedia.org/wiki/American_Prometheus", 
            "https://www.ias.edu/oppenheimer-legacy", 
            "https://www.goodreads.com/author/quotes/308544.J_Robert_Oppenheimer",
            "https://en.wikipedia.org/wiki/Oppenheimer_(film)"]
text_website = [scrape_website_text(website) for website in tqdm(websites)]
text_website = [", ".join(filter(None, text.replace("\n", ",").split(","))) for text in text_website]

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:01<00:00,  4.01it/s]


In [25]:
websites = ["https://en.wikipedia.org/wiki/Marie_Curie", 
            "https://en.wikipedia.org/wiki/Pierre_Curie", 
            "https://en.wikipedia.org/wiki/Ir%C3%A8ne_Joliot-Curie",
            "https://en.wikipedia.org/wiki/Curie_family",
            "https://en.wikipedia.org/wiki/Curie_Institute_(Paris)", 
            "https://en.wikipedia.org/wiki/Maria_Sk%C5%82odowska-Curie_National_Research_Institute_of_Oncology", 
            "https://en.wikipedia.org/wiki/List_of_female_Nobel_laureates", 
            "https://en.wikipedia.org/wiki/Nobel_Prize#Multiple_laureates",
            "https://en.wikipedia.org/wiki/Polonium",
            "https://en.wikipedia.org/wiki/Marie_Sk%C5%82odowska-Curie_Actions",
            "https://en.wikipedia.org/wiki/Treatise_on_Radioactivity",
            "https://en.wikipedia.org/wiki/Radioactive_(film)",
            "https://en.wikipedia.org/wiki/Marie_Curie:_The_Courage_of_Knowledge",
            "https://en.wikipedia.org/wiki/Marie_Curie,_une_femme_sur_le_front",
            "https://en.wikipedia.org/wiki/Madame_Curie_(film)",
            "https://www.nobelprize.org/laureate/6",
            "https://www.gutenberg.org/cache/epub/61622/pg61622-images.html",
            "https://www.gutenberg.org/cache/epub/69617/pg69617-images.html",
            "https://www.gutenberg.org/cache/epub/60564/pg60564-images.html",
            #"https://www.americanscientist.org/article/the-inner-marie-curie",
            "https://www.smithsonianmag.com/history/madame-curies-passion-74183598",
            "https://aqrinternational.co.uk/marie-curie-a-role-model-for-mental-toughness-pioneer-for-women-in-stemm-careers",
            "https://www.newscientist.com/article/mg21528781-900-marie-curie-the-family-woman",
            #"https://blogs.lib.umich.edu/lost-stacks/marie-curie-and-her-daughters-shelley-emling",
           ]
text_website = [scrape_website_text(website) for website in tqdm(websites)]
text_website = [", ".join(filter(None, text.replace("\n", ",").split(","))) for text in text_website]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:07<00:00,  2.86it/s]


In [26]:
data = [len(text_website) * ["website"], text_website]

# Transpose the data to have the inner lists as rows
transposed_data = list(map(list, zip(*data)))

# Column names for the DataFrame
columns = ['source_type','text']

# Create the DataFrame
df = pd.DataFrame(transposed_data, columns=columns)

# Update dataframe
df = update_dataframe('{}/df_text.csv'.format(db_dir), df)
df.to_csv('{}/df_text.csv'.format(db_dir), index=False)

In [17]:
len(df)  # Number of context objects (papers, sites, etc)
print(df)

   source_type                                               text
0        paper  (Wednesday Moraine: Ele mentary Particles; J.R...
1      website  J. Robert Oppenheimer - Wikipedia, Jump to con...
2      website  Manhattan Project - Wikipedia, Jump to content...
3      website  Oppenheimer security hearing - Wikipedia, Jump...
4      website  American Prometheus - Wikipedia, Jump to conte...
5      website  J. Robert Oppenheimer: Life,  Work,  and Legac...
6      website  J. Robert Oppenheimer Quotes  (Author of The O...
7        paper  Mev  and experimentally it was found to be 37 ...
8        paper  (Tuesday Afternoon: Theoretical Session , J. S...
9      website  J. Robert Oppenheimer - Wikipedia, Jump to con...
10     website  Manhattan Project - Wikipedia, Jump to content...
11     website  Oppenheimer security hearing - Wikipedia, Jump...
12     website  American Prometheus - Wikipedia, Jump to conte...
13     website  J. Robert Oppenheimer: Life,  Work,  and Legac...
14     web

In [28]:
print(len(df))  # Number of context objects (papers, sites, etc)
print(df)

50
   source_type                                               text
0        paper  Marie Curie  \n \nRayons émis par les composés...
1        paper  Marie Curie\nCurie c. 1920\nBorn Maria Salomea...
2        paper  Radium and RadioactivityBy Mme. Sklodowska Cur...
3        paper  MOULD\n74\nCURRENT ONCOLOGY—VOLUME 14, NUMBER ...
4        paper  JULY,1931 REVIER'SOFMODER1V' I'HYSICS VOLUME 3...
5        paper  YALEJOURNALOFBIOLOGYANDMEDICINE76(2003),pp.167...
6      website  Marie Curie - Wikipedia, Jump to content, Main...
7      website  Pierre Curie - Wikipedia, Jump to content, Mai...
8      website  Irène Joliot-Curie - Wikipedia, Jump to conten...
9      website  Curie family - Wikipedia, Jump to content, Mai...
10     website  Curie Institute (Paris) - Wikipedia, Jump to c...
11     website  Maria Skłodowska-Curie National Research Insti...
12     website  List of female Nobel laureates - Wikipedia, Ju...
13     website  Nobel Prize - Wikipedia, Jump to content, Main...
14     