## Installation

In [8]:
%pip install python-terrier



In [9]:
import pyterrier as pt
if not pt.started():
  pt.init()

## Science Experiments

### Indexing

In [14]:
import pandas as pd

docs_df = pd.read_json("./science_experiments/experiment_archive.json")

docs_df_2 = pd.read_json("./science_experiments/steve_spangler.json")
docs_df_2 = docs_df_2.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

docs_df_3 = pd.read_json("./science_experiments/science_buddies.json")
docs_df_3 = docs_df_3.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
docs_df_3 = docs_df_3.loc[docs_df_3["explanation"] != ""]

docs_df = pd.concat([docs_df, docs_df_2], ignore_index=True)
docs_df = pd.concat([docs_df, docs_df_3], ignore_index=True)
docs_df = docs_df.fillna('')

docno = ['d'+ str(i) for i in range(docs_df.shape[0])]
docs_df['docno'] = docno
docs_df['text'] = docs_df['title'] + ' ' + docs_df['subject'] + ' ' + docs_df['explanation']


exp_title = docs_df.title.values
exp_link = docs_df.link.values
exp_desc = docs_df.description.values
exp_subj = docs_df.subject.values
docs_df

Unnamed: 0,title,subject,description,link,explanation,docno,text
0,Rainbow milk,Chemistry,Create a color explosion in milk. About polari...,https://www.experimentarchive.com/experiments/...,Milk is a mixture consisting of water and chem...,d0,Rainbow milk Chemistry Milk is a mixture consi...
1,Egg drop challenge,Technology,Build a landing device for an egg and then dro...,https://www.experimentarchive.com/experiments/...,Deformation is a term within physics and techn...,d1,Egg drop challenge Technology Deformation is a...
2,Cartesian diver,Physics,Squeeze the bottle and the matches sink. About...,https://www.experimentarchive.com/experiments/...,The head of a match is porous and contains sma...,d2,Cartesian diver Physics The head of a match is...
3,Drops on a coin,Chemistry,How many water drops can fit on a coin? About ...,https://www.experimentarchive.com/experiments/...,Water consists of water molecules that attract...,d3,Drops on a coin Chemistry Water consists of wa...
4,Homemade marble run,Physics,Build a marble run on the wall. About gravitat...,https://www.experimentarchive.com/experiments/...,"The higher up something is, the more gravitati...",d4,Homemade marble run Physics The higher up some...
...,...,...,...,...,...,...,...
1464,Build a Paper Roller Coaster,,,https://www.sciencebuddies.org/stem-activities...,Have you ever ridden a roller coaster? Have yo...,d1464,Build a Paper Roller Coaster Have you ever ri...
1465,Electrolyte Challenge: Orange Juice Vs. Sports...,Chemistry,The makers of sports drinks spend tens to hund...,https://www.sciencebuddies.org/science-fair-pr...,"""Just do it!"" You have probably heard that slo...",d1465,Electrolyte Challenge: Orange Juice Vs. Sports...
1466,What Makes Ice Melt Fastest?,Chemistry,If you live in a place that gets cold in the w...,https://www.sciencebuddies.org/science-fair-pr...,If you have ever made homemade ice cream the o...,d1466,What Makes Ice Melt Fastest? Chemistry If you ...
1467,Do Migratory Birds Like It Hot?,Zoology,Why do birds migrate? Do all birds have the sa...,https://www.sciencebuddies.org/science-fair-pr...,Have you ever noticed that some of the bird sp...,d1467,Do Migratory Birds Like It Hot? Zoology Have y...


In [15]:
indexer = pt.DFIndexer("./index_3docs", overwrite=True)
index_ref = indexer.index(docs_df['text'], docs_df['docno'])

In [16]:
index = pt.IndexFactory.of(index_ref)

for kv in index.getLexicon():
  print("%s  -> %s " % (kv.getKey(), kv.getValue().toString()  ))

0  -> term632 Nt=227 TF=590 maxTF=32 @{0 0 0} 
00  -> term3328 Nt=54 TF=63 maxTF=5 @{0 201 5} 
000  -> term348 Nt=119 TF=247 maxTF=20 @{0 259 4} 
001  -> term1916 Nt=7 TF=10 maxTF=2 @{0 382 6} 
0011  -> term7374 Nt=1 TF=3 maxTF=3 @{0 395 5} 
0014  -> term6481 Nt=2 TF=2 maxTF=1 @{0 398 3} 
002  -> term606 Nt=3 TF=3 maxTF=1 @{0 400 7} 
0025  -> term611 Nt=1 TF=2 maxTF=2 @{0 405 3} 
0030  -> term6517 Nt=2 TF=3 maxTF=2 @{0 406 6} 
0082  -> term5511 Nt=1 TF=1 maxTF=1 @{0 409 3} 
01  -> term1250 Nt=15 TF=19 maxTF=2 @{0 411 5} 
010  -> term10067 Nt=2 TF=3 maxTF=2 @{0 434 5} 
011  -> term10074 Nt=1 TF=1 maxTF=1 @{0 439 2} 
012  -> term10073 Nt=1 TF=1 maxTF=1 @{0 441 6} 
014  -> term11258 Nt=1 TF=1 maxTF=1 @{0 444 2} 
015  -> term6261 Nt=3 TF=4 maxTF=2 @{0 447 0} 
017  -> term10032 Nt=1 TF=1 maxTF=1 @{0 453 5} 
02  -> term5117 Nt=7 TF=7 maxTF=1 @{0 456 1} 
020  -> term10072 Nt=1 TF=1 maxTF=1 @{0 468 3} 
022  -> term10068 Nt=2 TF=3 maxTF=2 @{0 470 7} 
023  -> term2056 Nt=2 TF=3 maxTF=2 @{0 475 4

### Query

In [17]:
# DEFINING HELPER FUNCTIONS

def get_exp_title(docid):
  id = int(docid[1:])
  return exp_title[id]

def get_exp_link(docid):
  id = int(docid[1:])
  return exp_link[id]

def get_exp_description(docid):
  id = int(docid[1:])
  return exp_desc[id]

def get_exp_subject(docid):
  id = int(docid[1:])
  return exp_subj[id]

def retrieve_exp(df):
  title = []
  link = []
  desc = []
  subject = []
  for i in range(df.shape[0]):
    docid = df.loc[i, 'docno']
    title.append(get_exp_title(docid))
    link.append(get_exp_link(docid))
    desc.append(get_exp_description(docid))
    subject.append(get_exp_subject(docid))
  df['Title'] = title
  df['Link'] = link
  df['Description'] = desc
  df['Subject'] = subject
  return df

In [18]:
br = pt.BatchRetrieve(index, wmodel="BM25")

queries = pd.DataFrame([["q1", "air baloon"], ["q2", "rocket yogurt"], ['q3', 'chemistry']], columns=["qid", "query"])
res = br(queries)
retrieve_exp(res)

Unnamed: 0,qid,docid,docno,rank,score,query,Title,Link,Description,Subject
0,q1,30,d30,0,3.472609,air baloon,Trash airplane,https://www.experimentarchive.com/experiments/...,Build an airplane. An experiment about Bernoui...,Technology
1,q1,1326,d1326,1,3.450828,air baloon,How Does a Hovercraft Work?,https://www.sciencebuddies.org/science-fair-pr...,Have you ever ridden on a hovercraft? It is li...,Aerodynamics & Hydrodynamics
2,q1,83,d83,2,3.447885,air baloon,Heavy smoke,https://www.experimentarchive.com/experiments/...,"Create heavy, cold, smoke. About why some thin...",Physics
3,q1,107,d107,3,3.445969,air baloon,Floating ping pong ball,https://www.experimentarchive.com/experiments/...,Float a ball above your science hairdryer. Abo...,Physics
4,q1,206,d206,4,3.441248,air baloon,Floating Ping-Pong Balls and Flying Toilet Paper,https://stevespangler.com/experiments/flying-p...,Amuse the neighbors for hours as you make obje...,
...,...,...,...,...,...,...,...,...,...,...
523,q3,1135,d1135,136,1.632524,chemistry,I'm Trying to Breathe Here! Dissolved Oxygen v...,https://www.sciencebuddies.org/science-fair-pr...,"To survive, we need oxygen in the air we breat...",Environmental Science
524,q3,1099,d1099,137,1.627147,chemistry,Explore How Chromatography Can Unmix Mixtures,https://www.sciencebuddies.org/science-fair-pr...,Did you know that mixtures can be unmixed? Chr...,Chemistry
525,q3,1003,d1003,138,1.605988,chemistry,Column Chromatography: Can you Separate the Dy...,https://www.sciencebuddies.org/science-fair-pr...,What color is grape soda? If you pour it into...,Biotechnology
526,q3,90,d90,139,1.324933,chemistry,Forever boiling bottle,https://www.experimentarchive.com/experiments/...,Water that boils at room temperature. About bo...,Chemistry


In [None]:
doc_list = res['docno'].tolist()

docs_df[docs_df['docno'].isin(doc_list)]

Unnamed: 0,title,subject,description,link,explanation,docno,text
0,Rainbow milk,Chemistry,Create a color explosion in milk. About polari...,https://www.experimentarchive.com/experiments/...,Milk is a mixture consisting of water and chem...,d0,Rainbow milk Chemistry Milk is a mixture consi...
2,Cartesian diver,Physics,Squeeze the bottle and the matches sink. About...,https://www.experimentarchive.com/experiments/...,The head of a match is porous and contains sma...,d2,Cartesian diver Physics The head of a match is...
3,Drops on a coin,Chemistry,How many water drops can fit on a coin? About ...,https://www.experimentarchive.com/experiments/...,Water consists of water molecules that attract...,d3,Drops on a coin Chemistry Water consists of wa...
4,Homemade marble run,Physics,Build a marble run on the wall. About gravitat...,https://www.experimentarchive.com/experiments/...,"The higher up something is, the more gravitati...",d4,Homemade marble run Physics The higher up some...
6,Heavy paper,Earth science,Break a ruler using a newspaper. An experiment...,https://www.experimentarchive.com/experiments/...,It's not the newspaper that is heavy enough to...,d6,Heavy paper Earth science It's not the newspap...
...,...,...,...,...,...,...,...
226,Balloon-Powered Car,,"When it comes to powering a race car, there ar...",https://stevespangler.com/experiments/balloon-...,How Does It Work\nThe concept behind the Ballo...,d226,Balloon-Powered Car How Does It Work\nThe con...
227,Talking Cups,,From the soulful crooning of an R&B singer to ...,https://stevespangler.com/experiments/talking-...,How Does It Work\nSound is created and transmi...,d227,Talking Cups How Does It Work\nSound is creat...
229,Balloon Expansion,,Here’s a great way to actually see how air fil...,https://stevespangler.com/experiments/balloon-...,How Does It Work\nThis activity shows that air...,d229,Balloon Expansion How Does It Work\nThis acti...
230,CD Hovercraft,,The puck used in an ice hockey game is designe...,https://stevespangler.com/experiments/cd-hover...,How Does It Work\nHovercrafts use air to lift ...,d230,CD Hovercraft How Does It Work\nHovercrafts u...
