## Installation

In [5]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Pinned to the PyTerrier version reported in this notebook's recorded output.
%pip install python-terrier==0.10.0



In [6]:
# %pip (rather than !pip) upgrades pandas in the environment of the running kernel.
%pip install --upgrade pandas




In [7]:
import pyterrier as pt
# Initialise PyTerrier only when it has not been started yet in this kernel,
# so that re-running the cell does not call pt.init() a second time.
if pt.started():
  pass
else:
  pt.init()

  from .autonotebook import tqdm as notebook_tqdm
PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


## Science Experiments

### Indexing

In [8]:
import pandas as pd

def _strip_text_columns(frame):
  """Return ``frame`` with surrounding whitespace stripped from object-dtype columns."""
  return frame.apply(lambda col: col.str.strip() if col.dtype == "object" else col)


# All three sources live under ./science_experiments, relative to the notebook.
# The Experiment Archive records are used exactly as loaded.
docs_df = pd.read_json("./science_experiments/experiment_archive.json")

# The other two sources get their string columns whitespace-stripped.
docs_df_2 = _strip_text_columns(pd.read_json("./science_experiments/steve_spangler.json"))
docs_df_3 = _strip_text_columns(pd.read_json("./science_experiments/science_buddies.json"))

# Stack the sources into one frame with a fresh 0..n-1 index. Columns missing
# from a source become NaN and are replaced with '' so the string
# concatenation below never yields NaN.
docs_df = pd.concat([docs_df, docs_df_2, docs_df_3], ignore_index=True)
docs_df = docs_df.fillna('')

# Document ids are 'd' + row position; the get_exp_* helpers parse the
# number back out to index the exp_* arrays below.
docno = ['d' + str(i) for i in range(docs_df.shape[0])]
docs_df['docno'] = docno

# The text to be indexed: title, subject and explanation joined by single spaces.
docs_df['text'] = docs_df['title'] + ' ' + docs_df['subject'] + ' ' + docs_df['explanation']

# Positional lookup arrays, in the same row order as the docno values.
exp_title = docs_df.title.values
exp_link = docs_df.link.values
exp_desc = docs_df.description.values
exp_subj = docs_df.subject.values
docs_df

Unnamed: 0,title,subject,description,link,explanation,docno,text
0,Rainbow milk,Chemistry,Create a color explosion in milk. About polari...,https://www.experimentarchive.com/experiments/...,Milk is a mixture consisting of water and chem...,d0,Rainbow milk Chemistry Milk is a mixture consi...
1,Egg drop challenge,Technology,Build a landing device for an egg and then dro...,https://www.experimentarchive.com/experiments/...,Deformation is a term within physics and techn...,d1,Egg drop challenge Technology Deformation is a...
2,Cartesian diver,Physics,Squeeze the bottle and the matches sink. About...,https://www.experimentarchive.com/experiments/...,The head of a match is porous and contains sma...,d2,Cartesian diver Physics The head of a match is...
3,Drops on a coin,Chemistry,How many water drops can fit on a coin? About ...,https://www.experimentarchive.com/experiments/...,Water consists of water molecules that attract...,d3,Drops on a coin Chemistry Water consists of wa...
4,Homemade marble run,Physics,Build a marble run on the wall. About gravitat...,https://www.experimentarchive.com/experiments/...,"The higher up something is, the more gravitati...",d4,Homemade marble run Physics The higher up some...
...,...,...,...,...,...,...,...
233,Pop Bottle Music,,"A popular Las Vegas musical act uses tubes, bo...",https://stevespangler.com/experiments/pop-bott...,How Does It Work\nThe science of sound is all ...,d233,Pop Bottle Music How Does It Work\nThe scienc...
234,Burning Money,,Do you have money burning a hole in your pocke...,https://stevespangler.com/experiments/burning-...,How Does It Work\nBy now you’ve probably guess...,d234,Burning Money How Does It Work\nBy now you’ve...
235,Pop Rocks Expander (Candy Science),,Pop Rocks is one of the greatest candy inventi...,https://stevespangler.com/experiments/poprocks/,How Does It Work\nThe secret behind the famous...,d235,Pop Rocks Expander (Candy Science) How Does I...
236,Instant Freeze Water - Bottle Slam,,You put a plastic bottle of soda pop or water ...,https://stevespangler.com/experiments/instant-...,How Does It Work\nYou used salt and ice to dro...,d236,Instant Freeze Water - Bottle Slam How Does I...


In [9]:
# Index the combined text of every experiment, keyed by its docno.
# NOTE(review): overwrite=True presumably replaces any index already stored at
# ./index_3docs — confirm against the PyTerrier docs.
indexer = pt.DFIndexer(
    "./index_3docs",
    overwrite=True,
)
index_ref = indexer.index(
    docs_df.text,
    docs_df.docno,
)

In [10]:
# Open the index from the reference returned by the indexer.
index = pt.IndexFactory.of(index_ref)

# Print one line per lexicon entry: its key followed by the string form of its value.
for entry in index.getLexicon():
  term = entry.getKey()
  stats = entry.getValue().toString()
  print("%s  -> %s " % (term, stats))

0  -> term632 Nt=13 TF=39 maxTF=10 @{0 0 0} 
00  -> term3328 Nt=1 TF=1 maxTF=1 @{0 16 4} 
000  -> term348 Nt=7 TF=17 maxTF=6 @{0 18 4} 
001  -> term1916 Nt=1 TF=1 maxTF=1 @{0 27 0} 
002  -> term606 Nt=3 TF=3 maxTF=1 @{0 28 6} 
0025  -> term611 Nt=1 TF=2 maxTF=2 @{0 33 2} 
01  -> term1250 Nt=1 TF=1 maxTF=1 @{0 34 5} 
023  -> term2056 Nt=1 TF=2 maxTF=2 @{0 36 1} 
03  -> term3321 Nt=1 TF=1 maxTF=1 @{0 38 0} 
05  -> term1899 Nt=1 TF=1 maxTF=1 @{0 40 0} 
06  -> term3330 Nt=1 TF=1 maxTF=1 @{0 41 6} 
063  -> term605 Nt=1 TF=2 maxTF=2 @{0 43 6} 
079  -> term607 Nt=1 TF=2 maxTF=2 @{0 45 1} 
086  -> term2531 Nt=1 TF=1 maxTF=1 @{0 46 4} 
096  -> term2038 Nt=1 TF=1 maxTF=1 @{0 48 4} 
1  -> term211 Nt=32 TF=69 maxTF=8 @{0 50 2} 
10  -> term310 Nt=10 TF=18 maxTF=4 @{0 78 5} 
100  -> term1871 Nt=4 TF=6 maxTF=3 @{0 88 5} 
1000  -> term3211 Nt=1 TF=1 maxTF=1 @{0 93 3} 
104  -> term2100 Nt=1 TF=1 maxTF=1 @{0 95 3} 
106  -> term1315 Nt=1 TF=1 maxTF=1 @{0 97 1} 
109  -> term1085 Nt=2 TF=2 maxTF=1 @{0 98 5

### Query

In [11]:
# DEFINING HELPER FUNCTIONS

def get_exp_title(docid):
  """Return the title stored for ``docid``.

  The first character of ``docid`` is dropped and the remainder is parsed as
  an integer position into the module-level ``exp_title`` sequence.
  """
  return exp_title[int(docid[1:])]

def get_exp_link(docid):
  """Return the link stored for ``docid``.

  The first character of ``docid`` is dropped and the remainder is parsed as
  an integer position into the module-level ``exp_link`` sequence.
  """
  return exp_link[int(docid[1:])]

def get_exp_description(docid):
  """Return the description stored for ``docid``.

  The first character of ``docid`` is dropped and the remainder is parsed as
  an integer position into the module-level ``exp_desc`` sequence.
  """
  return exp_desc[int(docid[1:])]

def get_exp_subject(docid):
  """Return the subject stored for ``docid``.

  The first character of ``docid`` is dropped and the remainder is parsed as
  an integer position into the module-level ``exp_subj`` sequence.
  """
  return exp_subj[int(docid[1:])]

def retrieve_exp(df):
  """Attach experiment metadata columns to a frame of retrieval results.

  Each row's ``docno`` is resolved through the ``get_exp_title``,
  ``get_exp_link``, ``get_exp_description`` and ``get_exp_subject`` helpers,
  and the results are written to the columns ``Title``, ``Link``,
  ``Description`` and ``Subject``.

  Rows are visited by position, not by index label, so the frame may carry
  any index (for example after filtering or sorting), not only 0..n-1.

  Args:
    df: pandas DataFrame with a ``docno`` column.

  Returns:
    The same ``df`` object, with the four columns added in place.
  """
  # Read the ids once, in row order; list assignment below is positional too.
  docnos = df['docno'].tolist()
  df['Title'] = [get_exp_title(docid) for docid in docnos]
  df['Link'] = [get_exp_link(docid) for docid in docnos]
  df['Description'] = [get_exp_description(docid) for docid in docnos]
  df['Subject'] = [get_exp_subject(docid) for docid in docnos]
  return df

In [12]:
# Retriever over `index` using the BM25 weighting model.
br = pt.BatchRetrieve(index, wmodel="BM25")

# One row per query: an id column and the free-text query column.
# NOTE(review): "baloon" looks like a misspelling of "balloon" — confirm
# whether it is intentional before relying on the q1 results.
queries = pd.DataFrame({
    "qid": ["q1", "q2", "q3"],
    "query": ["air baloon", "rocket yogurt", "chemistry"],
})

# Run all queries, then add title/link/description/subject to the result rows.
res = br(queries)
retrieve_exp(res)

Unnamed: 0,qid,docid,docno,rank,score,query,Title,Link,Description,Subject
0,q1,30,d30,0,1.483964,air baloon,Trash airplane,https://www.experimentarchive.com/experiments/...,Build an airplane. An experiment about Bernoui...,Technology
1,q1,107,d107,1,1.468948,air baloon,Floating ping pong ball,https://www.experimentarchive.com/experiments/...,Float a ball above your science hairdryer. Abo...,Physics
2,q1,206,d206,2,1.465549,air baloon,Floating Ping-Pong Balls and Flying Toilet Paper,https://stevespangler.com/experiments/flying-p...,Amuse the neighbors for hours as you make obje...,
3,q1,54,d54,3,1.463424,air baloon,Plastic bag parachute,https://www.experimentarchive.com/experiments/...,Make a parachute from a plastic bag (not for y...,Technology
4,q1,83,d83,4,1.460112,air baloon,Heavy smoke,https://www.experimentarchive.com/experiments/...,"Create heavy, cold, smoke. About why some thin...",Physics
...,...,...,...,...,...,...,...,...,...,...
120,q3,40,d40,23,2.151170,chemistry,Bouncing soap bubbles,https://www.experimentarchive.com/experiments/...,Hold a soap bubble in your hand. About polar a...,Chemistry
121,q3,105,d105,24,1.972784,chemistry,Special: Colored fire,https://www.experimentarchive.com/experiments/...,Create flames in a range of colors. About fire...,Chemistry
122,q3,72,d72,25,1.965204,chemistry,Special: Dust explosion,https://www.experimentarchive.com/experiments/...,Flour can be deadly. An experiment about chemi...,Chemistry
123,q3,79,d79,26,1.815253,chemistry,Special: Whoosh bottle,https://www.experimentarchive.com/experiments/...,"Ignite ethanol to produce a pulsating, blue fi...",Chemistry


In [13]:
# Collect every docno present in the results, then show the rows of docs_df
# whose docno is in that list.
doc_list = res["docno"].tolist()
docs_df.loc[docs_df["docno"].isin(doc_list)]

Unnamed: 0,title,subject,description,link,explanation,docno,text
0,Rainbow milk,Chemistry,Create a color explosion in milk. About polari...,https://www.experimentarchive.com/experiments/...,Milk is a mixture consisting of water and chem...,d0,Rainbow milk Chemistry Milk is a mixture consi...
2,Cartesian diver,Physics,Squeeze the bottle and the matches sink. About...,https://www.experimentarchive.com/experiments/...,The head of a match is porous and contains sma...,d2,Cartesian diver Physics The head of a match is...
3,Drops on a coin,Chemistry,How many water drops can fit on a coin? About ...,https://www.experimentarchive.com/experiments/...,Water consists of water molecules that attract...,d3,Drops on a coin Chemistry Water consists of wa...
4,Homemade marble run,Physics,Build a marble run on the wall. About gravitat...,https://www.experimentarchive.com/experiments/...,"The higher up something is, the more gravitati...",d4,Homemade marble run Physics The higher up some...
6,Heavy paper,Earth science,Break a ruler using a newspaper. An experiment...,https://www.experimentarchive.com/experiments/...,It's not the newspaper that is heavy enough to...,d6,Heavy paper Earth science It's not the newspap...
...,...,...,...,...,...,...,...
226,Balloon-Powered Car,,"When it comes to powering a race car, there ar...",https://stevespangler.com/experiments/balloon-...,How Does It Work\nThe concept behind the Ballo...,d226,Balloon-Powered Car How Does It Work\nThe con...
227,Talking Cups,,From the soulful crooning of an R&B singer to ...,https://stevespangler.com/experiments/talking-...,How Does It Work\nSound is created and transmi...,d227,Talking Cups How Does It Work\nSound is creat...
229,Balloon Expansion,,Here’s a great way to actually see how air fil...,https://stevespangler.com/experiments/balloon-...,How Does It Work\nThis activity shows that air...,d229,Balloon Expansion How Does It Work\nThis acti...
230,CD Hovercraft,,The puck used in an ice hockey game is designe...,https://stevespangler.com/experiments/cd-hover...,How Does It Work\nHovercrafts use air to lift ...,d230,CD Hovercraft How Does It Work\nHovercrafts u...
