In [1]:
import pandas as pd
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
from thefuzz import fuzz
import json
import re
import urllib.parse

In [2]:
# Load the TREC metadata table; later cells read its "ID", "Title", "Authors" and "PubYear" columns.
df_metadata_trec = pd.read_parquet(path="../../../data/metadata_TREC.parquet")

In [3]:
# Read the Semantic Scholar export from disk. Later cells iterate over its keys
# and look up each record's "externalIds" entry.
with open("../../../data/SemanticScholar_TREC.json", mode="r", encoding="utf-8") as json_file:
    SemanticScholar_FINAL = json.loads(json_file.read())

In [6]:
# Collect [Semantic Scholar ID, OpenAlex work ID] pairs for every record that
# carries a MAG ID. The OpenAlex work ID is built by prefixing the MAG ID with "W".

merge_list = []

for paper_id in SemanticScholar_FINAL:
    try:
        mag_id = SemanticScholar_FINAL[paper_id]["externalIds"]["MAG"]
        merge_list.append([paper_id, "W" + mag_id])
    except (KeyError, TypeError):
        # KeyError: the record has no "externalIds" or no "MAG" entry.
        # TypeError: the record / "externalIds" is None, or the MAG value is not a string.
        # Only these expected "no usable MAG ID" cases are skipped; anything else
        # (including KeyboardInterrupt) now propagates instead of being swallowed.
        continue

In [8]:
# Two-column frame built from merge_list: "ID" holds the first element of each
# pair and "MAG" the second ("W"-prefixed MAG ID).
df_has_mag = pd.DataFrame(data=merge_list, columns=["ID", "MAG"])

In [9]:
# Inner join on the shared "ID" column: keeps only IDs present in both frames.
df_has_mag_merged = df_has_mag.merge(df_metadata_trec, how="inner", on="ID")

In [19]:
def extract_years(title):
    """Return every standalone four-digit number found in ``title``.

    A match must be delimited by word boundaries, so "TREC 2004" yields
    ["2004"] while "12345" or "TREC2004" yield nothing. Matches are returned
    as strings in order of appearance.

    Args:
        title: The title text to scan. Non-string values (e.g. ``None`` or a
            NaN from a DataFrame cell) are treated as having no years.

    Returns:
        list[str]: The four-digit tokens found, possibly empty.
    """
    if not isinstance(title, str):
        # A missing title has no years; return an empty list rather than
        # letting re.findall raise TypeError.
        return []
    return re.findall(r'\b\d{4}\b', title)

In [12]:
# Look up every MAG-derived ID in OpenAlex and keep only confident matches.

OpenAlex_by_MAG = {}  # Matched OpenAlex works, keyed by the row's "ID" value
ids_found = []        # "ID" values that were successfully matched

# Rows of df_has_mag_merged are the documents for which a "W"-prefixed MAG ID is available.
for _, row in df_has_mag_merged.iterrows():
    try:
        # Fetch the OpenAlex work by its ID
        work = Works()[row["MAG"]]

        # Years mentioned in the OpenAlex title vs. in our own metadata title
        years_openalex = extract_years(work["title"])
        years_df = extract_years(row["Title"])

        # Accept only near-identical titles (fuzz.ratio above 95) that also mention
        # exactly the same years, so e.g. "TREC 2004" is not matched to "TREC 2005".
        if fuzz.ratio(row["Title"], work["title"]) > 95 and years_df == years_openalex:
            OpenAlex_by_MAG[row["ID"]] = work
            ids_found.append(row["ID"])
    except Exception as exc:
        # Best-effort: one failed lookup must not abort the whole run. Catching
        # Exception (not a bare except) keeps the loop interruptible with Ctrl-C,
        # and printing the error shows why the row was skipped.
        print(f"ID not given or found: {row['MAG']} ({exc!r})")
        continue


0 / 1605
1 / 1605
2 / 1605
3 / 1605
4 / 1605
5 / 1605
6 / 1605
7 / 1605
8 / 1605
9 / 1605
10 / 1605
11 / 1605
12 / 1605
13 / 1605
14 / 1605
15 / 1605
16 / 1605
17 / 1605
18 / 1605
19 / 1605
20 / 1605
21 / 1605
22 / 1605
23 / 1605
24 / 1605
25 / 1605
26 / 1605
27 / 1605
28 / 1605
29 / 1605
30 / 1605
31 / 1605
32 / 1605
33 / 1605
34 / 1605
35 / 1605
36 / 1605
37 / 1605
38 / 1605
39 / 1605
40 / 1605
41 / 1605
42 / 1605
43 / 1605
44 / 1605
45 / 1605
46 / 1605
47 / 1605
48 / 1605
49 / 1605
50 / 1605
51 / 1605
52 / 1605
53 / 1605
54 / 1605
55 / 1605
56 / 1605
57 / 1605
58 / 1605
59 / 1605
60 / 1605
61 / 1605
62 / 1605
63 / 1605
64 / 1605
65 / 1605
66 / 1605
67 / 1605
68 / 1605
69 / 1605
70 / 1605
71 / 1605
72 / 1605
73 / 1605
74 / 1605
75 / 1605
76 / 1605
77 / 1605
78 / 1605
79 / 1605
80 / 1605
81 / 1605
82 / 1605
83 / 1605
84 / 1605
85 / 1605
86 / 1605
87 / 1605
88 / 1605
89 / 1605
90 / 1605
91 / 1605
92 / 1605
93 / 1605
94 / 1605
95 / 1605
96 / 1605
97 / 1605
98 / 1605
99 / 1605
100 / 1605

In [17]:
# IDs that were already matched to an OpenAlex work through their MAG ID
ids_found_with_MAG = list(OpenAlex_by_MAG.keys())

# Keep only the documents that were NOT matched through a MAG ID.
# (The original filtered on `ids_not_found`, a name no cell defines.)
df_no_mag = df_metadata_trec[~df_metadata_trec["ID"].isin(ids_found_with_MAG)]

# Matches found by title search, keyed by the row's "ID" value
documents_without_MAG = {}

for _, row in df_no_mag.iterrows():
    try:
        # Strip commas from the title and percent-encode it for the search query.
        # NOTE(review): pyalex may URL-encode filter values itself, in which case
        # quoting here double-encodes the title; confirm against the installed version.
        title = row["Title"].replace(",", "")
        title_encoded = urllib.parse.quote(title)

        # Search OpenAlex works by title
        candidates = Works().search_filter(title=title_encoded).get()

        if len(candidates) == 0:
            print("no matches")
            continue

        if len(candidates) == 1:
            # A single hit is accepted automatically, but only if the title is
            # near-identical (fuzz.ratio above 95) and mentions the same years.
            print("One option")
            candidate = candidates[0]
            years_openalex = extract_years(candidate["title"])
            years_df = extract_years(row["Title"])
            if fuzz.ratio(row["Title"], candidate["title"]) > 95 and years_df == years_openalex:
                documents_without_MAG[row["ID"]] = candidate
            continue

        # Several hits: list them and let the user choose by zero-based position.
        for candidate in candidates:
            print(candidate["id"])
        print(row["Title"])
        print(row["Authors"])

        # Read the answer as text first: "-" means "none of these". Converting
        # to int before the comparison made the "-" check unreachable.
        choice = input().strip()
        if choice != "-":
            documents_without_MAG[row["ID"]] = candidates[int(choice)]
    except Exception as exc:
        # Best-effort: a failed request or an invalid choice skips this document
        # only. Catching Exception (not a bare except) keeps Ctrl-C working.
        print(f"skipped {row['ID']}: {exc!r}")
        continue

        

1 / 569
https://openalex.org/W22376524
https://openalex.org/W1513290917
The TREC-9 Interactive Track Report.
['William R. Hersh' 'Paul Over']


 0


2 / 569
no matches
3 / 569
no matches
4 / 569
One option
5 / 569
One option
6 / 569
no matches
7 / 569
One option
8 / 569
One option
9 / 569
One option
10 / 569
One option
11 / 569
no matches
12 / 569
no matches
13 / 569
https://openalex.org/W2046020226
https://openalex.org/W2916684075
The TREC 2001 Filtering Track Report.
['Stephen E. Robertson' 'Ian Soboroff']


 0


14 / 569
https://openalex.org/W33021439
https://openalex.org/W2916613452
https://openalex.org/W2902475772
The TREC 2001 Interactive Track Report.
['William R. Hersh' 'Paul Over']


 0


15 / 569
One option
16 / 569
One option
17 / 569
One option
18 / 569
One option
19 / 569
One option
20 / 569
One option
21 / 569
One option
22 / 569
One option
23 / 569
One option
24 / 569
One option
25 / 569
One option
26 / 569
One option
27 / 569
One option
28 / 569
One option
29 / 569
One option
30 / 569
One option
31 / 569
One option
32 / 569
One option
33 / 569
One option
34 / 569
One option
35 / 569
One option
36 / 569
One option
37 / 569
One option
38 / 569
One option
39 / 569
One option
40 / 569
https://openalex.org/W1691915759
https://openalex.org/W1744380636
Machine Learning Approach for Homepage Finding Task.
['Wensi Xi' 'Edward A. Fox']


 1


41 / 569
One option
42 / 569
https://openalex.org/W2158890594
https://openalex.org/W2623071438
Overview of the TREC 2002 Question Answering Track.
['Ellen M. Voorhees']


 0


43 / 569
no matches
44 / 569
One option
45 / 569
no matches
46 / 569
One option
47 / 569
One option
48 / 569
One option
49 / 569
One option
50 / 569
One option
51 / 569
One option
52 / 569
One option
53 / 569
One option
54 / 569
One option
55 / 569
One option
56 / 569
One option
57 / 569
no matches
58 / 569
One option
59 / 569
One option
60 / 569
One option
61 / 569
One option
62 / 569
One option
63 / 569
no matches
64 / 569
One option
65 / 569
One option
66 / 569
https://openalex.org/W2159053585
https://openalex.org/W2917293077
https://openalex.org/W2123273303
https://openalex.org/W2917056928
https://openalex.org/W2917915395
TREC GENOMICS Track Overview.
['William R. Hersh' 'Ravi Teja Bhupatiraju']


 0


67 / 569
One option
68 / 569
https://openalex.org/W1481997832
https://openalex.org/W2625462793
Overview of the TREC 2003 Novelty Track.
['Ian Soboroff' 'Donna Harman']


 0


69 / 569
One option
70 / 569
no matches
71 / 569
One option
72 / 569
https://openalex.org/W2125135118
https://openalex.org/W195872438
Fondazione Ugo Bordoni at TREC 2003: Robust and Web Track.
['Giambattista Amati' 'Claudio Carpineto' 'Giovanni Romano']


 0


73 / 569
One option
74 / 569
One option
75 / 569
One option
76 / 569
One option
77 / 569
no matches
78 / 569
One option
79 / 569
no matches
80 / 569
One option
81 / 569
One option
82 / 569
One option
83 / 569
One option
84 / 569
no matches
85 / 569
no matches
86 / 569
https://openalex.org/W2096623622
https://openalex.org/W1493399377
https://openalex.org/W2123273303
https://openalex.org/W60779023
https://openalex.org/W95791645
https://openalex.org/W2185884126
https://openalex.org/W159389352
https://openalex.org/W2751100699
Overview of TREC 2004.
['Ellen M. Voorhees']


 0


87 / 569
One option
88 / 569
https://openalex.org/W1493399377
https://openalex.org/W2185884126
HARD Track Overview in TREC 2004 - High Accuracy Retrieval from Documents.
['James Allan']


 0


89 / 569
no matches
90 / 569
no matches
91 / 569
no matches
92 / 569
One option
93 / 569
One option
94 / 569
One option
95 / 569
One option
96 / 569
One option
97 / 569
One option
98 / 569
no matches
99 / 569
One option
100 / 569
One option
101 / 569
no matches
102 / 569
no matches
103 / 569
One option
104 / 569
One option
105 / 569
One option
106 / 569
One option
107 / 569
One option
108 / 569
One option
109 / 569
One option
110 / 569
One option
111 / 569
One option
112 / 569
One option
113 / 569
One option
114 / 569
https://openalex.org/W2171278444
https://openalex.org/W1598072270
The Lowlands' TREC Experiments 2005.
['Henning Rode' 'Djoerd Hiemstra' 'Georgina Ramírez' 'Thijs Westerveld'
 'Arjen P. de Vries']


 0


115 / 569
One option
116 / 569
One option
117 / 569
One option
118 / 569
One option
119 / 569
One option
120 / 569
One option
121 / 569
One option
122 / 569
One option
123 / 569
One option
124 / 569
https://openalex.org/W2170339378
https://openalex.org/W2134289386
Report on the TREC 2005 Experiment: Genomics Track.
['Patrick Ruch' 'Frédéric Ehrler' 'Samir Abdou' 'Jacques Savoy']


 0


125 / 569
One option
126 / 569
One option
127 / 569
One option
128 / 569
no matches
129 / 569
One option
130 / 569
One option
131 / 569
https://openalex.org/W1503333931
https://openalex.org/W2611071287
https://openalex.org/W38978401
https://openalex.org/W2059126068
https://openalex.org/W77404731
https://openalex.org/W2917293077
https://openalex.org/W2123545015
https://openalex.org/W2917688635
https://openalex.org/W2915983684
https://openalex.org/W2917229916
Overview of the TREC 2006.
['Ellen M. Voorhees']


 4


132 / 569
One option
133 / 569
One option
134 / 569
One option
135 / 569
One option
136 / 569
no matches
137 / 569
One option
138 / 569
no matches
139 / 569
One option
140 / 569
One option
141 / 569
One option
142 / 569
One option
143 / 569
no matches
144 / 569
One option
145 / 569
One option
146 / 569
One option
147 / 569
no matches
148 / 569
https://openalex.org/W178065327
https://openalex.org/W2028065606
https://openalex.org/W152079047
Question Answering Experiments and Resources.
['Boris Katz' 'Gregory Marton' 'Sue Felshin' 'Daniel Loreto' 'Ben Lu'
 'Federico Mora' 'Özlem Uzuner' 'Michael McGraw-Herdeg' 'Natalie Cheung'
 'Alexey Radul' 'Yuan Kui Shen' 'Yuan Luo' 'Gabriel Zaccak']


 0


149 / 569
One option
150 / 569
One option
151 / 569
One option
152 / 569
One option
153 / 569
https://openalex.org/W2042203167
https://openalex.org/W1507492629
https://openalex.org/W2018448379
https://openalex.org/W1515002215
https://openalex.org/W2610619540
https://openalex.org/W2785689413
https://openalex.org/W2283073075
https://openalex.org/W2804532666
https://openalex.org/W1972952584
https://openalex.org/W1544219492
https://openalex.org/W4255604262
https://openalex.org/W2063878244
https://openalex.org/W2749739073
https://openalex.org/W2736163813
https://openalex.org/W4301085155
https://openalex.org/W2012459642
https://openalex.org/W599134743
https://openalex.org/W2411091188
https://openalex.org/W16576661
https://openalex.org/W1978759061
https://openalex.org/W2619291334
https://openalex.org/W1973465898
https://openalex.org/W4210946305
https://openalex.org/W3036145734
https://openalex.org/W4235430246
The Robert Gordon University.
['Malcolm Clark' 'Ulises Cerviño Beresi' 'Stuart N. K.

 -


154 / 569
One option
155 / 569
One option
156 / 569
https://openalex.org/W1692172229
https://openalex.org/W2103194124
Expanding Queries Using Multiple Resources.
['Edgar Meij' 'Maarten de Rijke' 'Machiel Jansen']


 -


157 / 569
One option
158 / 569
no matches
159 / 569
no matches
160 / 569
no matches
161 / 569
One option
162 / 569
One option
163 / 569
One option
164 / 569
One option
165 / 569
no matches
166 / 569
One option
167 / 569
One option
168 / 569
One option
169 / 569
One option
170 / 569
https://openalex.org/W2593172760
https://openalex.org/W2899701551
Overview of the TREC 2007 Question Answering Track.
['Hoa Trang Dang' 'Diane Kelly' 'Jimmy Lin']


 -


171 / 569
One option
172 / 569
One option
173 / 569
One option
174 / 569
One option
175 / 569
One option
176 / 569
One option
177 / 569
One option
178 / 569
One option
179 / 569
One option
180 / 569
no matches
181 / 569
One option
182 / 569
One option
183 / 569
One option
184 / 569
One option
185 / 569
One option
186 / 569
no matches
187 / 569
no matches
188 / 569
no matches
189 / 569
https://openalex.org/W2118632115
https://openalex.org/W2555213299
UMass Complex Interactive Question Answering (ciQA) 2007: Human Performance as Question Answerers.
['Mark D. Smucker' 'James Allan' 'Blagovest Dachev']


 0


190 / 569
One option
191 / 569
One option
192 / 569
One option
193 / 569
One option
194 / 569
One option
195 / 569
One option
196 / 569
One option
197 / 569
One option
198 / 569
One option
199 / 569
no matches
200 / 569
One option
201 / 569
One option
202 / 569
https://openalex.org/W2092020620
https://openalex.org/W2105293186
Where to Stop Reading a Ranked List?
['Avi Arampatzis' 'Jaap Kamps']


 1


203 / 569
One option
204 / 569
One option
205 / 569
no matches
206 / 569
One option
207 / 569
One option
208 / 569
One option
209 / 569
One option
210 / 569
One option
211 / 569
One option
212 / 569
no matches
213 / 569
One option
214 / 569
One option
215 / 569
One option
216 / 569
One option
217 / 569
https://openalex.org/W1205839182
https://openalex.org/W287654039
Mining Specific and General Features in Both Positive and Negative Relevance Feedback.
['Yuefeng Li' 'Xiaohui Tao' 'Abdulmohsen Algarni' 'Sheng-Tang Wu']


 0


218 / 569
One option
219 / 569
One option
220 / 569
no matches
221 / 569
One option
222 / 569
https://openalex.org/W2119875355
https://openalex.org/W2123032859
Result Diversity and Entity Ranking Experiments: Anchors, Links, Text and Wikipedia.
['Rianne Kaptein' 'Marijn Koolen' 'Jaap Kamps']


 0


223 / 569
One option
224 / 569
One option
225 / 569
no matches
226 / 569
no matches
227 / 569
One option
228 / 569
One option
229 / 569
One option
230 / 569
One option
231 / 569
One option
232 / 569
https://openalex.org/W2399793724
https://openalex.org/W2405178010
https://openalex.org/W2916910287
TREC 2010 Blog Track: Top Stories Identification.
['Yeha Lee' 'Woosang Song' 'Hun-Young Jung' 'Vinh Tao Thanh'
 'Jong-Hyeok Lee']


 2


233 / 569
One option
234 / 569
no matches
235 / 569
https://openalex.org/W2916518273
https://openalex.org/W2185996954
UCD SIFT in the TREC 2010 Web Track.
['David Leonard' 'Lusheng Zhang' 'David Lillis' 'Fergus Toolan'
 'Rem W. Collier' 'John Dunnion']


 1


236 / 569
One option
237 / 569
One option
238 / 569
One option
239 / 569
One option
240 / 569
no matches
241 / 569
One option
242 / 569
One option
243 / 569
One option
244 / 569
no matches
245 / 569
no matches
246 / 569
One option
247 / 569
One option
248 / 569
https://openalex.org/W2003018928
https://openalex.org/W2617920142
https://openalex.org/W2394859670
https://openalex.org/W2909490623
https://openalex.org/W2605149786
https://openalex.org/W2407399750
https://openalex.org/W2789044833
https://openalex.org/W2979770634
https://openalex.org/W2787537954
https://openalex.org/W2968269360
https://openalex.org/W2996788538
https://openalex.org/W2293713370
https://openalex.org/W2554825521
https://openalex.org/W2928627654
https://openalex.org/W4286815418
https://openalex.org/W1034866298
Query Expansion for Microblog Retrieval.
['Ayan Bandyopadhyay' 'Mandar Mitra' 'Prasenjit Majumder']


 2


249 / 569
One option
250 / 569
One option
251 / 569
One option
252 / 569
One option
253 / 569
One option
254 / 569
One option
255 / 569
https://openalex.org/W221700457
https://openalex.org/W2401309922
Search for Clinical Records: RMIT at Medical TREC.
['Iman Amini' 'Mark Sanderson' 'David Martínez' 'Xiaodong Li']


 1


256 / 569
One option
257 / 569
One option
258 / 569
One option
259 / 569
One option
260 / 569
One option
261 / 569
One option
262 / 569
One option
263 / 569
https://openalex.org/W2294880871
https://openalex.org/W296548042
University of Glasgow at Medical Records Track: Experiments with Terrier.
['Nut Limsopatham' 'Craig Macdonald' 'Iadh Ounis' 'Graham McDonald'
 'Matt-Mouley Bouamrane']


 0


264 / 569
One option
265 / 569
no matches
266 / 569
no matches
267 / 569
One option
268 / 569
One option
269 / 569
One option
270 / 569
no matches
271 / 569
One option
272 / 569
One option
273 / 569
https://openalex.org/W2399079233
https://openalex.org/W2735971815
Overview of the TREC-2012 Microblog Track.
['Ian Soboroff' 'Iadh Ounis' 'Craig Macdonald' 'Jimmy Lin']


 -


274 / 569
no matches
275 / 569
One option
276 / 569
no matches
277 / 569
no matches
278 / 569
no matches
279 / 569
https://openalex.org/W2739957794
https://openalex.org/W36361912
https://openalex.org/W2398973937
https://openalex.org/W2185885129
https://openalex.org/W3186464731
https://openalex.org/W2174768191
https://openalex.org/W1990387666
https://openalex.org/W2090909419
https://openalex.org/W1988144039
https://openalex.org/W1878790362
https://openalex.org/W2798383927
https://openalex.org/W1662336185
https://openalex.org/W2405827643
https://openalex.org/W2346242649
https://openalex.org/W214935557
https://openalex.org/W2150461528
https://openalex.org/W2890071448
https://openalex.org/W2574111519
https://openalex.org/W269085259
https://openalex.org/W2133777313
https://openalex.org/W2040380996
https://openalex.org/W994503675
https://openalex.org/W4324044803
https://openalex.org/W2151119072
https://openalex.org/W2604709039
Contextual Suggestion.
['Abhishek Mallik' 'Mandar Mitra' 'Kripaba

 -


280 / 569
One option
281 / 569
One option
282 / 569
One option
283 / 569
One option
284 / 569
One option
285 / 569
no matches
286 / 569
One option
287 / 569
One option
288 / 569
https://openalex.org/W2395265779
https://openalex.org/W341479092
https://openalex.org/W2394797107
https://openalex.org/W990735751
Evaluating Stream Filtering for Entity Profile Updates for TREC 2013.
['John R. Frank' 'Steven J. Bauer' 'Max Kleiman-Weiner'
 'Daniel A. Roberts' 'Nilesh Tripuraneni' 'Ce Zhang' 'Christopher Ré'
 'Ellen M. Voorhees' 'Ian Soboroff']


 0


289 / 569
One option
290 / 569
One option
291 / 569
no matches
292 / 569
One option
293 / 569
no matches
294 / 569
One option
295 / 569
One option
296 / 569
One option
297 / 569
One option
298 / 569
One option
299 / 569
One option
300 / 569
https://openalex.org/W2917201903
https://openalex.org/W2181976051
Overview of the TREC 2014 Federated Web Search Track.
['Thomas Demeester' 'Dolf Trieschnigg' 'Dong Nguyen' 'Djoerd Hiemstra'
 'Ke Zhou']


 -


301 / 569
One option
302 / 569
https://openalex.org/W1483624827
https://openalex.org/W2734889908
https://openalex.org/W2917201903
https://openalex.org/W2181976051
TREC 2014 Web Track Overview.
['Kevyn Collins-Thompson' 'Craig Macdonald' 'Paul N. Bennett'
 'Fernando Diaz' 'Ellen M. Voorhees']


 -


303 / 569
no matches
304 / 569
One option
305 / 569
One option
306 / 569
One option
307 / 569
One option
308 / 569
https://openalex.org/W4298858394
https://openalex.org/W990215599
IRIT at TREC Temporal Summarization 2014.
['Rafik Abbes' 'Karen Pinel-Sauvagnat' 'Nathalie Hernandez'
 'Mohand Boughanem']


 -


309 / 569
no matches
310 / 569
One option
311 / 569
One option
312 / 569
One option
313 / 569
One option
314 / 569
One option
315 / 569
One option
316 / 569
One option
317 / 569
One option
318 / 569
One option
319 / 569
One option
320 / 569
One option
321 / 569
One option
322 / 569
One option
323 / 569
One option
324 / 569
One option
325 / 569
One option
326 / 569
One option
327 / 569
One option
328 / 569
One option
329 / 569
One option
330 / 569
no matches
331 / 569
One option
332 / 569
One option
333 / 569
One option
334 / 569
One option
335 / 569
One option
336 / 569
One option
337 / 569
One option
338 / 569
One option
339 / 569
One option
340 / 569
One option
341 / 569
One option
342 / 569
One option
343 / 569
One option
344 / 569
no matches
345 / 569
One option
346 / 569
One option
347 / 569
One option
348 / 569
One option
349 / 569
One option
350 / 569
One option
351 / 569
One option
352 / 569
One option
353 / 569
https://openalex.org/W3013947117
https://openalex.org/W2995970948


 -


354 / 569
One option
355 / 569
One option
356 / 569
One option
357 / 569
One option
358 / 569
One option
359 / 569
One option
360 / 569
https://openalex.org/W3176967269
https://openalex.org/W3147642179
TREC 2020 Podcasts Track Overview.
['Rosie Jones' 'Ben Carterette' 'Ann Clifton' 'Jussi Karlgren'
 'Aasish Pappu' 'Sravana Reddy' 'Yongze Yu' 'Maria Eskevich'
 'Gareth J. F. Jones']


 0


361 / 569
https://openalex.org/W4287623114
https://openalex.org/W3173904029
Overview of the TREC 2020 Precision Medicine Track.
['Kirk Roberts' 'Dina Demner-Fushman' 'Ellen M. Voorhees' 'Steven Bedrick'
 'William R. Hersh']


 -


362 / 569
One option
363 / 569
One option
364 / 569
https://openalex.org/W3175111331
https://openalex.org/W3174723227
https://openalex.org/W3106510880
https://openalex.org/W3175791145
https://openalex.org/W3175663330
https://openalex.org/W3176940817
https://openalex.org/W3175649392
https://openalex.org/W3177156676
https://openalex.org/W3173163786
https://openalex.org/W3173211693
https://openalex.org/W3174539962
https://openalex.org/W3176877399
https://openalex.org/W4287750363
https://openalex.org/W3038620230
2020 Deep Learning Track.
['Tiago Almeida' 'Sérgio Matos']


 4


365 / 569
One option
366 / 569
One option
367 / 569
One option
368 / 569
One option
369 / 569
One option
370 / 569
One option
371 / 569
One option
372 / 569
https://openalex.org/W3111892376
https://openalex.org/W3176082536
https://openalex.org/W4287557031
CUED_SPEECH at TREC 2020 Podcast Summarisation Track.
['Potsawee Manakul' 'Mark J. F. Gales']


 -


373 / 569
One option
374 / 569
One option
375 / 569
One option
376 / 569
One option
377 / 569
One option
378 / 569
One option
379 / 569
One option
380 / 569
One option
381 / 569
One option
382 / 569
One option
383 / 569
One option
384 / 569
One option
385 / 569
One option
386 / 569
https://openalex.org/W3134362558
https://openalex.org/W3174284527
LRG at TREC 2020: Document Ranking with XLNet-Based Models.
['Abheesht Sharma' 'Harshit Pandey']


 -


387 / 569
One option
388 / 569
One option
389 / 569
One option
390 / 569
One option
391 / 569
One option
392 / 569
One option
393 / 569
One option
394 / 569
One option
395 / 569
One option
396 / 569
One option
397 / 569
One option
398 / 569
One option
399 / 569
One option
400 / 569
One option
401 / 569
One option
402 / 569
One option
403 / 569
One option
404 / 569
One option
405 / 569
https://openalex.org/W3140985030
https://openalex.org/W3173145038
Spotify at TREC 2020: Genre-Aware Abstractive Podcast Summarization.
['Rezvaneh Rezapour' 'Sravana Reddy' 'Ann Clifton' 'Rosie Jones']


 1


406 / 569
One option
407 / 569
One option
408 / 569
One option
409 / 569
One option
410 / 569
One option
411 / 569
https://openalex.org/W3134113364
https://openalex.org/W3176023624
Multi-task Transfer Learning for Finding Actionable Information from Crisis-related Messages on Social Media.
['Congcong Wang' 'David Lillis']


 -


412 / 569
One option
413 / 569
One option
414 / 569
One option
415 / 569
One option
416 / 569
One option
417 / 569
One option
418 / 569
One option
419 / 569
One option
420 / 569
One option
421 / 569
One option
422 / 569
https://openalex.org/W3175571162
https://openalex.org/W3130833176
https://openalex.org/W4287326887
Leveraging Query Resolution and Reading Comprehension for Conversational Passage Retrieval.
['Svitlana Vakulenko' 'Nikos Voskarides' 'Zhucheng Tu' 'Shayne Longpre']


 -


423 / 569
One option
424 / 569
One option
425 / 569
One option
426 / 569
One option
427 / 569
One option
428 / 569
One option
429 / 569
no matches
430 / 569
no matches
431 / 569
One option
432 / 569
no matches
433 / 569
no matches
434 / 569
no matches
435 / 569
no matches
436 / 569
One option
437 / 569
no matches
438 / 569
no matches
439 / 569
no matches
440 / 569
no matches
441 / 569
One option
442 / 569
no matches
443 / 569
no matches
444 / 569
no matches
445 / 569
One option
446 / 569
no matches
447 / 569
no matches
448 / 569
no matches
449 / 569
no matches
450 / 569
https://openalex.org/W4307334198
https://openalex.org/W4226470515
https://openalex.org/W3215030938
Quality and Cost Trade-Offs in Passage Re-Ranking Task.
['Pavel Podberezko' 'Vsevolod Mitskevich' 'Raman Makouski'
 'Pavel Goncharov' 'Andrei Khobnia' 'Nikolay Bushkov'
 'Marina Chernyshevich']


 -


451 / 569
One option
452 / 569
no matches
453 / 569
no matches
454 / 569
no matches
455 / 569
no matches
456 / 569
no matches
457 / 569
One option
458 / 569
One option
459 / 569
One option
460 / 569
no matches
461 / 569
no matches
462 / 569
no matches
463 / 569
no matches
464 / 569
no matches
465 / 569
no matches
466 / 569
no matches
467 / 569
no matches
468 / 569
no matches
469 / 569
no matches
470 / 569
no matches
471 / 569
no matches
472 / 569
no matches
473 / 569
no matches
474 / 569
no matches
475 / 569
no matches
476 / 569
no matches
477 / 569
no matches
478 / 569
no matches
479 / 569
no matches
480 / 569
https://openalex.org/W4308606920
https://openalex.org/W4226161833
UCD-CS at TREC 2021 Incident Streams Track.
['Congcong Wang' 'David Lillis']


 -


481 / 569
no matches
482 / 569
no matches
483 / 569
no matches
484 / 569
no matches
485 / 569
One option
486 / 569
no matches
487 / 569
no matches
488 / 569
One option
489 / 569
no matches
490 / 569
One option
491 / 569
no matches
492 / 569
no matches
493 / 569
no matches
494 / 569
no matches
495 / 569
no matches
496 / 569
no matches
497 / 569
One option
498 / 569
One option
499 / 569
no matches
500 / 569
One option
501 / 569
One option
502 / 569
no matches
503 / 569
no matches
504 / 569
no matches
505 / 569
no matches
506 / 569
no matches
507 / 569
no matches
508 / 569
no matches
509 / 569
no matches
510 / 569
no matches
511 / 569
no matches
512 / 569
no matches
513 / 569
no matches
514 / 569
One option
515 / 569
no matches
516 / 569
One option
517 / 569
no matches
518 / 569
One option
519 / 569
no matches
520 / 569
no matches
521 / 569
no matches
522 / 569
no matches
523 / 569
no matches
524 / 569
no matches
525 / 569
no matches
526 / 569
no matches
527 / 569
no matches
528 / 569
no 

 -


559 / 569
no matches
560 / 569
no matches
561 / 569
no matches
562 / 569
no matches
563 / 569
https://openalex.org/W3013372641
https://openalex.org/W4287822877
CAsT 2019: The Conversational Assistance Track Overview
['Jeffrey Dalton' 'Chenyan Xiong' 'Jamie Callan']


 -


564 / 569
no matches
565 / 569
https://openalex.org/W3130740619
https://openalex.org/W3011794880
Overview of the TREC 2019 Deep Learning Track
['Nick Craswell' 'Bhaskar Mitra' 'Emine Yilmaz' 'Daniel Campos'
 'Ellen M. Voorhees']


 -


566 / 569
no matches
567 / 569
One option
568 / 569
One option
569 / 569
no matches


## Manually add the OpenAlex documents that are still missing

In [13]:
# Records from df_no_mag that the title search did not resolve, i.e. whose ID is
# not a key of documents_without_MAG.
# NOTE(review): the original read from `df_no_mag_not_found` and looped over
# `df_no_mag_not_found_still`; neither name is defined in any visible cell.
# df_no_mag is assumed to be the intended source frame; confirm.
df_no_mag_missing_records = df_no_mag[~df_no_mag["ID"].isin(list(documents_without_MAG.keys()))]

# OpenAlex works entered by hand, keyed by the row's "ID" value
final_missing_records = {}

for _, row in df_no_mag_missing_records.iterrows():

    # Strip commas from the title and percent-encode it for the search query
    title = row["Title"].replace(",", "")
    title_encoded = urllib.parse.quote(title)

    try:
        candidates = Works().search_filter(title=title_encoded).get()
    except Exception as exc:
        # A failed request should not end the whole manual session
        print(f"search failed for {row['ID']}: {exc!r}")
        continue

    if len(candidates) != 0:
        # Only titles with zero search hits are handled manually in this cell
        continue

    # Show the record so the user can look it up and supply an ID
    print(row["Title"])
    print(row["PubYear"])
    print(row["Authors"])
    input_id = input("Enter OpenAlex ID (or '-' to skip): ").strip()
    if input_id in ("", "-"):
        continue

    # Accept both the bare number ("2141672179") and the full ID ("W2141672179")
    openalex_id = input_id if input_id.upper().startswith("W") else "W" + input_id
    try:
        final_missing_records[row["ID"]] = Works()[openalex_id]
    except Exception as exc:
        # A mistyped or unknown ID skips this record instead of aborting the loop
        print(f"could not fetch {openalex_id}: {exc!r}")
    

1 / 222
Question Answering with LCC's CHAUCER at TREC 2006.
2006
['Andrew Hickl' 'John Williams' 'Jeremy Bensley' 'Kirk Roberts' 'Ying Shi'
 'Bryan Rink']


 -


2 / 222
3 / 222
4 / 222
5 / 222
6 / 222
7 / 222
University of Glasgow at TREC 2006: Experiments in Terabyte and Enterprise Tracks with Terrier.
2006
['Christina Lioma' 'Craig Macdonald' 'Vassilis Plachouras' 'Jie Peng'
 'Ben He' 'Iadh Ounis']


 -


8 / 222
Report on the TREC 2006 Experiment: Genomics Track.
2006
['Patrick Ruch' 'Frédéric Ehrler' 'Julien Gobeill' 'Imad Tbahriti'
 'Antonio Jimeno-Yepes']


 -


9 / 222
UIC at TREC 2006 Blog Track.
2006
['Wei Zhang' 'Clement T. Yu']


 -


10 / 222
11 / 222
12 / 222
13 / 222
The University of Sheffield's TREC 2006 Q&A Experiments.
2006
['Mark A. Greenwood' 'Mark Stevenson' 'Robert J. Gaizauskas']


 -


14 / 222
15 / 222
16 / 222
17 / 222
18 / 222
19 / 222
20 / 222
21 / 222
22 / 222
23 / 222
24 / 222
THUIR at TREC 2007: Enterprise Track.
2007
['Yupeng Fu' 'Yufei Xue' 'Tong Zhu' 'Yiqun Liu' 'Min Zhang' 'Shaoping Ma']


 -


25 / 222
26 / 222
27 / 222
Language Modeling Approaches to Blog Postand Feed Finding.
2007
['Breyten Ernsting' 'Wouter Weerkamp' 'Maarten de Rijke']


 2141672179


28 / 222
University of Glasgow at TREC 2007: Experiments in Blog and Enterprise Tracks with Terrier.
2007
['David Hannah' 'Craig Macdonald' 'Jie Peng' 'Ben He' 'Iadh Ounis']


 -


29 / 222
UIC at TREC 2007 Blog Track.
2007
['Wei Zhang' 'Clement T. Yu']


 -


30 / 222
31 / 222
32 / 222
33 / 222
THUIR at TREC 2008: Enterprise Track.
2008
['Yufei Xue' 'Tong Zhu' 'Guichun Hua' 'Min Zhang' 'Yiqun Liu'
 'Shaoping Ma']


 -


34 / 222
UIC at TREC 208 Blog Track.
2008
['Lifeng Jia' 'Clement T. Yu' 'Wei Zhang']


 2124906298


35 / 222
36 / 222
ICTNET at Web Track 2009 Diversity Track.
2009
['Wenjing Bi' 'Xiaoming Yu' 'Yue Liu' 'Feng Guan' 'Zeying Peng'
 'Hongbo Xu' 'Xueqi Cheng']


 -


37 / 222
38 / 222
39 / 222
40 / 222
Related Entity Finding Based on Co-Occurance.
2009
['Marc Bron' 'Krisztian Balog' 'Maarten de Rijke']


 2143405681


41 / 222
42 / 222
43 / 222
44 / 222
University of Twente @ TREC 2009: Indexing Half a Million Web Pages.
2009
['Claudia Hauff' 'Djoerd Hiemstra']


 -


45 / 222
Webis at the TREC 2010 Sessions Track.
2010
['Matthias Hagen' 'Benno Stein' 'Michael Völske']


 -


46 / 222
47 / 222
48 / 222
Mining Specific and General Features in Both Positive and Negative Relevance Feedback: QUT E-Discovery Lab at the TREC 2010 Relevance Feedback Track.
2010
['Abdulmohsen Algarni' 'Yuefeng Li' 'Xiaohui Tao']


 -


49 / 222
50 / 222
51 / 222
Webis at the TREC 2011 Session Track.
2011
['Matthias Hagen' 'Jan Graßegger' 'Maximilian Michel' 'Benno Stein']


 -


52 / 222
53 / 222
54 / 222
ICTNET at Session Track TREC 2011.
2011
['Mingxhuan Wei' 'Yuanhai Xue' 'Chen Xu' 'Xiaoming Yu' 'Yue Liu'
 'Xueqi Cheng']


 -


55 / 222
ICTNET at Web Track 2011 Diversity Track.
2011
['Shengxian Wan' 'Yuanhai Xue' 'Xiaoming Yu' 'Feng Guan' 'Yue Liu'
 'Xueqi Cheng']


 -


56 / 222
57 / 222
58 / 222
59 / 222
60 / 222
61 / 222
62 / 222
63 / 222
64 / 222
University of Indonesia at TREC 2011 Microblog Task.
2011
['Samuel Louvan' 'Mochamad Ibrahim' 'Mirna Adriani' 'Clara Vania'
 'Bayu Distiawan' 'Metti Zakaria Wanagiri']


 2406898093


65 / 222
Fasikom UI at TREC 2011 Entity List Completion Task.
2011
['Ananda Budi Prasetya' 'Hapnes Toba' 'Mirna Adriani'
 'Hisar Maruli Manurung']


 2405016982


66 / 222
67 / 222
68 / 222
Cohort Shepherd: Discoving Cohort Traits from Hospital Visits.
2011
['Travis R. Goodwin' 'Bryan Rink' 'Kirk Roberts' 'Sanda M. Harabagiu']


 -


69 / 222
70 / 222
PRIS at TREC 2012 Contextual Suggestion Track.
2012
['Lin Qiu' 'JunRui Peng' 'Qianqian Wang' 'Yue Liu' 'Zhihua Zhou'
 'Weiran Xu' 'Guang Chen' 'Jun Guo']


 -


71 / 222
ICTNET at Session Track TREC 2012.
2012
['Zhenhong Chen' 'Mingchuan Wei' 'Junxiao Nan' 'Jun Chen' 'Xiaoming Yu'
 'Yue Liu' 'Xueqi Cheng']


 -


72 / 222
ICTNET at Web Track 2012 Diversity Task.
2012
['Zilong Feng' 'Yuanhai Xue' 'Xiaoming Yu' 'Hongbo Xu' 'Yue Liu'
 'Xueqi Cheng']


 -


73 / 222
DCU@TRECMed 2012: Using adhoc Baselines for Domain-Specific Retrieval.
2012
['Johannes Leveling' 'Lorraine Goeuriot' 'Liadh Kelly'
 'Gareth J. F. Jones']


 -


74 / 222
75 / 222
76 / 222
77 / 222
QUT_Para at TREC 2012 Web Track: Word Associations for Retrieving Web Documents.
2012
['Mike Symonds' 'Guido Zuccon' 'Bevan Koopman' 'Peter Bruza']


 2163647195


78 / 222
79 / 222
80 / 222
CWI and TU Delft Notebook TREC 2013: Contextual Suggestion, Federated Web Search, KBA, and Web Tracks.
2013
['Alejandro Bellogín' 'Gebrekirstos G. Gebremeskel' 'Jiyin He' 'Alan Said'
 'Thaer Samar' 'Arjen P. de Vries' 'Jimmy Lin' 'Jeroen B. P. Vuurens']


 2289557876


81 / 222
ICTNET at Session Track TREC 2013.
2013
['Zhenhong Chen' 'Long Xia' 'Xiaoming Yu' 'Yue Liu' 'Xueqi Cheng']


 -


82 / 222
83 / 222
84 / 222
85 / 222
Full-texts representation with Medical Subject Headings, and co-citations network rerank- ing strategies for TREC 2014 Clinical Decision Support Track.
2014
['Julien Gobeill' 'Arnaud Gaudinat' 'Emilie Pasche' 'Patrick Ruch']


 -


86 / 222
87 / 222
88 / 222
89 / 222
Siena's Twitter Information Retrieval System: The 2014 Microblog Track.
2014
['Timothy LaRock' 'Lauren Mathews' 'Matthew Roberts' 'Darren Lim'
 'Sharon G. Small']


 -


90 / 222
91 / 222
92 / 222
Overview of the TREC 2017 Precision Medicine Track.
2017
['Kirk Roberts' 'Dina Demner-Fushman' 'Ellen M. Voorhees'
 'William R. Hersh' 'Steven Bedrick' 'Alexander J. Lazar' 'Shubham Pant']


 -


93 / 222
94 / 222
IRIT at TREC Real-Time Summarization 2018.
2018
['Abdelhamid Chellal' 'Mohand Boughanem']


 -


95 / 222
96 / 222
97 / 222
98 / 222
99 / 222
100 / 222
101 / 222
102 / 222
103 / 222
104 / 222
105 / 222
TREC CAsT 2021: The Conversational Assistance Track Overview.
2021
['Jeffrey Dalton' 'Chenyan Xiong' 'Jamie Callan']


 -


106 / 222
Overview of the TREC 2021 Deep Learning Track.
2021
['Nick Craswell' 'Bhaskar Mitra' 'Emine Yilmaz' 'Daniel Campos'
 'Jimmy Lin']


 -


107 / 222
Overview of the TREC 2021 Health Misinformation Track.
2021
['Charles L. A. Clarke' 'Maria Maistro' 'Mark D. Smucker']


 -


108 / 222
TREC 2021 Podcasts Track Overview.
2021
['Jussi Karlgren' 'Rosie Jones' 'Ben Carterette' 'Ann Clifton'
 'Edgar Tanaka' 'Maria Eskevich' 'Gareth J. F. Jones' 'Sravana Reddy']


 -


109 / 222
Alibaba DAMO Academy at TREC Clinical Trials 2021: ExploringEmbedding-based First-stage Retrieval with TrialMatcher.
2021
['Qiao Jin' 'Chuanqi Tan' 'Zhengyun Zhao' 'Zheng Yuan' 'Songfang Huang']


 -


110 / 222
SIB Text Mining at TREC Clinical Trials 2021.
2021
['Déborah Caucheteur' 'Emilie Pasche' 'Luc Mottin' 'Anaïs Mottaz'
 'Julien Gobeill' 'Patrick Ruch']


 -


111 / 222
An Exploration Study of Multi-stage Conversational Passage Retrieval: Paraphrase Query Expansion and Multi-view Point-wise Ranking.
2021
['Jia-Huei Ju' 'Chih-Ting Yeh' 'Cheng-Wei Lin' 'Chia-Ying Tsao'
 'Jun-En Ding' 'Chuan-Ju Wang' 'Ming-Feng Tsai']


 -


112 / 222
CincyMedIR at TREC 2021 Clinical Trial Track.
2021
['Hoang Vu' 'Danny T. Y. Wu']


 -


113 / 222
CIP at TREC 2021 Deep Learning Track.
2021
['Xuanang Chen' 'Ben He' 'Le Sun' 'Yingfei Sun']


 -


114 / 222
CiTIUS at the TREC 2021 Health Misinformation Track.
2021
['Marcos Fernández-Pichel' 'Manuel de Prada Corral' 'David E. Losada'
 'Juan Carlos Pichel' 'Pablo Gamallo']


 -


115 / 222
116 / 222
Query Rewriting with Expansion and Multi-Turn Entity Graphs for Answer Selection.
2021
['Nour Jedidi' 'Gustavo Gonçalves' 'Jamie Callan']


 -


117 / 222
Finding Context through Utterance Dependencies in Search Conversations - Participation of the CNR Team in CAsT 2021.
2021
['Ida Mele' 'Cristina Ioana Muntean' 'Franco Maria Nardini'
 'Raffaele Perego' 'Nicola Tonellotto']


 -


118 / 222
CSIROmed Team Report of TREC 2021 Clinical Trials track: Experiments with BERT Reranking Methods.
2021
['Maciej Rybinski' 'Vincent Nguyen' 'Sarvnaz Karimi']


 -


119 / 222
DOSSIER at TREC 2021 Clinical Trials Track.
2021
['Wojciech Kusa' 'Yasin Ghafourian']


 -


120 / 222
University of Hagen @ TREC2021 News Track.
2021
['Stefan Wagenpfeil' 'Matthias L. Hemmje' 'Paul Mc Kevitt']


 -


121 / 222
Clinical Trial Search Using Lucene and UMLS.
2021
['Yanqing Ji' 'Yun Tian' 'Hao Ying' 'John Tran']


 -


122 / 222
IBM @ TREC Clinical Trials Track 2021.
2021
['Laura Biester' 'Venkata Joopudi' 'Bharath Dandala']


 -


123 / 222
124 / 222
Filter, Transform, Expand, and Fuse The IMS Unipd at TREC 2021 Clinical Trials.
2021
['Giorgio Maria Di Nunzio' 'Guglielmo Faggioli' 'Stefano Marchesin']


 -


125 / 222
IRCologne at TREC 2021 News Track Relation-based re-ranking for background linking.
2021
['Björn Engelmann' 'Philipp Schaer']


 -


126 / 222
IRLab-Amsterdam at TREC 2021 Conversational Assistant Track.
2021
['Antonios Minas Krasakis' 'Evangelos Kanoulas']


 -


127 / 222
The University of Amsterdam at the TREC 2021 Fair Ranking Track.
2021
['Ali Vardasbi' 'Gabriel Bénédict' 'Shashank Gupta' 'Maria Heuss'
 'Pooya Khandel' 'Ming Li' 'Fatemeh Sarvi']


 -


128 / 222
TREC 2021⋆ Clinical Trials Retrieval, Duisburg-Essen University submission.
2021
['Sameh Frihat' 'Norbert Fuhr']


 -


129 / 222
L3S at the TREC 2021 Deep Learning Track.
2021
['Jurek Leonhardt' 'Avishek Anand' 'Koustav Rudra']


 -


130 / 222
Middlebury at TREC News '21 Exploring Learning to Rank Model Variants.
2021
['Culton Koster' 'John Foley']


 -


131 / 222
MLIA-LIP6@TREC-CAST2021: Feature augmentation for query recontextualization and passage ranking.
2021
['Nawel Astaouti' 'Thomas Gerald' 'Maya Touzari' 'Laure Soulier'
 'Jian-Yun Nie']


 -


132 / 222
Method Comparison for Crisis Pipelines.
2021
['Shivam Sharma' 'Cody Buntain']


 -


133 / 222
Naver Labs Europe (SPLADE) @ TREC Deep Learning 2021.
2021
['Carlos Lassance' 'Arnaud Sors' 'Stéphane Clinchant' 'Thibault Formal'
 'Benjamin Piwowarski']


 -


134 / 222
Hybrid Re-ranking for Biomedical Information Retrieval at the TREC 2021 Clinical Trials Track.
2021
['Ming-Xuan Shi' 'Tsung-Hsuan Pan' 'Hsin-Hsi Chen' 'Hen-Hsen Huang']


 -


135 / 222
PASH at TREC 2021 Deep Learning Track: Generative Enhanced Model for Multi-stageRankingtrack: DL.
2021
['Yixuan Qiao' 'Hao Chen' 'Tuozhen Liu' 'Xianbin Ye' 'Jun Wang' 'Peng Gao'
 'Guotong Xie']


 -


136 / 222
PoliTO at TREC 2021 Podcast Summarization Track.
2021
['Lorenzo Vaiani' 'Moreno La Quatra' 'Luca Cagliero' 'Paolo Garza']


 -


137 / 222
Pozna'n Contribution to TREC Clinical Trials 2021⋆.
2021
['Jakub Dutkiewicz' 'Czeslaw Jedrzejek']


 -


138 / 222
bigIR at TREC 2021: Adopting Transfer Learning for News Background Linking.
2021
['Marwa Essam' 'Tamer Elsayed']


 -


139 / 222
RMIT at TREC 2021 Fair Ranking Track.
2021
['Sachin Pathiyan Cherumanal' 'Damiano Spina' 'Falk Scholer'
 'W. Bruce Croft']


 -


140 / 222
Radboud University at TREC CAsT 2021.
2021
['Hideaki Joko' 'Emma J. Gerritse' 'Faegheh Hasibi' 'Arjen P. de Vries']


 -


141 / 222
Siena's Incident Stream System SISS.
2021
['Ting Liu' 'Sharon Gower Small' 'Patrick Baumgardner' 'Lydia Cartwright'
 'Michael Coppola' 'Samuil Orlioglu']


 -


142 / 222
SU-NLP at TREC NEWS 2021.
2021
['Kenan Fayoumi' 'Reyyan Yeniterzi']


 -


143 / 222
The Application of Traditional IE as a Non-traditional Method in an IR Task: TDMINER at 2021 TREC Clinical Trials.
2021
['Chengyi Zheng']


 -


144 / 222
TKB48 at TREC 2021 Conversational Assistance Track.
2021
['Yubo Fang' 'Hideo Joho' 'Sumio Fujita']


 -


145 / 222
TKB48 at TREC 2021 Fairness Ranking Track.
2021
['Zhuoqi Jin' 'Hideo Joho' 'Sumio Fujita']


 -


146 / 222
TKB48 at TREC 2021 News Track.
2021
['Lirong Zhang' 'Hideo Joho' 'Sumio Fujita']


 -


147 / 222
TU Wien at TREC DL and Podcast 2021: Simple Compression for Dense Retrieval.
2021
['Sebastian Hofstätter' 'Mete Sertkan' 'Allan Hanbury']


 -


148 / 222
Recall Aspects of Transformers for Text Ranking.
2021
['David Rau' 'Jaap Kamps']


 -


149 / 222
150 / 222
The University of Stavanger (IAI) at the TREC 2021 Conversational Assistance Track.
2021
['Ivica Kostric' 'Krisztian Balog' 'Magnus Book' 'Trond Linjordet'
 'Vinay Setty']


 -


151 / 222
Full-Collection Search with Passage and Document Evidence: Maryland at the TREC 2021 Conversational Assistance Track.
2021
['Xin Qian' 'Douglas W. Oard']


 -


152 / 222
TREC 2021 Clinical Trials Submission for Universidad del País Vasco.
2021
['Jordan Koontz' 'Maite Oronoz' 'Alicia Pérez']


 -


153 / 222
Multilingual Podcast Summarization using Longformers.
2021
['Edgar Tanaka' 'Ann Clifton' 'Md. Iftekhar Tanveer']


 -


154 / 222
UNTIIA Lab at TREC 2021 - Clinical Trial.
2021
['Huyen Nguyen' 'Haihua Chen' 'Bhanu Prasad' 'Huanhuan Zhao' 'Junhua Ding'
 'Jiangping Chen' 'Ana D. Cleveland']


 -


155 / 222
University of Glasgow Terrier Team (uogTr) at the TREC 2021 Incident Streams Track.
2021
['Alexander J. Hepburn' 'Richard McCreadie']


 -


156 / 222
UWaterlooMDS at the TREC 2021 Health Misinformation Track.
2021
['Mustafa Abualsaud' 'Kamyar Ghajar' 'Linh Nhi Phan Minh' 'Dake Zhang'
 'Irene Xiangyi Chen' 'Mark D. Smucker' 'Amir Vakili Tahami']


 -


157 / 222
158 / 222
WaterlooClarke at the TREC 2021 Conversational Assistant Track.
2021
['Xinyi Yan' 'Charles L. A. Clarke' 'Negar Arabzadeh']


 -


159 / 222
Webis at TREC 2021: Deep Learning, Health Misinformation, and Podcasts Tracks.
2021
['Alexander Bondarenko' 'Maik Fröbe' 'Sebastian Günther' 'Matthias Hagen'
 'Marcel Gohsen' 'Johannes Kiesel' 'Michael Völske' 'Benno Stein'
 'Jakob Schwerter' 'Shahbaz Syed' 'Martin Potthast']


 -


160 / 222
WisPerMed Text at TREC Clinical Trials Track 2021.
2021
['Henning Schäfer' 'Ahmad Idrissi-Yaghir' 'Wolfgang Galetzka'
 'Marie Bexte' 'Christoph M. Friedrich']


 -


161 / 222
York University at TREC 2021: Deep Learning Track.
2021
['Yizheng Huang' 'Jimmy X. Huang']


 -


162 / 222
TREC CAsT 2022: Going Beyond User Ask and System Retrieve with Initiative and Response Generation.
2022
['Paul Owoicho' 'Jeff Dalton' 'Mohammad Aliannejadi' 'Leif Azzopardi'
 'Johanne R. Trippas' 'Svitlana Vakulenko']


 -


163 / 222
Overview of the TREC 2022 Deep Learning Track.
2022
['Nick Craswell' 'Bhaskar Mitra' 'Emine Yilmaz' 'Daniel Campos'
 'Jimmy Lin' 'Ellen M. Voorhees' 'Ian Soboroff']


 -


164 / 222
Overview of the TREC 2022 Clinical Trials Track.
2022
['Kirk Roberts' 'Dina Demner-Fushman' 'Ellen M. Voorhees' 'Steven Bedrick'
 'William R. Hersh']


 -


165 / 222
166 / 222
CFDA & CLIP at TREC 2022 Conversational Assistance Track (CAsT).
2022
['Jia-Huei Ju' 'Sheng-Chieh Lin' 'Li-Young Chang' 'Ming-Feng Tsai'
 'Chuan-Ju Wang']


 -


167 / 222
CFDA & CLIP at TREC 2022 NeuCLIR Track.
2022
['Jia-Huei Ju' 'Wei-Chih Chen' 'Heng-Ta Chang' 'Cheng-Wei Lin'
 'Ming-Feng Tsai' 'Chuan-Ju Wang']


 -


168 / 222
CIP at TREC 2022 Deep Learning Track.
2022
['Jian Luo' 'Xinlin Peng' 'Xuanang Chen' 'Ben He' 'Le Sun' 'Yingfei Sun']


 -


169 / 222
Context Propagation in Conversational Search Utterances Participation of the CNR Team in CAsT 2022.
2022
['Ida Mele' 'Cristina Ioana Muntean' 'Franco Maria Nardini'
 'Raffaele Perego' 'Nicola Tonellotto']


 -


170 / 222
Matching a Patient from An Admission Note to Clinical Trials: Experiments with Query Generation and Neural-Ranking.
2022
['Vincent Nguyen' 'Maciej Rybinski' 'Sarvnaz Karimi']


 -


171 / 222
CiTIUS at the TREC 2022 Health Misinformation Track.
2022
['Marcos Fernández-Pichel' 'Manuel de Prada Corral' 'David E. Losada'
 'Juan Carlos Pichel']


 -


172 / 222
HNUST @ TREC 2022 NeuCLIR Track.
2022
['Ge Zhang' 'Qiwen Ye' 'Mengmeng Wang' 'Dong Zhou']


 -


173 / 222
University of Cambridge at TREC Cast 2022.
2022
['Adian Liusie' 'Mengjie Qian' 'Xiang Li' 'Mark J. F. Gales']


 -


174 / 222
Extremely Fast Fine-Tuning for Cross Language Information Retrieval via Generalized Canonical Correlation.
2022
['John M. Conroy' 'Neil P. Molino' 'Julia S. Yang']


 -


175 / 222
IRIT-IRIS at TREC 2022: CrisisFACTS Track.
2022
['Alexis Dusart' 'Gilles Hubert' 'Karen Pinel-Sauvagnat']


 -


176 / 222
KASYS at the TREC 2022 NeuCLIR Track.
2022
['Kenya Abe']


 -


177 / 222
MLIA-DAC@TREC CAsT 2022: Sparse Contextualized Query Embedding.
2022
['Nam Le Hai' 'Thomas Gerald' 'Thibault Formal' 'Jian-Yun Nie'
 'Benjamin Piwowarski' 'Laure Soulier']


 -


178 / 222
Using Neural Reranking and GPT-3 for Social Media Disaster Content Summarization.
2022
['Jayr Alencar Pereira' 'Robson do Nascimento Fidalgo'
 'Roberto de Alencar Lotufo' 'Rodrigo Frassetto Nogueira']


 -


179 / 222
Efficient Document Representations for Neural Text Ranking.
2022
['David Rau' 'Jaap Kamps']


 -


180 / 222
UNIMIB at TREC 2022 Clinical Trials Track.
2022
['Georgios Peikos' 'Gabriella Pasi']


 -


181 / 222
UWaterlooMDS at the TREC 2022 Health Misinformation Track.
2022
['Amir Vakili Tahami' 'Dake Zhang' 'Mark D. Smucker']


 -


182 / 222
The University of Stavanger (IAI) at the TREC 2022 Conversational Assistance Track.
2022
['Weronika Lajewska' 'Nolwenn Bernard' 'Ivica Kostric' 'Ivan Sekulic'
 'Krisztian Balog']


 -


183 / 222
Experiments with Adaptive ReRanking and ColBERT-PRF: University of Glasgow Terrier Team at TREC DL 2022.
2022
['Xiao Wang' 'Sean MacAvaney' 'Craig Macdonald' 'Iadh Ounis']


 -


184 / 222
University of Glasgow Terrier Team at the TREC 2022 Fair Ranking Track.
2022
['Thomas Jänich' 'Graham McDonald' 'Iadh Ounis']


 -


185 / 222
University of Glasgow Terrier Team at the TREC 2022 Conversational Assistance Track.
2022
['Sarawoot Kongyoung' 'Craig Macdonald' 'Iadh Ounis']


 -


186 / 222
WaterlooClarke at the TREC 2022 Conversational Assistant Track.
2022
['Siqing Huo' 'Xinyi Yan' 'Charles L. A. Clarke']


 -


187 / 222
Webis at TREC 2022: Deep Learning and Health Misinformation.
2022
['Alexander Bondarenko' 'Maik Fröbe' 'Lukas Gienapp' 'Alexander Pugachev'
 'Jan Heinrich Reimer' 'Ferdinand Schlatt' 'Ekaterina Artemova'
 'Martin Potthast' 'Benno Stein' 'Pavel Braslavski' 'Matthias Hagen']


 -


188 / 222
L3S at the TREC 2022 CrisisFACTS Track.
2022
['Thi Huyen Nguyen' 'Koustav Rudra']


 -


189 / 222
Elsevier Data Science Health Sciences at TREC 2022 Clinical Trials: Exploring Transformer Embeddings for Clinical Trial Retrieval.
2022
['Drahomira Herrmannova' 'Sharvari Jadhav' 'Harsh Sindhwa' 'Hina Nazir'
 'Elia Lima-Walton']


 -


190 / 222
HLTCOE at TREC 2022 NeuCLIR Track.
2022
['Eugene Yang' 'Dawn J. Lawrie' 'James Mayfield']


 -


191 / 222
Huawei Noah's Ark Lab at TREC NeuCLIR 2022.
2022
['Ehsan Kamalloo' 'David Alfonso-Hermelo' 'Mehdi Rezagholizadeh']


 -


192 / 222
Summarize and Expand Queries in Clinical Trials Retrieval. The IIIA Unipd at TREC 2022 Clinical Trials.
2022
['Giorgio Maria Di Nunzio' 'Guglielmo Faggioli' 'Stefano Marchesin']


 -


193 / 222
Question Answering-Based Query Expansion for Conversational Search: IIIA@UNIPD at TREC CAsT 2022.
2022
['Guglielmo Faggioli' 'Nicola Ferro' 'Mattia Romanello']


 -


194 / 222
JBNU at TREC 2022 Clinical Trials Track.
2022
['Dalya Sin' 'Woo-Kyoung Lee' 'Seung-Hyeon Jo' 'Kyung-Soon Lee']


 -


195 / 222
Non-Neural Baselines Experiments for CLIR at TREC 2022.
2022
['Paul McNamee']


 -


196 / 222
CogStack Cohort at TREC 2022 Clinical Trials Track.
2022
['Jack Wu' 'Zeljko Kraljevic' 'Thomas Searle' 'Daniel Bean'
 'Richard J. B. Dobson']


 -


197 / 222
RMIT CIDDA IR at the TREC 2022 Fair Ranking Track.
2022
['Sachin Pathiyan Cherumanal' 'Marwah Alaofi' 'Reham Abdullah Altalhi'
 'Elham Naghizade' 'Falk Scholer' 'Damiano Spina']


 -


198 / 222
An Exploration of Learning-to-re-rank Using a Two-step Framework for Fair Ranking.
2022
['Fumian Chen' 'Hui Fang']


 -


199 / 222
Probabilistic Structured Queries: The University of Maryland at the TREC 2022 NeuCLIR Track.
2022
['Suraj Nair' 'Douglas W. Oard']


 -


200 / 222
Multi-Faceted Question Fusion in the TREC 2022 CrisisFACTS Track.
2022
['Nathaniel W. Rollings' 'Peter A. Rankel' 'Douglas W. Oard']


 -


201 / 222
York University at TREC 2022: Deep Learning Track.
2022
['Yizheng Huang' 'Jimmy X. Huang']


 -


202 / 222
Answering Live Questions from Heterogeneous Data Sources SMART in Live QA at TREC 2016
2016
['Edgard Marx' 'Sandro Coelho']


 -


203 / 222
Overview of the TREC 2019 Decision Track
2019
['Mustafa Abualsaud' 'Christina Lioma' 'Maria Maistro' 'Mark D. Smucker'
 'Guido Zuccon']


 -


204 / 222
University of Glasgow (uog_tw) at TREC Microblog 2012
2012
['Jesus A. Rodriguez Perez' 'Andrew J. McMinn' 'Joemon M. Jose']


 -


205 / 222
Overview of the TREC-2001 Web Track
2001
['David Hawking' 'Nick Craswell']


 -


206 / 222
CLIP at TREC 2016: LiveQA and RTS
2016
['Mossaab Bagdouri' 'Douglas W. Oard']


 -


207 / 222
Anserini at TREC 2018: CENTRE, Common Core, and News Tracks
2018
['Peilin Yang' 'Jimmy Lin']


 -


208 / 222
H2oloo at TREC 2019: Combining Sentence and Document Evidence in the Deep Learning Track
2019
['Zeynep Akkalyoncu Yilmaz' 'Shengjin Wang' 'Jimmy Lin']


 -


209 / 222
Overview of the TREC 2018 CENTRE Track
2018
['Ian Soboroff' 'Nicola Ferro' 'Maria Maistro' 'Tetsuya Sakai']


 -


210 / 222
211 / 222
Classification of Incident-related Tweets: Tackling Imbalanced Training Data using Hybrid CNNs and Translation-based Data Augmentation
2018
['Anna Kruspe' 'Jens Kersten' 'Matti Wiegmann' 'Benno Stein'
 'Friederike Klan']


 -


212 / 222
TREC 2019 News Track Overview
2019
['Ian Soboroff' 'Shudong Huang' 'Donna Harman']


 -


213 / 222
214 / 222
DAIICT-LDRP at TREC RTS 2017: Real Time Push Notification and Post Summarization
2017
['Sandip Modha' 'Chintak Mandalia' 'Shyamal Shahshah' 'Sahil Kewlani'
 'Bhavya Shah' 'Deep Doshi' 'Prasenjit Majumder']


 -


215 / 222
The CLaC System at the TREC 2019 News Track
2019
['Pavel Khloponin' 'Leila Kosseim']


 -


216 / 222
University of Essex at the TREC 2012 Session Track
2012
['M-Dyaa Albakour' 'Udo Kruschwitz']


 -


217 / 222
GPLSI at TREC 2019 Incident Streams Track
2019
['Javi Fernández' 'Fernando Llopis' 'Patricio Martínez-Barco'
 'José M. Gómez']


 -


218 / 222
219 / 222
Siena College’s Institute of Artificial Intelligence TREC 2016 Contextual Suggestion Track
2016
['Tristan Canova' 'Daniel Carpenter' 'Kevin Danaher' 'Neil Devine'
 'Darren Lim']


 -


220 / 222
221 / 222
TREC CAR Y3: Complex Answer Retrieval Overview
2019
['Laura Dietz' 'John Foley']


 -


222 / 222
NOVASearch at Precision Medicine 2017
2017
['Gonçalo Araújo' 'André Mourão' 'João Magalhães']


 -


In [24]:
# Manual review pass over the records that are still unmatched: each title is
# searched in OpenAlex and, when the search yields exactly one hit that is not a
# confident automatic match, both sides are printed so the user can type the
# OpenAlex ID to store (or "-" to reject the candidate).
counter = 0
total = len(df_no_mag_not_found_still)

for _, row in df_no_mag_not_found_still.iterrows():
    # Progress indicator ("<current> / <total>") for the interactive session
    counter += 1
    print(f"{counter} / {total}")

    # Clean the title by removing commas
    title = row["Title"].replace(",", "")
    # URL-encode the title for the search query
    # NOTE(review): pyalex may already URL-encode filter values itself — confirm
    # that this does not double-encode the title.
    title_encoded = urllib.parse.quote(title)

    # Search for the record using the encoded title
    results = Works().search_filter(title=title_encoded).get()

    # Only an unambiguous search result (exactly one hit) is considered
    if len(results) != 1:
        continue
    candidate = results[0]

    # Extract years from the title of the search result and the current record
    years_candidate = extract_years(candidate["title"])
    years_df = extract_years(row["Title"])

    # Near-identical title with the same years: nothing to review here
    # (presumably such matches are collected by an earlier cell — TODO confirm)
    if fuzz.ratio(row["Title"], candidate["title"]) > 95 and years_df == years_candidate:
        continue

    # Candidates without any location are only reported, not offered for review.
    # NOTE(review): OpenAlex work objects appear to use lowercase keys, so the
    # "Source" membership test may always be true — confirm the intended key.
    if candidate["locations_count"] == 0 and "Source" not in candidate:
        print(candidate["id"])
        print("No locations and no 'Source' key found.")
        continue

    # Show the local record next to the candidate's OpenAlex ID
    print(row["Title"])
    print(row["PubYear"])
    print(row["Authors"])
    print(candidate["id"])

    # The user types the numeric part of the OpenAlex work ID, or "-" to skip.
    # Surrounding whitespace is ignored and an empty entry is treated like "-",
    # so a stray Enter does not trigger a lookup of the bare ID "W".
    new_id = input().strip()
    if new_id in ("", "-"):
        continue

    # Fetch the chosen work and store it in final_missing_records under the record's ID
    final_missing_records[row["ID"]] = Works()["W" + new_id]


1 / 222
2 / 222
NTU at TREC 2006 Genomics Track.
2006
['Kevin Hsin-Yih Lin' 'Wen-Juan Hou' 'Hsin-Hsi Chen']
https://openalex.org/W162668321


 162668321


3 / 222
https://openalex.org/W1502658005
keine locations und keine Source
4 / 222
https://openalex.org/W2157553519
keine locations und keine Source
5 / 222
6 / 222
7 / 222
8 / 222
9 / 222
10 / 222
A Concept-Based Framework for Passage Retrieval at Genomics.
2006
['Wei Zhou' 'Clement T. Yu' 'Vetle I. Torvik' 'Neil R. Smalheiser']
https://openalex.org/W108921306


 108921306


11 / 222
https://openalex.org/W2149043723
keine locations und keine Source
12 / 222
The Ephyra QA System at TREC 2006.
2006
['Nico Schlaefer' 'P. Gieselman' 'Guido Sautter']
https://openalex.org/W2152011797


 2152011797


13 / 222
14 / 222
https://openalex.org/W1511573436
keine locations und keine Source
15 / 222
The University of Washington's UWclmaQA System.
2006
['Dan Jinguji' 'William D. Lewis' 'Efthimis N. Efthimiadis' 'Joshua Minor'
 'Albert Bertram' 'Shauna Eggers' 'Joshua Johanson' 'Brian Nisonger'
 'Ping Yu' 'Zhengbo Zhou']
https://openalex.org/W163118932


 163118932


16 / 222
Twease at TREC 2006: Breaking and Fixing BM25 Scoring With Query Expansion, A Biologically Inspired Double Mutant Recovery Experiment.
2006
['Kevin C. Dorff' 'Matthew J. Wood' 'Fabien Campagne']
https://openalex.org/W2172272999


 2172272999


17 / 222
https://openalex.org/W2917915395
keine locations und keine Source
18 / 222
19 / 222
Experiments in TREC 2007 Blog Opinion Task at CAS-ICT.
2007
['Xiangwen Liao' 'Donglin Cao' 'Yu Wang' 'Wei Liu' 'Songbo Tan'
 'Hongbo Xu' 'Xueqi Cheng']
https://openalex.org/W2102458764


 2102458764


20 / 222
Semantic Extensions of the Ephyra QA System for TREC 2007.
2007
['Nico Schlaefer' 'Jeongwoo Ko' 'Justin Betteridge' 'Manas A. Pathak'
 'Eric Nyberg' 'Guido Sautter']
https://openalex.org/W2099234488


 2099234488


21 / 222
FDU at TREC 2007: Opinion Retrieval of Blog Track.
2007
['Qi Zhang' 'Bingqing Wang' 'Lide Wu' 'Xuanjing Huang']
https://openalex.org/W2107087467


 2107087467


22 / 222
Lucene and Juru at TREC 2007: 1-Million Queries Track.
2007
['Doron Cohen' 'Einat Amitay' 'David Carmel']
https://openalex.org/W2160460392


 2160460392


23 / 222
https://openalex.org/W192700169
keine locations und keine Source
24 / 222
25 / 222
Using IR-n for Information Retrieval of Genomics Track.
2007
['María Pardiño' 'Rafael M. Terol' 'Patricio Martínez-Barco'
 'Fernando Llopis' 'Elisa Noguera']
https://openalex.org/W1581863907


 1581863907


26 / 222
Access to Legal Documents: Exact Match, Best Match, and Combinations.
2007
['Avi Arampatzis' 'Jaap Kamps' 'Martijn Kooken' 'Nir Nussbaum']
https://openalex.org/W1846229917


 1846229917


27 / 222
28 / 222
29 / 222
30 / 222
The Pronto QA System at TREC 2007: Harvesting Hyponyms, Using Nominalisation Patterns, and Computing Answer Cardinality.
2007
['Johan Bos' 'Edoardo Guzzetti' 'James R. Curran']
https://openalex.org/W2165899451


 2165899451


31 / 222
https://openalex.org/W52122189
keine locations und keine Source
32 / 222
https://openalex.org/W2139327339
keine locations und keine Source
33 / 222
34 / 222
35 / 222
FEUP at TREC 2008 Blog Track: Using Temporal Evidence for Ranking and Feed Distillation.
2008
['Sérgio Nunes' 'Cristina Ribeiro' 'Gabriel David']
https://openalex.org/W2127340894


 2127340894


36 / 222
37 / 222
Diversifying Search Results with Popular Subtopics.
2009
['Dawei Yin' 'Zhenzhen Xue' 'Xiaoguang Qi' 'Brian D. Davison']
https://openalex.org/W2096665395


 2096665395


38 / 222
Lucene for n-grams using the CLUEWeb Collection.
2009
['Gregory B. Newby' 'Christopher T. Fallen' 'Kylie McCormick']
https://openalex.org/W1545045429


 1545045429


39 / 222
From Blogs to News: Identifying Hot Topics in the Blogosphere.
2009
['Wouter Weerkamp' 'Manos Tsagkias' 'Maarten de Rijke']
https://openalex.org/W1497615058


 1497615058


40 / 222
41 / 222
Heuristic Ranking and Diversification of Web Documents.
2009
['Jiyin He' 'Krisztian Balog' 'Katja Hofmann' 'Edgar Meij'
 'Maarten de Rijke' 'Manos Tsagkias' 'Wouter Weerkamp']
https://openalex.org/W1516228069


 1516228069


42 / 222
https://openalex.org/W1594180669
keine locations und keine Source
43 / 222
UMass Amherst and UT Austin @ the TREC 2009 Relevance Feedback Track.
2009
['Marc-Allen Cartright' 'Jangwon Seo' 'Matthew Lease']
https://openalex.org/W343173671


 343173671


44 / 222
45 / 222
46 / 222
https://openalex.org/W2405178010
keine locations und keine Source
47 / 222
Using Anchor Text, Spam Filtering and Wikipedia for Web Search and Entity Ranking.
2010
['Jaap Kamps' 'Rianne Kaptein' 'Marijn Koolen']
https://openalex.org/W1878118535


 1878118535


48 / 222
49 / 222
Searching for Entities When Retrieval Meets Extraction.
2010
['Qi Li' 'Daqing He']
https://openalex.org/W1484270359


 -


50 / 222
AEHRC & QUT at TREC 2011 Medical Track: A Concept-Based Information Retrieval Approach.
2011
['Bevan Koopman' 'Michael Lawley' 'Peter Bruza' 'Laurianne Sitbon']
https://openalex.org/W2294298503


 2294298503


51 / 222
52 / 222
BiTeM Group Report for TREC Chemical IR Track 2011.
2011
['Julien Gobeill' 'Arnaud Gaudinat' 'Patrick Ruch' 'Emilie Pasche'
 'Douglas Teodoro' 'Dina Vishnyakova']
https://openalex.org/W122491329


 122491329


53 / 222
BiTeM Group Report for TREC Medical Records Track 2011.
2011
['Julien Gobeill' 'Arnaud Gaudinat' 'Patrick Ruch' 'Emilie Pasche'
 'Douglas Teodoro' 'Dina Vishnyakova']
https://openalex.org/W2296534770


 2296534770


54 / 222
55 / 222
56 / 222
https://openalex.org/W2166278490
keine locations und keine Source
57 / 222
ISTI@TREC Microblog Track 2011: Exploring the Use of Hashtag Segmentation and Text Quality Ranking.
2011
['Giacomo Berardi' 'Andrea Esuli' 'Diego Marcheggiani'
 'Fabrizio Sebastiani']
https://openalex.org/W2182111380


 2182111380


58 / 222
https://openalex.org/W2404508373
keine locations und keine Source
59 / 222
Identifying Patients for Clinical Studies from Electronic Health Records: TREC Medical Records Track at OHSU.
2011
['Steven Bedrick' 'Kyle H. Ambert' 'Aaron M. Cohen' 'William R. Hersh']
https://openalex.org/W1494416095


 1494416095


60 / 222
Medical-Miner at TREC 2011 Medical Records Track.
2011
['Juan Manuel Córdoba' 'Manuel J. Maña López' 'Noa P. Cruz Díaz'
 'Jacinto Mata' 'Fernando Aparicio' 'Manuel de Buenaga Rodríguez'
 'Daniel Glez-Peña' 'Florentino Fdez-Riverola']
https://openalex.org/W2400476955


 2400476955


61 / 222
MetaMap is a Superior Baseline to a Standard Document Retrieval Engine for the Task of Finding Patient Cohorts in Clinical Free Text.
2011
['K. Bretonnel Cohen' 'Tom Christiansen' 'Lawrence E. Hunter']
https://openalex.org/W2406492869


 -


62 / 222
University of Glasgow (UGLA-D) at TREC Microblog 2011: Temporal Pseudo-Relevance Feedback in Microblog Retrieval.
2011
['Stewart Whiting' 'Iraklis A. Klampanos' 'Joemon M. Jose']
https://openalex.org/W2400701433


 2400701433


63 / 222
The University of Illinois' Graduate School of Library and Information Science at TREC 2011.
2011
['Miles Efron']
https://openalex.org/W2917901131


 2917901131


64 / 222
65 / 222
66 / 222
The University of Iowa at TREC 2011: Microblogs, Medical Records and Crowdsourcing.
2011
['Sanmitra Bhattacharya' 'Christopher G. Harris' 'Yelena Mejova'
 'Chao Yang' 'Padmini Srinivasan']
https://openalex.org/W2399618403


 2399618403


67 / 222
PITT at TREC 2011 Session Track.
2011
['Jiepu Jiang' 'Shuguang Han' 'Jia Wu' 'Daqing He']
https://openalex.org/W206102646


 206102646


68 / 222
69 / 222
70 / 222
71 / 222
72 / 222
73 / 222
74 / 222
75 / 222
The HLTCOE Approach to the TREC 2012 KBA Track.
2012
['Brian Kjersten' 'Paul McNamee']
https://openalex.org/W2125135708


 2125135708


76 / 222
TREC Microblog 2012 Track: Real-Time Ranking Algorithm for Microblog Ranking Systems.
2012
['Davide Feltoni Gurini' 'Fabio Gasparetti']
https://openalex.org/W259394849


 259394849


77 / 222
78 / 222
https://openalex.org/W31723657
keine locations und keine Source
79 / 222
Overview of the TREC 2013 Contextual Suggestion Track.
2013
['Adriel Dean-Hall' 'Charles L. A. Clarke' 'Nicole Simone' 'Jaap Kamps'
 'Paul Thomas' 'Ellen M. Voorhees']
https://openalex.org/W36361912


 36361912


80 / 222
81 / 222
82 / 222
University of Amsterdam at the TREC 2013 Contextual Suggestion Track: Learning User Preferences from Wikitravel Categories.
2013
['Marijn Koolen' 'Hugo C. Huurdeman' 'Jaap Kamps']
https://openalex.org/W2402459283


 2402459283


83 / 222
84 / 222
85 / 222
86 / 222
Better Contextual Suggestions in ClueWeb12 Using Domain Knowledge Inferred from The Open Web.
2014
['Thaer Samar' 'Arjen P. de Vries' 'Alejandro Bellogín']
https://openalex.org/W1649673095


 1649673095


87 / 222
Fusing manual and machine feedback in biomedical domain.
2014
['Jainisha Sankhavara' 'Fenny Thakrar' 'Prasenjit Majumder'
 'Shamayeeta Sarkar']
https://openalex.org/W1860695435


 1860695435


88 / 222
89 / 222
90 / 222
SNUMedinfo at TREC CDS track 2014: Medical case-based retrieval task.
2014
['Sungbin Choi' 'Jinwook Choi']
https://openalex.org/W1629874595


 1629874595


91 / 222
https://openalex.org/W2123497698
keine locations und keine Source
92 / 222
93 / 222
RMIT at the 2017 TREC CORE Track.
2017
['Rodger Benham' 'Luke Gallagher' 'Joel M. Mackenzie'
 'Tadele Tedla Damessie' 'Ruey-Cheng Chen' 'Falk Scholer'
 'Alistair Moffat' 'J. Shane Culpepper']
https://openalex.org/W2886702861


 2886702861


94 / 222
95 / 222
RMIT at the 2018 TREC CORE Track.
2018
['Rodger Benham' 'Luke Gallagher' 'Joel M. Mackenzie' 'Binsheng Liu'
 'Xiaolu Lu' 'Falk Scholer' 'J. Shane Culpepper' 'Alistair Moffat']
https://openalex.org/W2979285747


 2979285747


96 / 222
97 / 222
Overview of the TREC 2020 Deep Learning Track.
2020
['Nick Craswell' 'Bhaskar Mitra' 'Emine Yilmaz' 'Daniel Campos']
https://openalex.org/W3175111331


 -


98 / 222
99 / 222
SIB Text Mining at TREC Precision Medicine 2020.
2020
['Emilie Pasche' 'Déborah Caucheteur' 'Luc Mottin' 'Anaïs Mottaz'
 'Julien Gobeill' 'Patrick Ruch']
https://openalex.org/W3174156767


 3174156767


100 / 222
101 / 222
102 / 222
103 / 222
104 / 222
Overview of TREC 2021.
2021
['Ian Soboroff']
https://openalex.org/W4321524525


 -


105 / 222
106 / 222
107 / 222
108 / 222
109 / 222
110 / 222
111 / 222
112 / 222
113 / 222
114 / 222
115 / 222
An approach to relevant clinical trials retrieving.
2021
['Mariia Fedorova']
https://openalex.org/W2400952622


 -


116 / 222
117 / 222
118 / 222
119 / 222
120 / 222
121 / 222
122 / 222
123 / 222
124 / 222
125 / 222
126 / 222
127 / 222
128 / 222
129 / 222
130 / 222
131 / 222
132 / 222
133 / 222
134 / 222
135 / 222
136 / 222
137 / 222
138 / 222
139 / 222
140 / 222
141 / 222
142 / 222
143 / 222
144 / 222
145 / 222
146 / 222
147 / 222
148 / 222
149 / 222
150 / 222
151 / 222
152 / 222
153 / 222
154 / 222
155 / 222
156 / 222
157 / 222
Semantic Search for Background Linking in News Articles.
2021
['Udhav Sethi' 'Anup Anand Deshmukh']
https://openalex.org/W3044872150


 -


158 / 222
159 / 222
160 / 222
161 / 222
162 / 222
163 / 222
164 / 222
165 / 222
Hybrid Retrieval and Multi-stage Ranking at TREC 2022 Deep Learning Track.
2022
['Guangwei Xu' 'Yanzhao Zhang' 'Longhui Zhang' 'Dingkun Long'
 'Pengjun Xie' 'Ruijie Guo']
https://openalex.org/W4386148436


 -


166 / 222
167 / 222
168 / 222
169 / 222
170 / 222
171 / 222
172 / 222
173 / 222
174 / 222
175 / 222
176 / 222
177 / 222
178 / 222
179 / 222
180 / 222
181 / 222
182 / 222
183 / 222
184 / 222
185 / 222
186 / 222
187 / 222
188 / 222
189 / 222
190 / 222
191 / 222
192 / 222
193 / 222
194 / 222
195 / 222
196 / 222
197 / 222
198 / 222
199 / 222
200 / 222
201 / 222
202 / 222
203 / 222
204 / 222
205 / 222
206 / 222
207 / 222
208 / 222
209 / 222
210 / 222
SIB Text Mining at TREC 2018 Precision Medicine Track
2018
['Emilie Pasche' 'Paul van Rijen' 'Julien Gobeill' 'Anaïs Mottaz'
 'Luc Mottin' 'Patrick Ruch']
https://openalex.org/W2988098012


 2988098012


211 / 222
212 / 222
213 / 222
214 / 222
215 / 222
216 / 222
217 / 222
218 / 222
219 / 222
220 / 222
221 / 222
222 / 222


In [79]:
# Hard-coded record-ID -> OpenAlex-work-ID pairs that are fetched from OpenAlex
# and added to final_missing_records.
manual_matches = {
    "trec_663": "W2141614686",
    "trec_723": "W2899701551",
    "trec_1318": "W2917201903",
    "trec_1323": "W1483624827",
}

for trec_id, openalex_id in manual_matches.items():
    final_missing_records[trec_id] = Works()[openalex_id]


In [81]:
# Combine the three result dictionaries into one; on duplicate keys the entry
# from the dictionary listed later wins.
full_json = {
    **OpenAlex_by_MAG,
    **documents_without_MAG,
    **final_missing_records,
}

In [83]:
# Serialize the combined records as indented, non-ASCII-escaped JSON and write
# them to the data directory.
with open('../../../data/OpenAlex_TREC.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(full_json, ensure_ascii=False, indent=4))

In [84]:
# Keep only the metadata rows whose ID is not a key of the combined result dict.
has_openalex_record = df_metadata_trec["ID"].isin(list(full_json))
df_finally_missing = df_metadata_trec.loc[~has_openalex_record]

In [86]:
# Store the rows without an OpenAlex record as a parquet file.
not_found_path = "../../../data/OpenAlex_TREC_not_found.parquet"
df_finally_missing.to_parquet(not_found_path)