In [8]:
# Jupyter Notebook cells for interactive preview

# Cell 1: Load and basic info
import pandas as pd
import numpy as np

# Path of the pickled article table; edit this to preview a different export.
DATA_PATH = "pet_kinetic_modeling_last_5_years_as_of_2025-07-24.pkl"

# SECURITY: unpickling can execute arbitrary code embedded in the file, so only
# load pickles you created yourself or obtained from a trusted source.
df = pd.read_pickle(DATA_PATH)

# Fail fast if the file is not the expected article table: the preview cells
# further down index these columns by name.
REQUIRED_COLUMNS = {
    "title", "authors", "journal", "year",
    "abstract", "search_query", "embedding",
}
missing_columns = REQUIRED_COLUMNS - set(df.columns)
assert not missing_columns, f"Missing expected columns: {sorted(missing_columns)}"

print(f"Dataset shape: {df.shape}")
df.info()

Dataset shape: (739, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 739 entries, 0 to 738
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         739 non-null    object
 1   authors       739 non-null    object
 2   journal       739 non-null    object
 3   year          739 non-null    object
 4   abstract      739 non-null    object
 5   url           739 non-null    object
 6   search_query  739 non-null    object
 7   search_date   739 non-null    object
 8   embedding     739 non-null    object
dtypes: object(9)
memory usage: 52.1+ KB


In [9]:
# Cell 2: Preview structure
# Display the first five rows; as the cell's last expression the result is
# rendered by the notebook.
df.head(n=5)

Unnamed: 0,title,authors,journal,year,abstract,url,search_query,search_date,embedding
0,Total-Body PET Kinetic Modeling and Potential ...,"Yiran Wang, Elizabeth Li, Simon R Cherry, Guob...",PET clinics,2021,The uEXPLORER total-body PET/CT system provide...,https://pubmed.ncbi.nlm.nih.gov/34353745/,PET Kinetic Modeling,2025-07-24,"[-0.2511961, 0.3144019, 0.3854435, 0.099322885..."
1,Kinetic modeling and parameter estimation of T...,"Catriona Wimberley, Sonia Lavisse, Ansel Hillm...",European journal of nuclear medicine and molec...,2021,Purpose: Translocator protein 18-kDa (TSPO) im...,https://pubmed.ncbi.nlm.nih.gov/33693967/,PET Kinetic Modeling,2025-07-24,"[-0.2343197, 0.45217928, 0.19918086, 0.0362724..."
2,Kinetic modeling and parametric imaging with d...,"Antonia Dimitrakopoulou-Strauss, Leyun Pan, Ch...",European journal of nuclear medicine and molec...,2021,Dynamic PET (dPET) studies have been used unti...,https://pubmed.ncbi.nlm.nih.gov/32430580/,PET Kinetic Modeling,2025-07-24,"[-0.13470781, 0.22336128, 0.21109794, 0.087820..."
3,Metabolic kinetic modeling of [11C]methionine ...,"Jiajin Li, Beiwen Ni, Xiaofeng Yu, Cheng Wang,...",European journal of nuclear medicine and molec...,2023,Purpose: Multiple myeloma (MM) is a malignant ...,https://pubmed.ncbi.nlm.nih.gov/37039900/,PET Kinetic Modeling,2025-07-24,"[-0.04451454, 0.3701654, 0.23768033, 0.0193843..."
4,High-Temporal-Resolution Lung Kinetic Modeling...,"Yiran Wang, Benjamin A Spencer, Jeffrey Schmal...",Journal of nuclear medicine : official publica...,2023,Tracer kinetic modeling in dynamic PET has the...,https://pubmed.ncbi.nlm.nih.gov/37116916/,PET Kinetic Modeling,2025-07-24,"[-0.21560048, 0.19309267, 0.3516659, 0.1280907..."


In [10]:
# Cell 3: Check specific columns
# How many rows each search query contributed.
print("Search queries in this dataset:")
query_counts = df["search_query"].value_counts()
print(query_counts)

# Articles per publication year, ordered by year rather than by frequency.
print("\nYear distribution:")
year_counts = df["year"].value_counts()
print(year_counts.sort_index())

Search queries in this dataset:
search_query
PET Kinetic Modeling    739
Name: count, dtype: int64

Year distribution:
year
2020     52
2021    138
2022    143
2023    139
2024    150
2025    117
Name: count, dtype: int64


In [11]:
# Cell 4: Sample article
idx = 0  # Change this to see different articles

# Look the row up once (by position) and reuse it for every field below.
article = df.iloc[idx]

print(f"ARTICLE {idx + 1}:")  # 1-based label for the 0-based position
print(f"Title: {article['title']}")
print(f"Authors: {article['authors']}")
print(f"Journal: {article['journal']}")
print(f"Year: {article['year']}")
print(f"Abstract: {article['abstract']}")
print(f"Embedding shape: {article['embedding'].shape}")

ARTICLE 1:
Title: Total-Body PET Kinetic Modeling and Potential Opportunities Using Deep Learning
Authors: Yiran Wang, Elizabeth Li, Simon R Cherry, Guobao Wang
Journal: PET clinics
Year: 2021
Abstract: The uEXPLORER total-body PET/CT system provides a very high level of detection sensitivity and simultaneous coverage of the entire body for dynamic imaging for quantification of tracer kinetics. This article describes the fundamentals and potential benefits of total-body kinetic modeling and parametric imaging focusing on the noninvasive derivation of blood input function, multiparametric imaging, and high-temporal resolution kinetic modeling. Along with its attractive properties, total-body kinetic modeling also brings significant challenges, such as the large scale of total-body dynamic PET data, the need for organ and tissue appropriate input functions and kinetic models, and total-body motion correction. These challenges, and the opportunities using deep learning, are discussed.
Embedding shape: (768,)

In [12]:
# Cell 5: Embeddings verification
# Record the shape of every embedding. np.shape also accepts plain sequences,
# so this works whether the column holds ndarrays or Python lists.
embedding_shapes = [np.shape(emb) for emb in df['embedding']]
if not embedding_shapes:
    # Without this guard, embedding_shapes[0] fails with a bare IndexError.
    raise ValueError("df has no rows, so there are no embeddings to verify")

# Check that all embeddings have same dimension
reference_shape = embedding_shapes[0]
shapes_consistent = all(shape == reference_shape for shape in embedding_shapes)
print(f"All embeddings have shape {reference_shape}: {shapes_consistent}")
if not shapes_consistent:
    # Enforce the check instead of only printing it: np.vstack can succeed on
    # some mixed shapes (e.g. (768,) with (2, 768)) and would then produce a
    # matrix whose rows no longer line up with the articles.
    distinct_shapes = sorted(set(embedding_shapes))
    raise ValueError(f"Inconsistent embedding shapes: {distinct_shapes}")

# Convert to matrix for analysis
embeddings_matrix = np.vstack(df['embedding'].values)
# Row i of the matrix must correspond to article i of df.
assert embeddings_matrix.shape[0] == len(df), (
    f"Expected {len(df)} rows in embeddings_matrix, got {embeddings_matrix.shape[0]}"
)
print(f"Embeddings matrix shape: {embeddings_matrix.shape}")
print(f"Embeddings data type: {embeddings_matrix.dtype}")

All embeddings have shape (768,): True
Embeddings matrix shape: (739, 768)
Embeddings data type: float32


In [13]:
# Cell 6: Quick similarity check (optional)
from sklearn.metrics.pairwise import cosine_similarity

# Number of leading articles to compare; the preview uses however many of
# these actually exist instead of silently skipping smaller datasets.
N_PREVIEW = 5

# Size the preview from the matrix that is actually sliced, not from df, so the
# count in the heading always matches the rows being compared.
preview_embeddings = embeddings_matrix[:N_PREVIEW]
n_preview = len(preview_embeddings)

if n_preview >= 2:
    similarity_matrix = cosine_similarity(preview_embeddings)
    print(f"Cosine similarity between first {n_preview} articles:")
    # NOTE(review): if the off-diagonal values all sit very close to 1, that may
    # reflect a component shared by every embedding rather than near-duplicate
    # articles; mean-centering before comparing could be worth trying - confirm.
    print(similarity_matrix.round(3))
else:
    # A pairwise comparison needs at least two rows to be informative.
    print(f"Need at least 2 articles for a similarity check; found {n_preview}.")

Cosine similarity between first 5 articles:
[[1.    0.985 0.99  0.985 0.989]
 [0.985 1.    0.989 0.99  0.987]
 [0.99  0.989 1.    0.987 0.989]
 [0.985 0.99  0.987 1.    0.991]
 [0.989 0.987 0.989 0.991 1.   ]]
