In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# TinyLlama model for low-VRAM usage
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Automatically use GPU if available
    torch_dtype="auto",         # fp16 on supported GPU
    trust_remote_code=True
)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Prompt for keyword extraction
prompt = """### Instruction:
Extract exactly 5 to 8 keywords or technical terms from the following paper title and abstract.
Only return a comma-separated list of keywords.

### Title:
How Do Self-Interaction Errors Associated with Stretched Bonds Affect Barrier Height Predictions?

### Abstract:
Density functional theory suffers from self-interaction errors that generally result in the underestimation of chemical reaction barrier heights. This is commonly attributed to the tendency of density functional approximations to overstabilize delocalized densities that typically occur in the stretched bonds of transition state structures. The Perdew–Zunger self-interaction correction (PZSIC) and locally scaled self-interaction correction (LSIC) improve the prediction of barrier heights of chemical reactions, with LSIC giving better accuracy than PZSIC on average. These methods employ an orbital-by-orbital correction scheme to remove the one-electron SIE. In the context of barrier heights, this allows an analysis of how the self-interaction correction (SIC) for each orbital contributes to the calculated barriers using Fermi–Löwdin orbitals. We hypothesize that the SIC contribution to the reaction barrier comes mainly from a limited number of orbitals that are directly involved in bond-breaking and bond-making in the reaction transition state. We call these participant orbitals (POs), in contrast to spectator orbitals (SOs) which are not directly involved in changes to the bonding. We test this hypothesis for the reaction barriers of the BH76 benchmark set of reactions. We find that the stretched-bond orbitals indeed make the largest individual SIC contributions to the barriers. These contributions increase the barrier heights relative to LSDA, which underpredicts the barrier. However, the full stretched-bond hypothesis does not hold in all cases for either PZSIC or LSIC. There are many cases where the total SIC contribution from the SOs is significant and cannot be ignored. The size of the SIC contribution to the barrier height is a key indicator. A large SIC correction is correlated to a large LSDA error in the barrier, showing that PZSIC properly gives larger corrections when corrections are needed most. A comparison of the performance of PZSIC and LSIC shows that the two methods have similar accuracy for reactions with large LSDA errors, but LSIC is clearly better for reactions with small errors. We trace this to an improved description of reaction energies in LSIC.

### Keywords:"""

# Run generation
response = pipe(prompt, max_new_tokens=100, do_sample=False)
output = response[0]['generated_text']

# Extract keywords
keywords = output.split("### Keywords:")[-1].strip()
print("Extracted Keywords:", [kw.strip() for kw in keywords.split(',') if kw.strip()])


Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Extracted Keywords: ['stretched bonds', 'density functional theory', 'self-interaction correction', 'reaction barrier heights', 'barrier height predictions', 'PZSIC', 'LSIC', 'orbitals', 'Fermi–Löwdin', 'barrier heights', 'barrier height predictions', 'self-interaction errors', 'density functional approximations', 'transition state structures', 'barrier heights', 'reaction barriers', 'barrier height predictions', 'self-interaction correction', 'orbitals', 'Fermi']


In [None]:
presenting_author = "Priyanka Bholanath Shukla | University of Pittsburgh | The University of Texas at El Paso | Central Michigan University"
universities = [u.strip() for u in presenting_author.split('|') if 'University' in u]

# 3. Geocode Universities
from geopy.geocoders import Nominatim
from time import sleep

geolocator = Nominatim(user_agent="keyword_map")
university_locations = {}

for uni in universities:
    location = geolocator.geocode(uni)
    if location:
        university_locations[uni] = (location.latitude, location.longitude)
    sleep(1)  # be kind to free geocoding APIs

# 4. Create DataFrame for Visualization
import pandas as pd

df = pd.DataFrame({
    "university": list(university_locations.keys()),
    "lat": [loc[0] for loc in university_locations.values()],
    "lon": [loc[1] for loc in university_locations.values()],
    # "keyword": keywords[:len(university_locations)],
    "author": ["Priyanka Bholanath Shukla"] * len(university_locations),
    "datetime": ["2023-11-06"] * len(university_locations)
})

# 5. Plotly World Map Visualization
import plotly.express as px

fig = px.scatter_geo(
    df,
    lat='lat',
    lon='lon',
    # text='keyword',
    hover_name='university',
    hover_data={ "author": True, "datetime": True},
    projection="natural earth"
)

fig.update_layout(title="Research Keyword Origins by University", title_x=0.5)
fig.show()


: 

In [1]:
import torch
import torchvision
from transformers import pipeline

print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())


  from .autonotebook import tqdm as notebook_tqdm


2.5.1+cu121
0.20.1+cu121
True
