### Importing Libraries

In [1]:
import pandas as pd
from transformers import DPRQuestionEncoder, DPRContextEncoder, DPRQuestionEncoderTokenizer, DPRContextEncoderTokenizer
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch
import faiss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the dataset
df = pd.read_csv('complete_renewable_energy_dataset.csv')

# Prepare contexts: Select relevant columns and combine into a single text field
df['Context'] = df.apply(lambda row: f"In {row['Country']} in {row['Year']}, with energy type {row['Energy Type']}, "
                                      f"production was {row['Production (GWh)']} GWh, with an installed capacity of "
                                      f"{row['Installed Capacity (MW)']} MW and an investment of {row['Investments (USD)']} USD.", axis=1)

contexts = df['Context'].tolist()


In [3]:
# Initialize the context encoder
context_encoder_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
context_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")

# Encode all contexts
context_embeddings = []
for context in contexts:
    inputs = context_encoder_tokenizer(context, return_tensors="pt")
    with torch.no_grad():
        embedding = context_encoder(**inputs).pooler_output.cpu().numpy()
    context_embeddings.append(embedding)



To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification mod

NameError: name 'np' is not defined

In [5]:
import numpy as np
context_embeddings = np.vstack(context_embeddings)

In [6]:
# Build the FAISS index
index = faiss.IndexFlatIP(context_embeddings.shape[1])  # Using inner product for similarity
index.add(context_embeddings)


In [9]:
!pip install datasets

Defaulting to user installation because normal site-packages is not writeable
Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/527.3 kB ? eta -:--:--
   ------------------- --------------

In [8]:
# Initialize the retriever with FAISS index
retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq",
    index_name="custom",
    passages=contexts,
    index=index
)


ImportError: 
RagRetriever requires the 🤗 Datasets library but it was not found in your environment. You can install it with:
```
pip install datasets
```
In a notebook or a colab, you can install it by executing a cell with
```
!pip install datasets
```
then restarting your kernel.

Note that if you have a local folder named `datasets` or a local python file named `datasets.py` in your current
working directory, python may try to import this instead of the 🤗 Datasets library. You should rename this folder or
that python file if that's the case. Please note that you may need to restart your runtime after installation.
