# Warmup Process to Index and Vectorize Data

## Load Vector Store and Embeddings 

In [1]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [2]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

## Load Data

In [3]:
import pickle

file_path = 'data/scraped_urls.json'

with open(file_path, 'rb') as file:
    raw_data = pickle.load(file)

print(raw_data[0])

[Document(metadata={'og:url': 'https://datascience.cy/programme-structure/', 'language': 'en-US', 'article:published_time': '2021-03-09T20:37:00+00:00', 'ogUrl': 'https://datascience.cy/programme-structure/', 'ogImage': 'https://datascience.cy/wp-content/uploads/2021/03/program-header.jpg', 'generator': ['All in One SEO (AIOSEO) 4.8.1', 'WordPress 6.7.2', 'Powered by Slider Revolution 6.2.23 - responsive, Mobile-Friendly Slider Plugin for WordPress with comfortable drag and drop interface.', 'WP Rocket 3.18.3'], 'og:image:secure_url': 'https://datascience.cy/wp-content/uploads/2021/03/program-header.jpg', 'og:image:height': '500', 'twitter:title': 'Programme structure - Master in Data Science - University of Cyprus', 'og:description': 'The Master in Data Science is a highly-selective programme for students who want to begin or advance their careers in Data Science. The duration of the programme is 1,5-years (90 ECTS), while the language of instruction is English.', 'twitter:image': 'ht

In [4]:
documents_to_add_to_vectorstore = [item[0] for item in raw_data if item]

In [5]:
print(documents_to_add_to_vectorstore[0].page_content)

- [Programme structure](https://datascience.cy/programme-structure/)
- [People](https://datascience.cy/people/)
- [Admissions](https://datascience.cy/admissions/)
- [Capstone Projects](https://datascience.cy/capstone-projects/)
- [Contact](https://datascience.cy/contact/)

[![](https://datascience.cy/wp-content/uploads/2021/03/logo.png)](https://datascience.cy/)

[![](https://datascience.cy/wp-content/uploads/2021/03/logo.png)](https://datascience.cy/)

- [Programme structure](https://datascience.cy/programme-structure/)
- [People](https://datascience.cy/people/)
- [Admissions](https://datascience.cy/admissions/)
- [Capstone Projects](https://datascience.cy/capstone-projects/)
- [Contact](https://datascience.cy/contact/)

### >>>Master

### ... in Data Science

Programme\_Structure

[APPLY NOW](https://applications.ucy.ac.cy/postgraduate_appl/MNG_USER_en.login_frm) [DOWNLOAD BROCHURE](https://datascience.cy/wp-content/uploads/2021/05/Data-Science_Prospectus_lowres.pdf)

### >>Overview


## Add Documents to Vector Store

In [None]:
vector_store.add_documents(documents=documents_to_add_to_vectorstore)

['0e281bc4-c3d6-4701-8942-39cb003962a9',
 '96b95327-5ae9-4a37-a355-f5d91ea93b61',
 'cfd30312-e06b-4ea3-b1b0-27321183e60c',
 'd69af934-6d96-4a09-a2a8-14a900b847a5',
 '59b9bafd-f56a-4ca3-8d3a-9e0310b86a8b',
 '8975f9b1-2e25-4ebf-b8bf-bb0ed46009bc',
 'f020ec11-29f6-40ec-a654-2ca4c1e29c1d',
 '6573f65a-6d66-43eb-b14a-4888ed7666a3',
 'bf9c73f1-945e-4153-a5e9-5189df4af624',
 '21ed2267-1d30-406f-8e1b-cccdfb34b872',
 'd0b01d0b-8f3c-401e-9506-f9cd213b7893',
 'fce65694-3c47-4077-936e-a7921d4bf3e3',
 'de732a2b-7088-486c-862d-67634d6da469',
 '0ddb8efe-f913-42b2-9fed-287edb8b9cfc',
 '00958610-c365-4e59-b000-36ec3a2ab155',
 'b9352350-24f3-4fa0-a3c7-97ac0b576b08',
 '388c4714-e8fc-499b-8752-eb5581fecde8',
 '871b438c-0abe-45d6-afd5-662c0d0004a4',
 'ead17409-9746-43c3-9775-9418ce9b4d14',
 '1f27d8d4-345f-423f-9858-6bed9fc28be8',
 '93d61c58-8b82-4663-b194-0e7e595bb2a8',
 '6a951f44-7380-44ba-821f-ee6823e9bd3d',
 '1662fdc9-90ca-441a-9d61-22b60caba655',
 'c210c93e-ce4d-4a06-81c9-e07133294ac1',
 '3a92df3b-b2fd-

## Test retrieval

In [7]:
results = vector_store.similarity_search_with_score(
    "Master in Data Science?", k=5
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content}")

* [SIM=0.974700] [![](https://www.ucy.ac.cy/graduateschool/wp-content/uploads/sites/45/2022/05/graduate-school-university-of-cyprus-logo-e1655374191364.jpg)](https://www.ucy.ac.cy/graduateschool/?lang=en "Graduate School")

[![](https://www.ucy.ac.cy/graduateschool/wp-content/uploads/sites/45/2023/04/Apply-for-Master-banner-300X200ENG.webp)](https://www.ucy.ac.cy/graduateschool/postgraduate-programmes-places/?lang=en)

[![](https://www.ucy.ac.cy/graduateschool/wp-content/uploads/sites/45/2022/05/0862_UCY_FUNDRAISING_CAMPAIGN_POST_620x400_ENG_002.png)](https://www.ucy.ac.cy/alumni-development/why-i-give/?lang=en)

[![](https://www.ucy.ac.cy/graduateschool/wp-content/uploads/sites/45/2025/01/International_Students_top.jpg)](https://www.ucy.ac.cy/internationalsupport/international-students/)

- [HOME](https://www.ucy.ac.cy/graduateschool/?lang=en)
- GRADUATE SCHOOL

  - [Vision](https://www.ucy.ac.cy/graduateschool/vision/?lang=en)
  - [Structure of the Graduate School](https://www.ucy.ac

## Save Vector Store

In [8]:
vector_store.save_local("faiss_index")