In [1]:
# Fetch the project repository; git clones it into ./Assignment.
!git clone https://github.com/HardikMochi/Assignment.git

Cloning into 'Assignment'...
remote: Enumerating objects: 14, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 14 (delta 3), reused 12 (delta 1), pack-reused 0[K
Unpacking objects: 100% (14/14), done.


In [2]:
cd Assignment

/content/Assignment


In [3]:
!pip install streamlit

Collecting streamlit
[?25l  Downloading https://files.pythonhosted.org/packages/d9/99/a8913c21bd07a14f72658a01784414ffecb380ddd0f9a127257314fea697/streamlit-0.80.0-py2.py3-none-any.whl (8.2MB)
[K     |████████████████████████████████| 8.2MB 3.7MB/s 
Collecting watchdog; platform_system != "Darwin"
[?25l  Downloading https://files.pythonhosted.org/packages/d2/b2/b4ebe23174fd00ec94ac3f58ebf85f1090c49858feab1ca62ed7ea4d2f2f/watchdog-2.0.3-py3-none-manylinux2014_x86_64.whl (74kB)
[K     |████████████████████████████████| 81kB 7.6MB/s 
Collecting base58
  Downloading https://files.pythonhosted.org/packages/b8/a1/d9f565e9910c09fd325dc638765e8843a19fa696275c16cc08cf3b0a3c25/base58-2.1.0-py3-none-any.whl
Collecting validators
  Downloading https://files.pythonhosted.org/packages/db/2f/7fed3ee94ad665ad2c1de87f858f10a7785251ff75b4fd47987888d07ef1/validators-0.18.2-py3-none-any.whl
Collecting blinker
[?25l  Downloading https://files.pythonhosted.org/packages/1b/51/e2a9f3b757eb802f61dc1f2b09c

In [4]:
!pip install faiss-cpu
!pip install -U sentence-transformers

Collecting faiss-cpu
[?25l  Downloading https://files.pythonhosted.org/packages/48/0c/efd43c4feac172867409f38f07949c36602355ec7196749d10f905d09228/faiss_cpu-1.7.0-cp37-cp37m-manylinux2014_x86_64.whl (8.1MB)
[K     |████████████████████████████████| 8.2MB 4.7MB/s 
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.0
Collecting sentence-transformers
[?25l  Downloading https://files.pythonhosted.org/packages/c4/87/49dc49e13ac107ce912c2f3f3fd92252c6d4221e88d1e6c16747044a11d8/sentence-transformers-1.1.0.tar.gz (78kB)
[K     |████████████████████████████████| 81kB 3.6MB/s 
[?25hCollecting transformers<5.0.0,>=3.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 8.9MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e

In [5]:

%%writefile app.py
  
import faiss
import pickle
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
from utils import vector_search,id2details
import numpy as np


@st.cache
def read_data(data="data/misinformation_papers.csv"):
    """Load the papers table from a CSV file.

    Args:
        data: Path of the CSV file to read.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    papers = pd.read_csv(data)
    return papers


@st.cache(allow_output_mutation=True)
def load_bert_model(name="distilbert-base-nli-stsb-mean-tokens"):
    """Construct a ``SentenceTransformer`` from a model name.

    Args:
        name: Model identifier handed to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    encoder = SentenceTransformer(name)
    return encoder

@st.cache(allow_output_mutation=True)
def load_faiss_index(path_to_faiss="models/faiss_index.pickle"):
    """Read a pickled, serialized Faiss index from disk and rebuild it.

    Args:
        path_to_faiss: Path of the pickle file holding the serialized index.

    Returns:
        The object returned by ``faiss.deserialize_index``.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at index files from a trusted source.
    with open(path_to_faiss, "rb") as pickle_file:
        serialized_index = pickle.load(pickle_file)
    index = faiss.deserialize_index(serialized_index)
    return index


def main(num_results=5):
    """Render the Streamlit search page and show the top matches for a query.

    Loads the papers table, the sentence-embedding model and the Faiss index,
    reads a free-text query from a text area, and writes the title and
    abstract of each match returned by ``vector_search``.

    Args:
        num_results: How many matches to request and announce in the page
            title. Defaults to 5, which reproduces the original page.
    """
    # Load data and models
    data = read_data()
    model = load_bert_model()
    faiss_index = load_faiss_index()

    st.title(f"Top {num_results} ranked articles for that query")

    # User search
    user_input = st.text_area("Search box", "Understanding the perception of COVID-19 policies by mining a multilanguage Twitter dataset")

    # Fetch results
    if user_input:
        # Only the ids are used; the first value returned by vector_search is ignored.
        _distances, I = vector_search([user_input], model, faiss_index, num_results=num_results)
        titles = np.array(id2details(data, I, 'original_title'))
        abstracts = np.array(id2details(data, I, 'abstract'))
        # Iterate over what was actually returned instead of a fixed range, so
        # a short result list cannot raise IndexError.
        for title, abstract in zip(titles, abstracts):
            # Whitespace is spelled out explicitly: the three spaces before
            # the first newline are a Markdown hard line break and would be
            # invisible (and easy to strip) inside a triple-quoted string.
            st.write(
                f"**title :{title}**   \n"
                f"            **Abstract :**\n"
                f"            {abstract}\n"
                f"            "
            )


# Entry point: build the page when this file is executed as the main module.
if __name__ == "__main__":
    main()



Writing app.py


In [6]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -qq ngrok-stable-linux-amd64.zip

--2021-04-25 04:59:02--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 52.0.242.214, 3.224.116.172, 35.170.116.11, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.0.242.214|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13828408 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2021-04-25 04:59:02 (48.2 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13828408/13828408]



In [8]:
import json
import time
import urllib.request

# Start an ngrok tunnel to port 8501 (the port the Streamlit server listens on)
# in the background.
get_ipython().system_raw('./ngrok http 8501 &')

# ngrok's local API on port 4040 is not reachable the instant the process is
# spawned, so poll it briefly instead of querying once and failing on a race.
public_url = None
for _attempt in range(20):
    try:
        with urllib.request.urlopen("http://localhost:4040/api/tunnels") as response:
            tunnels = json.load(response)["tunnels"]
    except (OSError, ValueError):
        # API not up yet (connection refused) or response incomplete; retry.
        tunnels = []
    if tunnels:
        public_url = tunnels[0]["public_url"]
        break
    time.sleep(0.5)

if public_url is None:
    raise RuntimeError("ngrok did not report a tunnel on http://localhost:4040 within 10 seconds")
print(public_url)

https://0587fecb3602.ngrok.io


In [9]:
# Launch the Streamlit server for app.py. The `!` command runs in the
# foreground, so this cell keeps running until the server is stopped.
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.2:8501[0m
[34m  External URL: [0m[1mhttp://35.231.5.11:8501[0m
[0m
2021-04-25 04:59:16.193 Loading faiss with AVX2 support.
2021-04-25 04:59:16.194 Loading faiss.
2021-04-25 04:59:21.446762: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-25 04:59:23.414 Load pretrained SentenceTransformer: distilbert-base-nli-stsb-mean-tokens
2021-04-25 04:59:23.415 Did not find folder distilbert-base-nli-stsb-mean-tokens
2021-04-25 04:59:23.415 Search model on server: http://sbert.net/models/distilbert-base-nli-stsb-mean-tokens.zip
2021-04-25 04:59:23.416 Downloading sentence transformer model from http://sbert.net/models/distilbert-base-nli-stsb-mean-tokens.zip and saving it at /root/.cache/torch/sentence_transformers/sbert.net_models_distilbert-base-nli-stsb-mean-tokens
100% 245M/245M [00:13<00:0