# Novelpy Tutorial

The goal of this notebook is to show you the scripts that compute novelty indicators with Novelpy. If you want comments and more information about the scripts, please refer to this notebook: [ Novelpy JSON Tutorial ](https://github.com/Kwirtz/novelpy/tutorial/tuts_json.ipynb). This notebook connects to MongoDB instead of reading JSON files; everything else is the same.

In [None]:
from novelpy.utils.get_sample import download_sample
# Download the sample dataset; the MongoDB connection string is passed as client_name.
mongo_uri = "mongodb://localhost:27017"
download_sample(client_name=mongo_uri)

In [None]:
import novelpy

# Run create_cooc over the 1995-2015 window for both sample collections.
# Each collection is processed twice: once weighted with self-loops, and once
# unweighted without self-loops.
cooc_sources = [
    # (collection_name, var, sub_var)
    ("Ref_Journals_sample", "c04_referencelist", "item"),
    ("Meshterms_sample", "Mesh_year_category", "descUI"),
]

for cooc_collection, cooc_var, cooc_sub_var in cooc_sources:
    # The same flag drives both options, giving the (True, True) and
    # (False, False) variants in that order.
    for cooc_flag in (True, False):
        ref_cooc = novelpy.utils.cooc_utils.create_cooc(
            client_name="mongodb://localhost:27017",
            db_name="novelty_sample",
            collection_name=cooc_collection,
            year_var="year",
            var=cooc_var,
            sub_var=cooc_sub_var,
            time_window=range(1995, 2016),
            weighted_network=cooc_flag,
            self_loop=cooc_flag,
        )
        ref_cooc.main()


In [None]:
import tqdm
import novelpy

# Uzzi et al. (2013) on Meshterms_sample, one run per focal year from 2000 to 2010.
uzzi_mesh_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Meshterms_sample",
    id_variable="PMID",
    year_variable="year",
    variable="Mesh_year_category",
    sub_variable="descUI",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    uzzi_mesh = novelpy.indicators.Uzzi2013(focal_year=year, **uzzi_mesh_settings)
    uzzi_mesh.get_indicator()

# Uzzi et al. (2013) on Ref_Journals_sample, one run per focal year from 2000 to 2010.
uzzi_ref_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Ref_Journals_sample",
    id_variable="PMID",
    year_variable="year",
    variable="c04_referencelist",
    sub_variable="item",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    uzzi_ref = novelpy.indicators.Uzzi2013(focal_year=year, **uzzi_ref_settings)
    uzzi_ref.get_indicator()

# Foster et al. (2015) on Meshterms_sample, one run per focal year from 2000 to 2010.
foster_mesh_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Meshterms_sample",
    id_variable="PMID",
    year_variable="year",
    variable="Mesh_year_category",
    sub_variable="descUI",
    starting_year=1995,
    community_algorithm="Louvain",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    foster_mesh = novelpy.indicators.Foster2015(focal_year=year, **foster_mesh_settings)
    foster_mesh.get_indicator()

# Foster et al. (2015) on Ref_Journals_sample, one run per focal year from 2000 to 2010.
foster_ref_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Ref_Journals_sample",
    id_variable="PMID",
    year_variable="year",
    variable="c04_referencelist",
    sub_variable="item",
    starting_year=1995,
    community_algorithm="Louvain",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    foster_ref = novelpy.indicators.Foster2015(focal_year=year, **foster_ref_settings)
    foster_ref.get_indicator()

# Lee et al. (2015) on Meshterms_sample, one run per focal year from 2000 to 2010.
lee_mesh_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Meshterms_sample",
    id_variable="PMID",
    year_variable="year",
    variable="Mesh_year_category",
    sub_variable="descUI",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    lee_mesh = novelpy.indicators.Lee2015(focal_year=year, **lee_mesh_settings)
    lee_mesh.get_indicator()

# Lee et al. (2015) on Ref_Journals_sample, one run per focal year from 2000 to 2010.
lee_ref_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Ref_Journals_sample",
    id_variable="PMID",
    year_variable="year",
    variable="c04_referencelist",
    sub_variable="item",
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    lee_ref = novelpy.indicators.Lee2015(focal_year=year, **lee_ref_settings)
    lee_ref.get_indicator()

# Wang et al. (2017) on Meshterms_sample, one run per focal year from 2000 to 2010.
wang_mesh_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Meshterms_sample",
    id_variable="PMID",
    year_variable="year",
    variable="Mesh_year_category",
    sub_variable="descUI",
    time_window_cooc=3,
    n_reutilisation=1,
    starting_year=1995,
    density=True,
)

for year in tqdm.tqdm(range(2000, 2011)):
    wang_mesh = novelpy.indicators.Wang2017(focal_year=year, **wang_mesh_settings)
    wang_mesh.get_indicator()


# Wang et al.(2017) Ref_Journals_sample
# client_name and db_name are passed explicitly, as in every other indicator
# call of this notebook; without them the call does not point at the MongoDB
# sample database that the rest of the notebook uses.
for focal_year in tqdm.tqdm(range(2000,2011)):
    Wang = novelpy.indicators.Wang2017( client_name="mongodb://localhost:27017",
                                        db_name = "novelty_sample",
                                        collection_name = "Ref_Journals_sample",
                                        id_variable = 'PMID',
                                        year_variable = 'year',
                                        variable = "c04_referencelist",
                                        sub_variable = "item",
                                        focal_year = focal_year,
                                        time_window_cooc = 3,
                                        n_reutilisation = 1,
                                        starting_year = 1995,
                                        density = True)
    Wang.get_indicator()

In [None]:
from novelpy.utils.embedding import Embedding

# Years covered by the embedding step (2000 to 2010 inclusive).
embedding_years = range(2000, 2011)

# Configure the embedding helper: MongoDB connection, field names of the
# documents, and the path to the pretrained language model.
embedding = Embedding(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    year_variable="year",
    time_range=embedding_years,
    id_variable="PMID",
    references_variable="refs_pmid_wos",
    pretrain_path="en_core_sci_lg-0.4.0/en_core_sci_lg/en_core_sci_lg-0.4.0",
    title_variable="ArticleTitle",
    abstract_variable="a04_abstract",
    abstract_subvariable="AbstractText",
)

# Articles: read from 'Title_abs_sample', with 'embedding' as the embedding collection.
embedding.get_articles_centroid(
    collection_articles="Title_abs_sample",
    collection_embedding="embedding",
    year_range=embedding_years,
)

In [None]:
import novelpy
import tqdm

# Shibayama et al. (2021) on the citation network, one run per focal year
# from 2000 to 2010, using the title and abstract embeddings stored in the
# 'embedding' collection.
shibayama_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Citation_net_sample",
    collection_embedding_name="embedding",
    id_variable="PMID",
    year_variable="year",
    ref_variable="refs_pmid_wos",
    density=True,
)

for focal_year in tqdm.tqdm(range(2000, 2011), desc="Computing indicator for window of time"):
    shibayama = novelpy.indicators.Shibayama2021(
        entity=["title_embedding", "abstract_embedding"],
        focal_year=focal_year,
        **shibayama_settings,
    )
    shibayama.get_indicator()

In [None]:
from novelpy.utils import Embedding
from novelpy.utils import create_authors_past
import novelpy

# First step: create a collection where each document holds an author ID and
# the list of documents that author co-authored.
clean = create_authors_past(client_name = 'mongodb://localhost:27017',
                              db_name = 'novelty_sample',
                              collection_name = "authors_sample",
                              id_variable = "PMID",
                              variable = "a02_authorlist",
                              sub_variable = "AID")

clean.author2paper()
clean.update_db()

# Second step: configure the embedding helper for author profiles.
# client_name and db_name are passed explicitly, as in the earlier embedding
# cell, so this step targets the same MongoDB sample database.
# The model path uses forward slashes so it resolves on every OS, not only Windows.
# NOTE(review): the earlier embedding cell points at en_core_sci_lg-0.4.0 while
# this one points at 0.5.3 - confirm which model version is actually installed.
embedding = Embedding(
      client_name = 'mongodb://localhost:27017',
      db_name = 'novelty_sample',
      year_variable = 'year',
      id_variable = 'PMID',
      references_variable = 'refs_pmid_wos',
      pretrain_path = 'en_core_sci_lg-0.5.3/en_core_sci_lg/en_core_sci_lg-0.5.3',
      title_variable = 'ArticleTitle',
      abstract_variable = 'a04_abstract',
      abstract_subvariable = 'AbstractText',
      aut_id_variable = 'AID',
      aut_pubs_variable = 'doc_list')

# Article centroids are not recomputed here: the earlier embedding cell already
# calls get_articles_centroid with 'embedding' as the embedding collection.

# Third step: build the author profiles from the cleaned authors collection,
# using the 'embedding' collection.
embedding.feed_author_profile(
    aut_id_variable = 'AID',
    aut_pubs_variable = 'doc_list',
    collection_authors = 'authors_sample_cleaned',
    collection_embedding = 'embedding')

In [None]:
from novelpy.indicators.Author_proximity import Author_proximity

# Author proximity indicator on authors_sample, one run per focal year from
# 2000 to 2010, with a window size of 5.
author_proximity_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="authors_sample",
    id_variable="PMID",
    year_variable="year",
    aut_list_variable="a02_authorlist",
    aut_id_variable="AID",
    windows_size=5,
    density=True,
)

for year in range(2000, 2011):
    author = Author_proximity(
        entity=["title", "abstract"],
        focal_year=year,
        **author_proximity_settings,
    )
    author.get_indicator()

In [None]:
import novelpy

# Preprocess the citation network before the disruptiveness indicators:
# run id2citedby(), then update_db().
citation_network_builder = novelpy.utils.preprocess_disruptiveness.create_citation_network
clean = citation_network_builder(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Citation_net_sample",
    id_variable="PMID",
    variable="refs_pmid_wos",
)
clean.id2citedby()
clean.update_db()

In [None]:
import tqdm
import novelpy


# Disruptiveness indicators on the cleaned citation network, one run per
# focal year from 2000 to 2010, with parallel execution switched off.
disruptiveness_settings = dict(
    client_name="mongodb://localhost:27017",
    db_name="novelty_sample",
    collection_name="Citation_net_sample_cleaned",
    id_variable="PMID",
    refs_list_variable="refs",
    cits_list_variable="cited_by",
    year_variable="year",
)

for year in range(2000, 2011):
    disruptiveness = novelpy.Disruptiveness(focal_year=year, **disruptiveness_settings)
    disruptiveness.get_indicators(parallel=False)
