# Check available LSMS data
Apparently, not every country has many LSMS surveys. In the following code we check how many countries have an LSMS at all and which ones have at least two, so that we can perform the time travel. For this check we use the [search API](https://microdata.worldbank.org/api-documentation/catalog/index.html#operation/CatalogSearch) provided by the World Bank. We search for the keyword LSMS (the parameter `sk`) and specify the country with the parameter `country`. For the country we use its ISO code.

In [1]:
from tqdm import tqdm 
import pandas as pd
import requests

In [2]:
# Catalog-search endpoint of the World Bank microdata API with the keyword
# filter already set (`sk=lsms`). The URL ends in `country=` so that a
# country code can be appended directly to form a complete request URL.
base_url: str = "https://microdata.worldbank.org/index.php/api/catalog/search?sk=lsms&country="

In [3]:
# Country metadata table; the lookup loop below reads its `name` and `iso`
# columns. `index_col=False` keeps pandas from using the first CSV column as
# the row index, so every column stays available as regular data.
df: pd.DataFrame = pd.read_csv("../../data/countries_meta/countries_code.csv", index_col=False)

In [7]:
# Query the World Bank catalog once per country and sort the countries into two
# tables, both with the columns `name`, `iso`, `year` and `url` (one row per
# matching survey):
#   * `valid`      - countries with exactly one country-specific LSMS entry
#   * `time_valid` - countries with two or more entries (usable for time travel)

# Seconds to wait for the API before giving up on a request. Without a timeout
# `requests` can block forever on a stalled connection and freeze the loop.
REQUEST_TIMEOUT_S: float = 30.0

# Per-country frames are collected in lists and concatenated once at the end;
# calling `pd.concat` inside the loop would re-copy the growing table each time.
valid_frames: list = []
time_valid_frames: list = []

for _, country in tqdm(df.iterrows(), total=len(df)):
    response = requests.get(base_url + country["iso"], timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    result: dict = response.json()["result"]

    if result["found"] == 0:
        continue

    # Only entries whose `nation` tag names a single country (no comma) are the
    # desired data; multi-country studies are skipped.
    # NOTE(review): only the rows contained in this single response are read.
    # If the API paginates, `found` can exceed len(result["rows"]) and later
    # pages are silently ignored - confirm the page-size behaviour in the docs.
    rows = [res for res in result["rows"] if "," not in res["nation"]]
    if not rows:
        continue

    # `name` and `iso` are scalars and get broadcast over all survey rows.
    tmp_df: pd.DataFrame = pd.DataFrame(
        {
            "name": country["name"],
            "iso": country["iso"],
            "year": [res["year_end"] for res in rows],
            "url": [res["url"] for res in rows],
        }
    )

    if len(rows) == 1:
        valid_frames.append(tmp_df)
    else:
        time_valid_frames.append(tmp_df)

# `pd.concat` raises on an empty list, so fall back to an empty frame when no
# country landed in a category.
valid: pd.DataFrame = pd.concat(valid_frames) if valid_frames else pd.DataFrame()
time_valid: pd.DataFrame = (
    pd.concat(time_valid_frames) if time_valid_frames else pd.DataFrame()
)

100%|██████████| 49/49 [00:29<00:00,  1.67it/s]


In [8]:
# Persist both result tables next to the country metadata. The row index is
# left out of the files because it carries no information.
csv_targets = {
    "../../data/countries_meta/countries_lsms_valid.csv": valid,
    "../../data/countries_meta/countries_lsms_time_valid.csv": time_valid,
}
for csv_path, frame in csv_targets.items():
    frame.to_csv(csv_path, index=False)