*Trabajo de Fin de Máster. Ingeniería del Software: Cloud, Datos y Gestión TI*

------

# Respuesta frente a la COVID-19: Un análisis de GitHub

##### *Jacinto José Ruiz Díaz*

## Capítulo 4.2. Búsqueda de repositorios

------

In [1]:
import requests
import json
import pandas as pd 
import time
from datetime import timedelta, datetime
import sys
pd.options.mode.chained_assignment = None
from pandasgui import show

### Creación de cabecera con token de identificación

In [2]:
token = "" # Rellenar con token generado en GitHub
header = {'Authorization': 'token %s' % token}

### Límites de consultas

In [4]:
limits = requests.get('https://api.github.com/rate_limit')
print(json.dumps(limits.json(), indent=4, sort_keys=True))

{
    "rate": {
        "limit": 60,
        "remaining": 60,
        "reset": 1630632389,
        "resource": "core",
        "used": 0
    },
    "resources": {
        "core": {
            "limit": 60,
            "remaining": 60,
            "reset": 1630632389,
            "resource": "core",
            "used": 0
        },
        "graphql": {
            "limit": 0,
            "remaining": 0,
            "reset": 1630632389,
            "resource": "graphql",
            "used": 0
        },
        "integration_manifest": {
            "limit": 5000,
            "remaining": 5000,
            "reset": 1630632389,
            "resource": "integration_manifest",
            "used": 0
        },
        "search": {
            "limit": 10,
            "remaining": 10,
            "reset": 1630628849,
            "resource": "search",
            "used": 0
        }
    }
}


In [5]:
limits = requests.get('https://api.github.com/rate_limit', headers=header)
print(json.dumps(limits.json(), indent=4, sort_keys=True))

{
    "rate": {
        "limit": 5000,
        "remaining": 5000,
        "reset": 1630632398,
        "used": 0
    },
    "resources": {
        "code_scanning_upload": {
            "limit": 500,
            "remaining": 500,
            "reset": 1630632398,
            "used": 0
        },
        "core": {
            "limit": 5000,
            "remaining": 5000,
            "reset": 1630632398,
            "used": 0
        },
        "graphql": {
            "limit": 5000,
            "remaining": 5000,
            "reset": 1630632398,
            "used": 0
        },
        "integration_manifest": {
            "limit": 5000,
            "remaining": 5000,
            "reset": 1630632398,
            "used": 0
        },
        "search": {
            "limit": 30,
            "remaining": 30,
            "reset": 1630628858,
            "used": 0
        },
        "source_import": {
            "limit": 100,
            "remaining": 100,
            "reset": 1630628858,
    

### Búsqueda de repositorios

In [12]:
used_cols = ["id", "name", "full_name", "description", "contributors_url", "archived", "disabled", "stargazers_count",
             "watchers_count", "forks_count", "has_downloads", "has_issues", "has_pages", "has_projects", "has_wiki", 
             "homepage", "html_url", "language", "created_at", "pushed_at", "updated_at", "svn_url", "owner.login"]

In [29]:
def get_df_from_github_calls(response):
    
    response_json = response.json()
    total_count = response_json["total_count"]
    
    print('Procesando URL: {}'.format(response.url))
    
    print('Número de resultados: {}'.format(total_count))
    
    print("Creando dataframe...")

    df = pd.json_normalize(response_json["items"])
    print("Tamaño actual dataframe: {}".format(df.shape[0]))

    while("next" in response.links):
        print("Esperando 7 segundos...")
        time.sleep(wait_time) #Espera de 7 segundos

        print('Procesando URL: {}'.format(response.links["next"]["url"]))
        response = requests.get(response.links["next"]["url"], headers=header)
        response_json = response.json()

        print("Actualizando dataframe...")
        df_next = pd.json_normalize(response_json["items"])
        df = df.append(df_next)
        print("Tamaño actual dataframe: {}".format(df.shape[0]))

    if(df.shape[0] != total_count):
        print("--- [ERROR] Se han almacenado menos resultados de lo esperado \n Número total: {} \n Resultados almacenados: {}"
                                                                                        .format(total_count, df.shape[0]))
        sys.exit(1)
    # Selección solo de las columnas que vamos a utilizar
    df_reducido = df[used_cols]
    
    return df_reducido

In [30]:
def get_df_from_split_calls(date):
    since_date1 = date
    until_date1 = date + timedelta(hours = 12)
    since_date2 = until_date1 + timedelta(seconds = 1)
    until_date2 = until_date1 + timedelta(hours = 11, minutes = 59, seconds = 59)
    
    res_dataframe = pd.DataFrame(columns=used_cols)
    
    query1 = "?per_page=100" + '&q=' + keyword + " created:" + since_date1.strftime("%Y-%m-%dT%H:%M:%S") \
                                                             + ".." + until_date1.strftime("%Y-%m-%dT%H:%M:%S") 
    query2 = "?per_page=100" + '&q=' + keyword + " created:" + since_date2.strftime("%Y-%m-%dT%H:%M:%S") \
                                                             + ".." + until_date2.strftime("%Y-%m-%dT%H:%M:%S") 
    queries = [query1, query2]
    
    for index, query in enumerate(queries, start=1):
        response = requests.get('https://api.github.com/search/repositories' + query, headers=header)
        response_json = response.json()
        
        total_count = response_json["total_count"]
        print("\n --- Tamaño llamada {}: {} ---".format(index, total_count))
        
        print('Procesando URL: {}'.format(response.url))
        print("Creando dataframe...")

        df = pd.json_normalize(response_json["items"])
        print("Tamaño actual dataframe: {}".format(df.shape[0]))

        while("next" in response.links):
            print("Esperando 7 segundos...")
            time.sleep(wait_time) #Espera de 7 segundos

            print('Procesando URL: {}'.format(response.links["next"]["url"]))
            response = requests.get(response.links["next"]["url"], headers=header)
            response_json = response.json()

            print("Actualizando dataframe...")
            df_next = pd.json_normalize(response_json["items"])
            df = df.append(df_next)
            print("Tamaño actual dataframe: {}".format(df.shape[0]))

        if(df.shape[0] != total_count):
            print("--- [ERROR] Se han almacenado menos resultados de lo esperado \n Número total: {} \n Resultados almacenados: {}"
                                                                                            .format(total_count, df.shape[0]))
            sys.exit(1)
        # Selección solo de las columnas que vamos a utilizar
        df_reducido = df[used_cols]

        res_dataframe = res_dataframe.append(df_reducido)
        
    return res_dataframe

In [34]:
keyword = "covid-19"

date = datetime(2020, 3, 2)

limit_date = datetime(2021, 7, 25)

i = 2
while(date <= limit_date):
    print("\n\n -- PROCESANDO FECHA {} --".format(date.strftime("%Y-%m-%d")))
    
    query = "?per_page=100" + '&q=' + keyword + " created:" + date.strftime("%Y-%m-%d")

    response = requests.get('https://api.github.com/search/repositories' + query, headers=header)
    response_json = response.json()
    
    total_count = response_json["total_count"]
    
    if(total_count > 1000):
        print("------- Tamaño mayor a 1000 ({}). Dividimos la llamada... -------".format(total_count))
        df = get_df_from_split_calls(date)
        
    else:
        print("Esperando 5 segundos...")
        time.sleep(5) #Espera de 5 segundos
        df = get_df_from_github_calls(response)

    df.to_csv(r''+keyword+'_'+ str(i) +'.csv', index = False, encoding="utf-8-sig")
        
    date += timedelta(days = 1)
    i += 1



 -- PROCESANDO FECHA 2021-01-04 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-01-04
Número de resultados: 130
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-01-04&page=2
Actualizando dataframe...
Tamaño actual dataframe: 130


 -- PROCESANDO FECHA 2021-01-05 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-01-05
Número de resultados: 157
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-01-05&page=2
Actualizando dataframe...
Tamaño actual dataframe: 157


 -- PROCESANDO FECHA 2021-01-06 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=cov

Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-01-23
Número de resultados: 121
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-01-23&page=2
Actualizando dataframe...
Tamaño actual dataframe: 121


 -- PROCESANDO FECHA 2021-01-24 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-01-24
Número de resultados: 144
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-01-24&page=2
Actualizando dataframe...
Tamaño actual dataframe: 144


 -- PROCESANDO FECHA 2021-01-25 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-01-25
Número de 

Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-02-12
Número de resultados: 95
Creando dataframe...
Tamaño actual dataframe: 95


 -- PROCESANDO FECHA 2021-02-13 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-02-13
Número de resultados: 103
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-02-13&page=2
Actualizando dataframe...
Tamaño actual dataframe: 103


 -- PROCESANDO FECHA 2021-02-14 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-02-14
Número de resultados: 110
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-02-1

Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-03-04&page=2
Actualizando dataframe...
Tamaño actual dataframe: 131


 -- PROCESANDO FECHA 2021-03-05 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-05
Número de resultados: 74
Creando dataframe...
Tamaño actual dataframe: 74


 -- PROCESANDO FECHA 2021-03-06 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-06
Número de resultados: 114
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-03-06&page=2
Actualizando dataframe...
Tamaño actual dataframe: 114


 -- PROCESANDO FECHA 2021-03-07 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-07

Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-03-26&page=2
Actualizando dataframe...
Tamaño actual dataframe: 114


 -- PROCESANDO FECHA 2021-03-27 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-27
Número de resultados: 96
Creando dataframe...
Tamaño actual dataframe: 96


 -- PROCESANDO FECHA 2021-03-28 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-28
Número de resultados: 97
Creando dataframe...
Tamaño actual dataframe: 97


 -- PROCESANDO FECHA 2021-03-29 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-03-29
Número de resultados: 115
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%

Actualizando dataframe...
Tamaño actual dataframe: 115


 -- PROCESANDO FECHA 2021-04-17 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-04-17
Número de resultados: 100
Creando dataframe...
Tamaño actual dataframe: 100


 -- PROCESANDO FECHA 2021-04-18 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-04-18
Número de resultados: 123
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-04-18&page=2
Actualizando dataframe...
Tamaño actual dataframe: 123


 -- PROCESANDO FECHA 2021-04-19 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-04-19
Número de resultados: 128
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesan

Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-05-06&page=2
Actualizando dataframe...
Tamaño actual dataframe: 157


 -- PROCESANDO FECHA 2021-05-07 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-05-07
Número de resultados: 166
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-05-07&page=2
Actualizando dataframe...
Tamaño actual dataframe: 166


 -- PROCESANDO FECHA 2021-05-08 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-05-08
Número de resultados: 139
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-05-08&page=2
Actualizando dataframe...
T

Actualizando dataframe...
Tamaño actual dataframe: 164


 -- PROCESANDO FECHA 2021-05-26 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-05-26
Número de resultados: 131
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-05-26&page=2
Actualizando dataframe...
Tamaño actual dataframe: 131


 -- PROCESANDO FECHA 2021-05-27 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-05-27
Número de resultados: 163
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-05-27&page=2
Actualizando dataframe...
Tamaño actual dataframe: 163


 -- PROCESANDO FECHA 2021-05-28 --
Esperando 5 segundos...
Procesando URL: https:

Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-06-14
Número de resultados: 143
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-06-14&page=2
Actualizando dataframe...
Tamaño actual dataframe: 143


 -- PROCESANDO FECHA 2021-06-15 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-06-15
Número de resultados: 123
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-06-15&page=2
Actualizando dataframe...
Tamaño actual dataframe: 123


 -- PROCESANDO FECHA 2021-06-16 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-06-16
Número de 

Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-07-03
Número de resultados: 121
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-07-03&page=2
Actualizando dataframe...
Tamaño actual dataframe: 121


 -- PROCESANDO FECHA 2021-07-04 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-07-04
Número de resultados: 126
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-07-04&page=2
Actualizando dataframe...
Tamaño actual dataframe: 126


 -- PROCESANDO FECHA 2021-07-05 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-07-05
Número de resultados: 111
Creando 

Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-07-22&page=2
Actualizando dataframe...
Tamaño actual dataframe: 109


 -- PROCESANDO FECHA 2021-07-23 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-07-23
Número de resultados: 112
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-07-23&page=2
Actualizando dataframe...
Tamaño actual dataframe: 112


 -- PROCESANDO FECHA 2021-07-24 --
Esperando 5 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19%20created:2021-07-24
Número de resultados: 124
Creando dataframe...
Tamaño actual dataframe: 100
Esperando 7 segundos...
Procesando URL: https://api.github.com/search/repositories?per_page=100&q=covid-19+created%3A2021-07-24&page=2
Actualizando dataframe...
T

----

### Unión de ficheros CSV

In [1]:
import os
from glob import glob
import pandas as pd

path = "/coronavirus/"

In [10]:
files = glob("coronavirus/coronavirus_*.csv")
files

['coronavirus\\coronavirus_1.csv',
 'coronavirus\\coronavirus_10.csv',
 'coronavirus\\coronavirus_100.csv',
 'coronavirus\\coronavirus_101.csv',
 'coronavirus\\coronavirus_102.csv',
 'coronavirus\\coronavirus_103.csv',
 'coronavirus\\coronavirus_104.csv',
 'coronavirus\\coronavirus_105.csv',
 'coronavirus\\coronavirus_106.csv',
 'coronavirus\\coronavirus_107.csv',
 'coronavirus\\coronavirus_108.csv',
 'coronavirus\\coronavirus_109.csv',
 'coronavirus\\coronavirus_11.csv',
 'coronavirus\\coronavirus_110.csv',
 'coronavirus\\coronavirus_111.csv',
 'coronavirus\\coronavirus_112.csv',
 'coronavirus\\coronavirus_113.csv',
 'coronavirus\\coronavirus_114.csv',
 'coronavirus\\coronavirus_115.csv',
 'coronavirus\\coronavirus_116.csv',
 'coronavirus\\coronavirus_117.csv',
 'coronavirus\\coronavirus_118.csv',
 'coronavirus\\coronavirus_119.csv',
 'coronavirus\\coronavirus_12.csv',
 'coronavirus\\coronavirus_120.csv',
 'coronavirus\\coronavirus_121.csv',
 'coronavirus\\coronavirus_122.csv',
 'coro

In [11]:
df = pd.concat(map(pd.read_csv, files), ignore_index=True)

In [12]:
df

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,license,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,,https://github.com/midas-network/COVID-19,,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15237,270364345,covid-berlin-scraper,jakubvalenta/covid-berlin-scraper,Download coronavirus data from the official pr...,https://api.github.com/repos/jakubvalenta/covi...,False,False,1,1,0,...,True,,https://github.com/jakubvalenta/covid-berlin-s...,,HTML,2020-06-07T16:26:05Z,2021-06-04T09:02:03Z,2021-06-04T09:01:29Z,https://github.com/jakubvalenta/covid-berlin-s...,jakubvalenta
15238,270337965,Covid19,Fgazzelloni/Covid19,This repository is about Covid19 Outbreak,https://api.github.com/repos/Fgazzelloni/Covid...,False,False,0,0,0,...,True,,https://github.com/Fgazzelloni/Covid19,,R,2020-06-07T14:53:09Z,2021-04-03T17:13:27Z,2021-04-03T17:13:29Z,https://github.com/Fgazzelloni/Covid19,Fgazzelloni
15239,270412711,CovidDataParsers,tudorpop9/CovidDataParsers,Programs used to refine the raw data from http...,https://api.github.com/repos/tudorpop9/CovidDa...,False,False,0,0,0,...,True,,https://github.com/tudorpop9/CovidDataParsers,,Java,2020-06-07T19:39:56Z,2020-06-07T19:45:31Z,2020-06-07T19:45:36Z,https://github.com/tudorpop9/CovidDataParsers,tudorpop9
15240,270428942,COVID-Compare,Ashwins9001/COVID-Compare,Find keyword trends (Using k-means and gaussia...,https://api.github.com/repos/Ashwins9001/COVID...,False,False,0,0,0,...,True,,https://github.com/Ashwins9001/COVID-Compare,,Jupyter Notebook,2020-06-07T20:50:54Z,2020-06-07T21:06:09Z,2020-08-03T20:18:37Z,https://github.com/Ashwins9001/COVID-Compare,Ashwins9001


In [13]:
df.to_csv(r'coronavirus_merged.csv', index = False, encoding="utf-8-sig")

### Unión de los cuatro datasets

In [16]:
df_completo = pd.DataFrame(columns=used_cols)

coronavirus = pd.read_csv('coronavirus_merged.csv')
covid = pd.read_csv('covid_merged.csv')
covid19 = pd.read_csv('covid19_merged.csv')
covid_19 = pd.read_csv('covid-19_merged.csv')

dataframes = [coronavirus, covid, covid19, covid_19]

In [17]:
for df in dataframes:
    df_completo = df_completo.append(df)

In [18]:
df_completo

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,license,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,,https://github.com/midas-network/COVID-19,,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110986,270364345,covid-berlin-scraper,jakubvalenta/covid-berlin-scraper,Download coronavirus data from the official pr...,https://api.github.com/repos/jakubvalenta/covi...,False,False,1,1,0,...,True,,https://github.com/jakubvalenta/covid-berlin-s...,,HTML,2020-06-07T16:26:05Z,2021-06-04T09:02:03Z,2021-06-04T09:01:29Z,https://github.com/jakubvalenta/covid-berlin-s...,jakubvalenta
110987,270393880,PAP_V20_Covid_SocialAnalysis,AraceliCastillo/PAP_V20_Covid_SocialAnalysis,"En este PAP, se busca desarrollar un dashboard...",https://api.github.com/repos/AraceliCastillo/P...,False,False,0,0,0,...,True,,https://github.com/AraceliCastillo/PAP_V20_Cov...,,Jupyter Notebook,2020-06-07T18:21:52Z,2020-06-16T17:43:36Z,2020-06-16T17:43:38Z,https://github.com/AraceliCastillo/PAP_V20_Cov...,AraceliCastillo
110988,270466209,ADLTS,trevinofernando/ADLTS,A Unity Simulation for the Automated Drone Las...,https://api.github.com/repos/trevinofernando/A...,False,False,0,0,0,...,True,,https://github.com/trevinofernando/ADLTS,,C++,2020-06-07T23:53:01Z,2020-12-05T16:45:14Z,2020-12-05T16:45:16Z,https://github.com/trevinofernando/ADLTS,trevinofernando
110989,270379783,covidn,aiosk/covidn,covid19 cases both national and provinces in i...,https://api.github.com/repos/aiosk/covidn/cont...,False,False,0,0,0,...,True,https://aiosk.github.io/covidn/,https://github.com/aiosk/covidn,,Vue,2020-06-07T17:25:40Z,2021-08-12T16:56:47Z,2020-09-21T20:36:47Z,https://github.com/aiosk/covidn,aiosk


In [34]:
df_completo.shape[0]

337195

### Limpieza y preprocesamiento

Eliminamos las filas duplicadas.

In [28]:
print(df_completo.duplicated(['id']).sum())

159125


In [32]:
df_deduplicado = df_completo.drop_duplicates(subset=['id'])

In [33]:
df_deduplicado

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,license,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,,https://github.com/midas-network/COVID-19,,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110402,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
110652,270034436,mzansi_corona,marvinIsSacul/mzansi_corona,Flutter app about South Africa's Corona Virus ...,https://api.github.com/repos/marvinIsSacul/mza...,False,False,0,0,0,...,True,,https://github.com/marvinIsSacul/mzansi_corona,,Dart,2020-06-06T16:15:43Z,2020-07-22T12:22:12Z,2021-03-08T18:24:34Z,https://github.com/marvinIsSacul/mzansi_corona,marvinIsSacul
110656,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,,https://github.com/jahnavi-prasad/face-mask-de...,,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
110661,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,https://tackode.com,https://github.com/Tackode/frontend,,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


In [35]:
# Exportamos a CSV para no perder el progreso
df_deduplicado.to_csv(r'df_deduplicado.csv', index = False, encoding="utf-8-sig")

Eliminamos la columna license, obtenida por error y sin haberse parseado correctamente desde JSON a tabla por ser objeto:

In [126]:
df = pd.read_csv('df_deduplicado.csv')

df_sin_license = df.drop(columns=['license'])

Finalmente lo exportamos de nuevo:

In [127]:
df_sin_license.to_csv(r'df_repositorios.csv', index = False, encoding="utf-8-sig")

------

## Filtrado

In [10]:
df = pd.read_csv('df_repositorios.csv')
df

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178066,270034436,mzansi_corona,marvinIsSacul/mzansi_corona,Flutter app about South Africa's Corona Virus ...,https://api.github.com/repos/marvinIsSacul/mza...,False,False,0,0,0,...,True,True,,https://github.com/marvinIsSacul/mzansi_corona,Dart,2020-06-06T16:15:43Z,2020-07-22T12:22:12Z,2021-03-08T18:24:34Z,https://github.com/marvinIsSacul/mzansi_corona,marvinIsSacul
178067,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,True,,https://github.com/jahnavi-prasad/face-mask-de...,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


Con la librería pandasgui podemos visualizar mejor los dataframes de Pandas:

In [3]:
from pandasgui import show
show(df)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x196bee699d0>

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178070 entries, 0 to 178069
Data columns (total 23 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   id                178070 non-null  int64 
 1   name              178070 non-null  object
 2   full_name         178070 non-null  object
 3   description       100021 non-null  object
 4   contributors_url  178070 non-null  object
 5   archived          178070 non-null  bool  
 6   disabled          178070 non-null  bool  
 7   stargazers_count  178070 non-null  int64 
 8   watchers_count    178070 non-null  int64 
 9   forks_count       178070 non-null  int64 
 10  has_downloads     178070 non-null  bool  
 11  has_issues        178070 non-null  bool  
 12  has_pages         178070 non-null  bool  
 13  has_projects      178070 non-null  bool  
 14  has_wiki          178070 non-null  bool  
 15  homepage          15382 non-null   object
 16  html_url          178070 non-null  obj

## Palabras relacionadas

In [19]:
related_name_words = ["corona", "coronavirus", "covid", "covid19", "covid-19", "sars-cov-2", "sars-cov-2", "sars-cov", "cov", 
                 "ncov", "ncov2019", "ncov19", "19ncov", "19-ncov", "2019ncov", "2019-ncov", "virus", "pandemic", "epidemic",
                    "新冠病毒", "新型冠状病毒"]
related_description_words = ["corona", "coronavirus", "covid", "covid19", "covid-19", "sars-cov-2", "sars-cov-2", "sars-cov", 
                             "cov", "ncov", "ncov2019", "ncov19", "19ncov", "19-ncov", "2019ncov", "2019-ncov", "virus", 
                             "pandemic", "epidemic", "新冠病毒", "新型冠状病毒", "cases", "contagios" "reported", "medical", 
                             "infections", "infection", "lockdown", "viral", "mask", "vaccine", "disease", "deaths", 
                             "pneumonia"]

In [20]:
pattern_name = "(?i)" + '|'.join(related_name_words)
pattern_description = "(?i)" + '|'.join(related_description_words)
pattern_description

'(?i)corona|coronavirus|covid|covid19|covid-19|sars-cov-2|sars-cov-2|sars-cov|cov|ncov|ncov2019|ncov19|19ncov|19-ncov|2019ncov|2019-ncov|virus|pandemic|epidemic|新冠病毒|新型冠状病毒|cases|contagiosreported|medical|infections|infection|lockdown|viral|mask|vaccine|disease|deaths|pneumonia'

In [9]:
df = pd.read_csv('repositorios_primer_filtrado.csv')
df

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178038,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178039,270034436,mzansi_corona,marvinIsSacul/mzansi_corona,Flutter app about South Africa's Corona Virus ...,https://api.github.com/repos/marvinIsSacul/mza...,False,False,0,0,0,...,True,True,,https://github.com/marvinIsSacul/mzansi_corona,Dart,2020-06-06T16:15:43Z,2020-07-22T12:22:12Z,2021-03-08T18:24:34Z,https://github.com/marvinIsSacul/mzansi_corona,marvinIsSacul
178040,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,True,,https://github.com/jahnavi-prasad/face-mask-de...,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
178041,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


### Comprobación solo en el nombre:

Dataframe resultante con columna *full_name* conteniendo el patrón:

In [7]:
df_in_name_boolean = df.full_name.str.contains(pattern_name)
df_in_name_boolean

0          True
1          True
2          True
3          True
4          True
          ...  
178065    False
178066     True
178067    False
178068    False
178069    False
Name: full_name, Length: 178070, dtype: bool

In [8]:
df_in_name_boolean.sum()

159157

In [9]:
df_in_name = df[df_in_name_boolean]
df_in_name

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178054,268148998,Epidemics-Modelling,pierpaolo28/Epidemics-Modelling,Modelling and tracking of epidemics data analy...,https://api.github.com/repos/pierpaolo28/Epide...,False,False,1,1,0,...,True,True,http://3.22.240.181:8501,https://github.com/pierpaolo28/Epidemics-Model...,Jupyter Notebook,2020-05-30T19:48:56Z,2020-11-17T19:44:00Z,2021-01-30T21:40:09Z,https://github.com/pierpaolo28/Epidemics-Model...,pierpaolo28
178056,268314248,UI,CovidToday/UI,,https://api.github.com/repos/CovidToday/UI/con...,False,False,0,0,0,...,True,False,,https://github.com/CovidToday/UI,JavaScript,2020-05-31T16:02:18Z,2021-08-12T15:32:29Z,2020-09-03T02:43:14Z,https://github.com/CovidToday/UI,CovidToday
178061,268770101,coroname,mariabnd/coroname,,https://api.github.com/repos/mariabnd/coroname...,False,False,0,0,0,...,True,True,,https://github.com/mariabnd/coroname,,2020-06-02T10:25:35Z,2021-01-06T12:52:34Z,2021-01-06T12:52:37Z,https://github.com/mariabnd/coroname,mariabnd
178064,269601203,CoronaDashboard,StefH/CoronaDashboard,Een Corona Dashboard gebaseerd op cijfers van ...,https://api.github.com/repos/StefH/CoronaDashb...,False,False,1,1,0,...,True,True,,https://github.com/StefH/CoronaDashboard,C#,2020-06-05T10:29:30Z,2021-07-19T15:54:46Z,2021-07-19T15:52:49Z,https://github.com/StefH/CoronaDashboard,StefH


**159.157** repositorios.

### Comprobación solo en la descripción:

Comprobamos previamente si hay repositorios con la descripción vacía:

In [10]:
df["description"].isnull().sum()

78049

Existen 78049 repositorios sin descripción. Lo tendremos en cuenta a continuación.

In [21]:
df_in_description_boolean = df.description.str.contains(pattern_description)
df_in_description_boolean

0          True
1          True
2          True
3          True
4          True
          ...  
178065    False
178066     True
178067     True
178068    False
178069    False
Name: description, Length: 178070, dtype: object

De aquí saldrán 78049 valores que no se corresponderán a False ni True, por lo que no nos dejará filtrar el dataframe. Los convertimos a False:

In [12]:
print(df_in_description_boolean.isnull().sum())

78049


In [13]:
df_in_description_boolean = df_in_description_boolean.fillna(False)

#Comprobamos si tiene valores nulos
print(df_in_description_boolean.isnull().sum())

0


Contamos cuantos repositorios tienen las palabras relacionadas en la descripción:

In [14]:
df_in_description_boolean.sum()

84100

In [15]:
df_in_description = df[df_in_description_boolean]
df_in_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178051,267858096,face_mask_detection_openvino,mmphego/face_mask_detection_openvino,Detect faces and determine whether people are ...,https://api.github.com/repos/mmphego/face_mask...,False,False,35,35,11,...,True,True,https://blog.mphomphego.co.za/blog/2020/06/02/...,https://github.com/mmphego/face_mask_detection...,Python,2020-05-29T12:56:47Z,2021-02-16T12:18:40Z,2021-05-25T21:53:17Z,https://github.com/mmphego/face_mask_detection...,mmphego
178054,268148998,Epidemics-Modelling,pierpaolo28/Epidemics-Modelling,Modelling and tracking of epidemics data analy...,https://api.github.com/repos/pierpaolo28/Epide...,False,False,1,1,0,...,True,True,http://3.22.240.181:8501,https://github.com/pierpaolo28/Epidemics-Model...,Jupyter Notebook,2020-05-30T19:48:56Z,2020-11-17T19:44:00Z,2021-01-30T21:40:09Z,https://github.com/pierpaolo28/Epidemics-Model...,pierpaolo28
178064,269601203,CoronaDashboard,StefH/CoronaDashboard,Een Corona Dashboard gebaseerd op cijfers van ...,https://api.github.com/repos/StefH/CoronaDashb...,False,False,1,1,0,...,True,True,,https://github.com/StefH/CoronaDashboard,C#,2020-06-05T10:29:30Z,2021-07-19T15:54:46Z,2021-07-19T15:52:49Z,https://github.com/StefH/CoronaDashboard,StefH
178066,270034436,mzansi_corona,marvinIsSacul/mzansi_corona,Flutter app about South Africa's Corona Virus ...,https://api.github.com/repos/marvinIsSacul/mza...,False,False,0,0,0,...,True,True,,https://github.com/marvinIsSacul/mzansi_corona,Dart,2020-06-06T16:15:43Z,2020-07-22T12:22:12Z,2021-03-08T18:24:34Z,https://github.com/marvinIsSacul/mzansi_corona,marvinIsSacul


**84.100** repositorios.

### En el nombre y no la descripción:

In [16]:
#En el nombre:
df_in_name_not_in_description = df.full_name.str.contains(pattern_name)
df_in_name_not_in_description

0          True
1          True
2          True
3          True
4          True
          ...  
178065    False
178066     True
178067    False
178068    False
178069    False
Name: full_name, Length: 178070, dtype: bool

In [17]:
df_in_name_not_in_description = df[df_in_name_not_in_description]
df_in_name_not_in_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178054,268148998,Epidemics-Modelling,pierpaolo28/Epidemics-Modelling,Modelling and tracking of epidemics data analy...,https://api.github.com/repos/pierpaolo28/Epide...,False,False,1,1,0,...,True,True,http://3.22.240.181:8501,https://github.com/pierpaolo28/Epidemics-Model...,Jupyter Notebook,2020-05-30T19:48:56Z,2020-11-17T19:44:00Z,2021-01-30T21:40:09Z,https://github.com/pierpaolo28/Epidemics-Model...,pierpaolo28
178056,268314248,UI,CovidToday/UI,,https://api.github.com/repos/CovidToday/UI/con...,False,False,0,0,0,...,True,False,,https://github.com/CovidToday/UI,JavaScript,2020-05-31T16:02:18Z,2021-08-12T15:32:29Z,2020-09-03T02:43:14Z,https://github.com/CovidToday/UI,CovidToday
178061,268770101,coroname,mariabnd/coroname,,https://api.github.com/repos/mariabnd/coroname...,False,False,0,0,0,...,True,True,,https://github.com/mariabnd/coroname,,2020-06-02T10:25:35Z,2021-01-06T12:52:34Z,2021-01-06T12:52:37Z,https://github.com/mariabnd/coroname,mariabnd
178064,269601203,CoronaDashboard,StefH/CoronaDashboard,Een Corona Dashboard gebaseerd op cijfers van ...,https://api.github.com/repos/StefH/CoronaDashb...,False,False,1,1,0,...,True,True,,https://github.com/StefH/CoronaDashboard,C#,2020-06-05T10:29:30Z,2021-07-19T15:54:46Z,2021-07-19T15:52:49Z,https://github.com/StefH/CoronaDashboard,StefH


No en la descripción:

In [18]:
df_in_name_not_in_description_boolean = df.description.str.contains(pattern_description) == False
df_in_name_not_in_description_boolean 

0         False
1         False
2         False
3         False
4         False
          ...  
178065     True
178066    False
178067    False
178068     True
178069     True
Name: description, Length: 178070, dtype: bool

In [19]:
df_in_name_not_in_description = df_in_name_not_in_description[df_in_name_not_in_description_boolean]
df_in_name_not_in_description

  df_in_name_not_in_description = df_in_name_not_in_description[df_in_name_not_in_description_boolean]


Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
56,238121123,coronavirus,bjwa2020/coronavirus,新冠疫情小区分布数据（数据来自各城市卫健委等官方微信公众号和官方网站）,https://api.github.com/repos/bjwa2020/coronavi...,False,False,23,23,3,...,True,True,https://www.gubi.io/,https://github.com/bjwa2020/coronavirus,,2020-02-04T04:15:07Z,2020-02-29T07:49:55Z,2021-07-30T00:11:10Z,https://github.com/bjwa2020/coronavirus,bjwa2020
70,238965247,coronavirus,weimeilin79/coronavirus,Camel K Serverless Demo,https://api.github.com/repos/weimeilin79/coron...,False,False,5,5,4,...,True,True,,https://github.com/weimeilin79/coronavirus,JavaScript,2020-02-07T15:59:25Z,2020-09-11T19:15:11Z,2020-09-16T08:36:30Z,https://github.com/weimeilin79/coronavirus,weimeilin79
86,238643080,Noval-Coronavirus-763-Cases,839-Studio/Noval-Coronavirus-763-Cases,763例冠状病毒确诊患者的详细情况，包括出行轨迹、发病就诊时间、人传人情况。,https://api.github.com/repos/839-Studio/Noval-...,False,False,11,11,4,...,True,True,,https://github.com/839-Studio/Noval-Coronaviru...,,2020-02-06T08:40:28Z,2020-02-06T08:54:43Z,2020-09-16T07:28:46Z,https://github.com/839-Studio/Noval-Coronaviru...,839-Studio
148,239239938,coronavirus-2019-nCov,C0D4-101/coronavirus-2019-nCov,Data Extracts from World Health Organization,https://api.github.com/repos/C0D4-101/coronavi...,False,False,8,8,1,...,True,True,,https://github.com/C0D4-101/coronavirus-2019-nCov,HTML,2020-02-09T03:21:05Z,2020-02-28T09:05:32Z,2020-07-18T18:33:17Z,https://github.com/C0D4-101/coronavirus-2019-nCov,C0D4-101
158,242778847,Here-Corona,SoonGwan/Here-Corona,"국내, 전세계 코로나 현황을 한눈에 알아 볼 수 있으며, 여러 정보를 제공하는 웹 ...",https://api.github.com/repos/SoonGwan/Here-Cor...,False,False,8,8,3,...,True,True,,https://github.com/SoonGwan/Here-Corona,JavaScript,2020-02-24T15:59:21Z,2021-01-05T21:46:26Z,2021-04-26T22:44:09Z,https://github.com/SoonGwan/Here-Corona,SoonGwan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177930,259285155,Novel-Corona-India,ni3mumbaikar/Novel-Corona-India,This Project is good example of web scraping u...,https://api.github.com/repos/ni3mumbaikar/Nove...,False,False,1,1,2,...,True,True,,https://github.com/ni3mumbaikar/Novel-Corona-I...,EJS,2020-04-27T10:49:10Z,2021-08-04T06:09:17Z,2021-08-04T06:09:19Z,https://github.com/ni3mumbaikar/Novel-Corona-I...,ni3mumbaikar
177956,260912128,Covid-19-info-Counter,KhanOmair/Covid-19-info-Counter,This is a Project I created by me while I was ...,https://api.github.com/repos/KhanOmair/Covid-1...,False,False,0,0,0,...,True,True,,https://github.com/KhanOmair/Covid-19-info-Cou...,Dart,2020-05-03T12:33:01Z,2020-05-03T13:56:42Z,2020-05-03T13:56:45Z,https://github.com/KhanOmair/Covid-19-info-Cou...,KhanOmair
177958,261163082,Project-COVgh-V1,E-B-Manohar/Project-COVgh-V1,Sickness Detection with 80% Val Accuracy. Trai...,https://api.github.com/repos/E-B-Manohar/Proje...,False,False,7,7,3,...,True,True,,https://github.com/E-B-Manohar/Project-COVgh-V1,Jupyter Notebook,2020-05-04T11:58:58Z,2020-09-09T09:46:17Z,2021-08-11T05:55:33Z,https://github.com/E-B-Manohar/Project-COVgh-V1,E-B-Manohar
177991,264028112,covid-19-app,leoalcantara/covid-19-app,Aplicação feita com ReactJS e PWA,https://api.github.com/repos/leoalcantara/covi...,False,False,0,0,0,...,True,True,,https://github.com/leoalcantara/covid-19-app,JavaScript,2020-05-14T21:22:38Z,2021-01-06T03:02:59Z,2020-06-19T02:15:26Z,https://github.com/leoalcantara/covid-19-app,leoalcantara


**14.714** repositorios.

### No en el nombre pero sí en la descripción:

In [20]:
df_not_in_name_but_in_description = df.full_name.str.contains(pattern_name) == False
df_not_in_name_but_in_description

0         False
1         False
2         False
3         False
4         False
          ...  
178065     True
178066    False
178067     True
178068     True
178069     True
Name: full_name, Length: 178070, dtype: bool

In [21]:
df_not_in_name_but_in_description = df[df_not_in_name_but_in_description]
df_not_in_name_but_in_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
40,237221585,gatsby-map,elinlutz/gatsby-map,"Mapping the coronavirus in react, gatsby.js an...",https://api.github.com/repos/elinlutz/gatsby-m...,False,False,31,31,12,...,True,True,https://coronakartan.se,https://github.com/elinlutz/gatsby-map,JavaScript,2020-01-30T13:43:13Z,2021-05-06T16:13:49Z,2020-07-01T14:29:51Z,https://github.com/elinlutz/gatsby-map,elinlutz
41,236218942,cido,CIDO-ontology/cido,CIDO: Coronavirus Infectious Disease Ontology,https://api.github.com/repos/CIDO-ontology/cid...,False,False,13,13,9,...,True,True,,https://github.com/CIDO-ontology/cido,,2020-01-25T19:30:01Z,2021-08-12T05:47:50Z,2021-08-12T05:47:53Z,https://github.com/CIDO-ontology/cido,CIDO-ontology
64,237385601,Getting-Things-Done-with-Pytorch,curiousily/Getting-Things-Done-with-Pytorch,Jupyter Notebook tutorials on solving real-wor...,https://api.github.com/repos/curiousily/Gettin...,False,False,957,957,300,...,True,True,https://www.curiousily.com/,https://github.com/curiousily/Getting-Things-D...,Jupyter Notebook,2020-01-31T08:02:53Z,2021-06-14T09:47:01Z,2021-08-14T11:16:33Z,https://github.com/curiousily/Getting-Things-D...,curiousily
67,236146885,pneumonia,lispczz/pneumonia,中国新型冠状病毒肺炎地级市疫情图,https://api.github.com/repos/lispczz/pneumonia...,False,False,356,356,100,...,True,True,https://lispczz.github.io/pneumonia/,https://github.com/lispczz/pneumonia,JavaScript,2020-01-25T08:56:59Z,2021-08-12T22:57:00Z,2021-07-30T00:06:31Z,https://github.com/lispczz/pneumonia,lispczz
72,229427465,Hentai-discord-bot,Eddy-Arch/Hentai-discord-bot,A NSFW hentai discord bot with extra functiona...,https://api.github.com/repos/Eddy-Arch/Hentai-...,False,False,21,21,12,...,True,True,,https://github.com/Eddy-Arch/Hentai-discord-bot,Python,2019-12-21T12:55:55Z,2021-07-18T09:53:05Z,2021-07-29T10:33:41Z,https://github.com/Eddy-Arch/Hentai-discord-bot,Eddy-Arch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178067,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,True,,https://github.com/jahnavi-prasad/face-mask-de...,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


In [22]:
df_not_in_name_but_in_description_boolean = df.description.str.contains(pattern_description)
df_not_in_name_but_in_description_boolean 

0          True
1          True
2          True
3          True
4          True
          ...  
178065    False
178066     True
178067     True
178068    False
178069    False
Name: description, Length: 178070, dtype: object

In [23]:
df_not_in_name_but_in_description_boolean = df_not_in_name_but_in_description_boolean.fillna(False)

In [24]:
df_not_in_name_but_in_description = df_not_in_name_but_in_description[df_not_in_name_but_in_description_boolean]
df_not_in_name_but_in_description

  df_not_in_name_but_in_description = df_not_in_name_but_in_description[df_not_in_name_but_in_description_boolean]


Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
40,237221585,gatsby-map,elinlutz/gatsby-map,"Mapping the coronavirus in react, gatsby.js an...",https://api.github.com/repos/elinlutz/gatsby-m...,False,False,31,31,12,...,True,True,https://coronakartan.se,https://github.com/elinlutz/gatsby-map,JavaScript,2020-01-30T13:43:13Z,2021-05-06T16:13:49Z,2020-07-01T14:29:51Z,https://github.com/elinlutz/gatsby-map,elinlutz
41,236218942,cido,CIDO-ontology/cido,CIDO: Coronavirus Infectious Disease Ontology,https://api.github.com/repos/CIDO-ontology/cid...,False,False,13,13,9,...,True,True,,https://github.com/CIDO-ontology/cido,,2020-01-25T19:30:01Z,2021-08-12T05:47:50Z,2021-08-12T05:47:53Z,https://github.com/CIDO-ontology/cido,CIDO-ontology
64,237385601,Getting-Things-Done-with-Pytorch,curiousily/Getting-Things-Done-with-Pytorch,Jupyter Notebook tutorials on solving real-wor...,https://api.github.com/repos/curiousily/Gettin...,False,False,957,957,300,...,True,True,https://www.curiousily.com/,https://github.com/curiousily/Getting-Things-D...,Jupyter Notebook,2020-01-31T08:02:53Z,2021-06-14T09:47:01Z,2021-08-14T11:16:33Z,https://github.com/curiousily/Getting-Things-D...,curiousily
67,236146885,pneumonia,lispczz/pneumonia,中国新型冠状病毒肺炎地级市疫情图,https://api.github.com/repos/lispczz/pneumonia...,False,False,356,356,100,...,True,True,https://lispczz.github.io/pneumonia/,https://github.com/lispczz/pneumonia,JavaScript,2020-01-25T08:56:59Z,2021-08-12T22:57:00Z,2021-07-30T00:06:31Z,https://github.com/lispczz/pneumonia,lispczz
72,229427465,Hentai-discord-bot,Eddy-Arch/Hentai-discord-bot,A NSFW hentai discord bot with extra functiona...,https://api.github.com/repos/Eddy-Arch/Hentai-...,False,False,21,21,12,...,True,True,,https://github.com/Eddy-Arch/Hentai-discord-bot,Python,2019-12-21T12:55:55Z,2021-07-18T09:53:05Z,2021-07-29T10:33:41Z,https://github.com/Eddy-Arch/Hentai-discord-bot,Eddy-Arch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178037,267352452,steam-lockdown,digital-wellbeing/steam-lockdown,How has gaming changed during lockdown? Analys...,https://api.github.com/repos/digital-wellbeing...,False,False,0,0,0,...,True,True,https://digital-wellbeing.github.io/steam-lock...,https://github.com/digital-wellbeing/steam-loc...,HTML,2020-05-27T15:10:55Z,2021-02-10T14:26:15Z,2021-02-16T13:48:34Z,https://github.com/digital-wellbeing/steam-loc...,digital-wellbeing
178047,267575087,Smart-Food-Storage,m-boutaleb/Smart-Food-Storage,An intelligent application (very useful during...,https://api.github.com/repos/m-boutaleb/Smart-...,False,False,0,0,0,...,True,True,,https://github.com/m-boutaleb/Smart-Food-Storage,Java,2020-05-28T11:45:23Z,2020-10-04T15:25:00Z,2021-03-23T08:43:26Z,https://github.com/m-boutaleb/Smart-Food-Storage,m-boutaleb
178050,246130172,Airplanes,cgettings/Airplanes,Visualizing flights into LGA at the start of t...,https://api.github.com/repos/cgettings/Airplan...,False,False,0,0,0,...,True,False,,https://github.com/cgettings/Airplanes,HTML,2020-03-09T20:01:16Z,2020-08-10T19:11:27Z,2020-08-10T19:11:31Z,https://github.com/cgettings/Airplanes,cgettings
178051,267858096,face_mask_detection_openvino,mmphego/face_mask_detection_openvino,Detect faces and determine whether people are ...,https://api.github.com/repos/mmphego/face_mask...,False,False,35,35,11,...,True,True,https://blog.mphomphego.co.za/blog/2020/06/02/...,https://github.com/mmphego/face_mask_detection...,Python,2020-05-29T12:56:47Z,2021-02-16T12:18:40Z,2021-05-25T21:53:17Z,https://github.com/mmphego/face_mask_detection...,mmphego


**17.566** repositorios.

### En el nombre y en la descripción:

In [25]:
df_in_name_and_description_boolean = df.full_name.str.contains(pattern_name) & df.description.str.contains(pattern_description)
df_in_name_and_description_boolean

0          True
1          True
2          True
3          True
4          True
          ...  
178065    False
178066     True
178067    False
178068    False
178069    False
Length: 178070, dtype: bool

In [26]:
df_in_name_and_description = df[df_in_name_and_description_boolean]
df_in_name_and_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178042,267669502,pydemic,uiuc-covid19-modeling/pydemic,a python driver for epidemic modeling and infe...,https://api.github.com/repos/uiuc-covid19-mode...,False,False,5,5,4,...,True,True,,https://github.com/uiuc-covid19-modeling/pydemic,Python,2020-05-28T18:45:50Z,2021-01-19T16:34:17Z,2021-07-22T14:31:13Z,https://github.com/uiuc-covid19-modeling/pydemic,uiuc-covid19-modeling
178046,267472666,simulation-model-of-interventions-to-interrupt...,nicolaspenagos/simulation-model-of-interventio...,I present software to simulate how biosecurity...,https://api.github.com/repos/nicolaspenagos/si...,False,False,0,0,0,...,True,True,,https://github.com/nicolaspenagos/simulation-m...,Java,2020-05-28T02:18:43Z,2020-07-13T04:28:52Z,2020-08-15T08:18:43Z,https://github.com/nicolaspenagos/simulation-m...,nicolaspenagos
178054,268148998,Epidemics-Modelling,pierpaolo28/Epidemics-Modelling,Modelling and tracking of epidemics data analy...,https://api.github.com/repos/pierpaolo28/Epide...,False,False,1,1,0,...,True,True,http://3.22.240.181:8501,https://github.com/pierpaolo28/Epidemics-Model...,Jupyter Notebook,2020-05-30T19:48:56Z,2020-11-17T19:44:00Z,2021-01-30T21:40:09Z,https://github.com/pierpaolo28/Epidemics-Model...,pierpaolo28
178064,269601203,CoronaDashboard,StefH/CoronaDashboard,Een Corona Dashboard gebaseerd op cijfers van ...,https://api.github.com/repos/StefH/CoronaDashb...,False,False,1,1,0,...,True,True,,https://github.com/StefH/CoronaDashboard,C#,2020-06-05T10:29:30Z,2021-07-19T15:54:46Z,2021-07-19T15:52:49Z,https://github.com/StefH/CoronaDashboard,StefH


In [27]:
df_in_name_and_description.shape[0]

66534

**66.534** repositorios.

------

### Comprobación del 62,7% de repositorios descartados si nos quedásemos con la última opción

In [28]:
df = pd.read_csv('df_repositorios.csv')
df_sin = df[~df_in_name_and_description_boolean]
df_sin

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
20,241636482,coronavirus,YaseenAbdullah/coronavirus,,https://api.github.com/repos/YaseenAbdullah/co...,False,False,34,34,13,...,True,True,,https://github.com/YaseenAbdullah/coronavirus,Dart,2020-02-19T14:03:45Z,2020-05-10T14:58:36Z,2021-03-27T16:11:56Z,https://github.com/YaseenAbdullah/coronavirus,YaseenAbdullah
40,237221585,gatsby-map,elinlutz/gatsby-map,"Mapping the coronavirus in react, gatsby.js an...",https://api.github.com/repos/elinlutz/gatsby-m...,False,False,31,31,12,...,True,True,https://coronakartan.se,https://github.com/elinlutz/gatsby-map,JavaScript,2020-01-30T13:43:13Z,2021-05-06T16:13:49Z,2020-07-01T14:29:51Z,https://github.com/elinlutz/gatsby-map,elinlutz
41,236218942,cido,CIDO-ontology/cido,CIDO: Coronavirus Infectious Disease Ontology,https://api.github.com/repos/CIDO-ontology/cid...,False,False,13,13,9,...,True,True,,https://github.com/CIDO-ontology/cido,,2020-01-25T19:30:01Z,2021-08-12T05:47:50Z,2021-08-12T05:47:53Z,https://github.com/CIDO-ontology/cido,CIDO-ontology
48,237517471,Coronavirus-Chrome-Extension,WebDevSimplified/Coronavirus-Chrome-Extension,,https://api.github.com/repos/WebDevSimplified/...,False,False,24,24,10,...,True,True,,https://github.com/WebDevSimplified/Coronaviru...,JavaScript,2020-01-31T21:15:58Z,2020-01-31T21:18:07Z,2021-07-25T14:47:10Z,https://github.com/WebDevSimplified/Coronaviru...,WebDevSimplified
50,236273771,corona-frontend,theleadio/corona-frontend,,https://api.github.com/repos/theleadio/corona-...,False,False,45,45,33,...,True,True,https://www.coronatracker.com/,https://github.com/theleadio/corona-frontend,JavaScript,2020-01-26T06:01:59Z,2021-08-11T22:39:03Z,2021-05-04T15:05:19Z,https://github.com/theleadio/corona-frontend,theleadio
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178067,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,True,,https://github.com/jahnavi-prasad/face-mask-de...,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


In [29]:
df_sin.sort_values(by=['stargazers_count'])

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
93690,253104417,covid19-chatbot,dbacall/covid19-chatbot,,https://api.github.com/repos/dbacall/covid19-c...,False,False,0,0,0,...,True,True,,https://github.com/dbacall/covid19-chatbot,JavaScript,2020-04-04T21:44:11Z,2021-08-12T06:02:10Z,2020-04-09T10:11:47Z,https://github.com/dbacall/covid19-chatbot,dbacall
117101,361328496,Taller-Covid-Colombia,levf18/Taller-Covid-Colombia,,https://api.github.com/repos/levf18/Taller-Cov...,False,False,0,0,0,...,True,True,,https://github.com/levf18/Taller-Covid-Colombia,Python,2021-04-25T04:12:53Z,2021-04-25T04:43:04Z,2021-04-25T04:43:06Z,https://github.com/levf18/Taller-Covid-Colombia,levf18
117100,361553148,vue-tailwind-covid-tracker,Brian-Santeliz/vue-tailwind-covid-tracker,,https://api.github.com/repos/Brian-Santeliz/vu...,False,False,0,0,0,...,True,True,,https://github.com/Brian-Santeliz/vue-tailwind...,JavaScript,2021-04-25T22:31:27Z,2021-04-26T02:29:38Z,2021-04-26T02:29:40Z,https://github.com/Brian-Santeliz/vue-tailwind...,Brian-Santeliz
117099,361405214,flutter_covid_today,sing3demons/flutter_covid_today,,https://api.github.com/repos/sing3demons/flutt...,False,False,0,0,0,...,True,True,,https://github.com/sing3demons/flutter_covid_t...,Dart,2021-04-25T11:06:34Z,2021-04-25T11:08:39Z,2021-04-25T11:08:41Z,https://github.com/sing3demons/flutter_covid_t...,sing3demons
117098,361355998,COVID-19-Cough-Detection,HrithikNambiar/COVID-19-Cough-Detection,An attempt at INTERSPEECH 2021 Computational P...,https://api.github.com/repos/HrithikNambiar/CO...,False,False,0,0,0,...,True,True,,https://github.com/HrithikNambiar/COVID-19-Cou...,Jupyter Notebook,2021-04-25T07:05:25Z,2021-06-04T08:18:32Z,2021-07-07T13:39:53Z,https://github.com/HrithikNambiar/COVID-19-Cou...,HrithikNambiar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96169,253216457,springer_free_books,alexgand/springer_free_books,Python script to download all Springer books r...,https://api.github.com/repos/alexgand/springer...,False,False,1633,1633,370,...,True,True,,https://github.com/alexgand/springer_free_books,Python,2020-04-05T11:14:34Z,2021-06-02T01:22:57Z,2021-08-09T05:40:25Z,https://github.com/alexgand/springer_free_books,alexgand
13534,260712390,cwa-server,corona-warn-app/cwa-server,Backend implementation for the Apple/Google ex...,https://api.github.com/repos/corona-warn-app/c...,False,False,1752,1752,336,...,False,False,https://www.coronawarn.app/,https://github.com/corona-warn-app/cwa-server,Java,2020-05-02T15:07:31Z,2021-08-13T11:24:57Z,2021-08-12T15:49:00Z,https://github.com/corona-warn-app/cwa-server,corona-warn-app
2993,247821387,app,WorldHealthOrganization/app,COVID-19 App,https://api.github.com/repos/WorldHealthOrgani...,False,False,2060,2060,528,...,True,True,,https://github.com/WorldHealthOrganization/app,Dart,2020-03-16T21:38:17Z,2021-08-13T06:01:40Z,2021-08-11T02:43:08Z,https://github.com/WorldHealthOrganization/app,WorldHealthOrganization
154209,260064828,exposure-notifications-server,google/exposure-notifications-server,Exposure Notification Reference Server | Covid...,https://api.github.com/repos/google/exposure-n...,False,False,2076,2076,289,...,False,False,https://www.google.com/covid19/exposurenotific...,https://github.com/google/exposure-notificatio...,Go,2020-04-29T23:03:36Z,2021-08-12T17:01:13Z,2021-08-12T17:01:16Z,https://github.com/google/exposure-notificatio...,google


In [30]:
df_sin_sorted = df_sin.sort_values(by=['stargazers_count'], ascending=False).head(20)
df_sin_sorted[['id', 'name', 'full_name', 'description', 'stargazers_count', 'html_url']]

Unnamed: 0,id,name,full_name,description,stargazers_count,html_url
50852,245077032,API,disease-sh/API,API for Current cases and more stuff about COV...,2289,https://github.com/disease-sh/API
154209,260064828,exposure-notifications-server,google/exposure-notifications-server,Exposure Notification Reference Server | Covid...,2076,https://github.com/google/exposure-notificatio...
2993,247821387,app,WorldHealthOrganization/app,COVID-19 App,2060,https://github.com/WorldHealthOrganization/app
13534,260712390,cwa-server,corona-warn-app/cwa-server,Backend implementation for the Apple/Google ex...,1752,https://github.com/corona-warn-app/cwa-server
96169,253216457,springer_free_books,alexgand/springer_free_books,Python script to download all Springer books r...,1633,https://github.com/alexgand/springer_free_books
105038,254129456,Free_CyberSecurity_Professional_Development_Re...,gerryguy311/Free_CyberSecurity_Professional_De...,An awesome list of FREE resources for training...,1075,https://github.com/gerryguy311/Free_CyberSecur...
176821,21244714,tika-python,chrismattmann/tika-python,Tika-Python is a Python binding to the Apache ...,1062,https://github.com/chrismattmann/tika-python
7113,250585282,Amazon-Fresh-Whole-Foods-delivery-slot-finder,ahertel/Amazon-Fresh-Whole-Foods-delivery-slot...,A Mac tool that finds available delivery slots...,1049,https://github.com/ahertel/Amazon-Fresh-Whole-...
64,237385601,Getting-Things-Done-with-Pytorch,curiousily/Getting-Things-Done-with-Pytorch,Jupyter Notebook tutorials on solving real-wor...,957,https://github.com/curiousily/Getting-Things-D...
177900,257567981,Face-Mask-Detection,chandrikadeb7/Face-Mask-Detection,Face Mask Detection system based on computer v...,952,https://github.com/chandrikadeb7/Face-Mask-Det...


In [212]:
# show(df_sin_sorted[['id', 'name', 'full_name', 'description', 'stargazers_count', 'html_url']])

----

### 4.2.8.3 Estrategia de filtrado

Primero comprobamos las palabras claves en el nombre:

In [31]:
df_in_name

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178054,268148998,Epidemics-Modelling,pierpaolo28/Epidemics-Modelling,Modelling and tracking of epidemics data analy...,https://api.github.com/repos/pierpaolo28/Epide...,False,False,1,1,0,...,True,True,http://3.22.240.181:8501,https://github.com/pierpaolo28/Epidemics-Model...,Jupyter Notebook,2020-05-30T19:48:56Z,2020-11-17T19:44:00Z,2021-01-30T21:40:09Z,https://github.com/pierpaolo28/Epidemics-Model...,pierpaolo28
178056,268314248,UI,CovidToday/UI,,https://api.github.com/repos/CovidToday/UI/con...,False,False,0,0,0,...,True,False,,https://github.com/CovidToday/UI,JavaScript,2020-05-31T16:02:18Z,2021-08-12T15:32:29Z,2020-09-03T02:43:14Z,https://github.com/CovidToday/UI,CovidToday
178061,268770101,coroname,mariabnd/coroname,,https://api.github.com/repos/mariabnd/coroname...,False,False,0,0,0,...,True,True,,https://github.com/mariabnd/coroname,,2020-06-02T10:25:35Z,2021-01-06T12:52:34Z,2021-01-06T12:52:37Z,https://github.com/mariabnd/coroname,mariabnd
178064,269601203,CoronaDashboard,StefH/CoronaDashboard,Een Corona Dashboard gebaseerd op cijfers van ...,https://api.github.com/repos/StefH/CoronaDashb...,False,False,1,1,0,...,True,True,,https://github.com/StefH/CoronaDashboard,C#,2020-06-05T10:29:30Z,2021-07-19T15:54:46Z,2021-07-19T15:52:49Z,https://github.com/StefH/CoronaDashboard,StefH


Filtramos los que tienen palabras claves en el nombre y nos quedamos con el resto para seguir analizándolo:

In [32]:
df_res_final = df_in_name #Aquí se irán acumulando los repositorios válidos

In [33]:
df_not_in_name = df[~df_in_name_boolean]

In [34]:
df_not_in_name

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
40,237221585,gatsby-map,elinlutz/gatsby-map,"Mapping the coronavirus in react, gatsby.js an...",https://api.github.com/repos/elinlutz/gatsby-m...,False,False,31,31,12,...,True,True,https://coronakartan.se,https://github.com/elinlutz/gatsby-map,JavaScript,2020-01-30T13:43:13Z,2021-05-06T16:13:49Z,2020-07-01T14:29:51Z,https://github.com/elinlutz/gatsby-map,elinlutz
41,236218942,cido,CIDO-ontology/cido,CIDO: Coronavirus Infectious Disease Ontology,https://api.github.com/repos/CIDO-ontology/cid...,False,False,13,13,9,...,True,True,,https://github.com/CIDO-ontology/cido,,2020-01-25T19:30:01Z,2021-08-12T05:47:50Z,2021-08-12T05:47:53Z,https://github.com/CIDO-ontology/cido,CIDO-ontology
64,237385601,Getting-Things-Done-with-Pytorch,curiousily/Getting-Things-Done-with-Pytorch,Jupyter Notebook tutorials on solving real-wor...,https://api.github.com/repos/curiousily/Gettin...,False,False,957,957,300,...,True,True,https://www.curiousily.com/,https://github.com/curiousily/Getting-Things-D...,Jupyter Notebook,2020-01-31T08:02:53Z,2021-06-14T09:47:01Z,2021-08-14T11:16:33Z,https://github.com/curiousily/Getting-Things-D...,curiousily
67,236146885,pneumonia,lispczz/pneumonia,中国新型冠状病毒肺炎地级市疫情图,https://api.github.com/repos/lispczz/pneumonia...,False,False,356,356,100,...,True,True,https://lispczz.github.io/pneumonia/,https://github.com/lispczz/pneumonia,JavaScript,2020-01-25T08:56:59Z,2021-08-12T22:57:00Z,2021-07-30T00:06:31Z,https://github.com/lispczz/pneumonia,lispczz
72,229427465,Hentai-discord-bot,Eddy-Arch/Hentai-discord-bot,A NSFW hentai discord bot with extra functiona...,https://api.github.com/repos/Eddy-Arch/Hentai-...,False,False,21,21,12,...,True,True,,https://github.com/Eddy-Arch/Hentai-discord-bot,Python,2019-12-21T12:55:55Z,2021-07-18T09:53:05Z,2021-07-29T10:33:41Z,https://github.com/Eddy-Arch/Hentai-discord-bot,Eddy-Arch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178067,269906994,face-mask-detection,jahnavi-prasad/face-mask-detection,Simple project to detect if a person is wearin...,https://api.github.com/repos/jahnavi-prasad/fa...,False,False,6,6,7,...,True,True,,https://github.com/jahnavi-prasad/face-mask-de...,Python,2020-06-06T07:39:31Z,2020-06-06T07:46:00Z,2021-05-03T13:00:00Z,https://github.com/jahnavi-prasad/face-mask-de...,jahnavi-prasad
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


Comprobamos los repositorios que tienen palabras relacionadas en la descripción:

In [35]:
df_in_description_boolean = df_not_in_name.description.str.contains(pattern_description)
df_in_description_boolean

40         True
41         True
64         True
67         True
72         True
          ...  
178063    False
178065    False
178067     True
178068    False
178069    False
Name: description, Length: 18913, dtype: object

Número de repositorios que tienen la descripción a nula:

In [36]:
df_in_description_boolean.isnull().sum()

140

In [37]:
#Rellenamos a False los valores nulos de la descripción
df_in_description_boolean = df_in_description_boolean.fillna(False)

df_in_description = df_not_in_name[df_in_description_boolean]
df_in_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
40,237221585,gatsby-map,elinlutz/gatsby-map,"Mapping the coronavirus in react, gatsby.js an...",https://api.github.com/repos/elinlutz/gatsby-m...,False,False,31,31,12,...,True,True,https://coronakartan.se,https://github.com/elinlutz/gatsby-map,JavaScript,2020-01-30T13:43:13Z,2021-05-06T16:13:49Z,2020-07-01T14:29:51Z,https://github.com/elinlutz/gatsby-map,elinlutz
41,236218942,cido,CIDO-ontology/cido,CIDO: Coronavirus Infectious Disease Ontology,https://api.github.com/repos/CIDO-ontology/cid...,False,False,13,13,9,...,True,True,,https://github.com/CIDO-ontology/cido,,2020-01-25T19:30:01Z,2021-08-12T05:47:50Z,2021-08-12T05:47:53Z,https://github.com/CIDO-ontology/cido,CIDO-ontology
64,237385601,Getting-Things-Done-with-Pytorch,curiousily/Getting-Things-Done-with-Pytorch,Jupyter Notebook tutorials on solving real-wor...,https://api.github.com/repos/curiousily/Gettin...,False,False,957,957,300,...,True,True,https://www.curiousily.com/,https://github.com/curiousily/Getting-Things-D...,Jupyter Notebook,2020-01-31T08:02:53Z,2021-06-14T09:47:01Z,2021-08-14T11:16:33Z,https://github.com/curiousily/Getting-Things-D...,curiousily
67,236146885,pneumonia,lispczz/pneumonia,中国新型冠状病毒肺炎地级市疫情图,https://api.github.com/repos/lispczz/pneumonia...,False,False,356,356,100,...,True,True,https://lispczz.github.io/pneumonia/,https://github.com/lispczz/pneumonia,JavaScript,2020-01-25T08:56:59Z,2021-08-12T22:57:00Z,2021-07-30T00:06:31Z,https://github.com/lispczz/pneumonia,lispczz
72,229427465,Hentai-discord-bot,Eddy-Arch/Hentai-discord-bot,A NSFW hentai discord bot with extra functiona...,https://api.github.com/repos/Eddy-Arch/Hentai-...,False,False,21,21,12,...,True,True,,https://github.com/Eddy-Arch/Hentai-discord-bot,Python,2019-12-21T12:55:55Z,2021-07-18T09:53:05Z,2021-07-29T10:33:41Z,https://github.com/Eddy-Arch/Hentai-discord-bot,Eddy-Arch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178037,267352452,steam-lockdown,digital-wellbeing/steam-lockdown,How has gaming changed during lockdown? Analys...,https://api.github.com/repos/digital-wellbeing...,False,False,0,0,0,...,True,True,https://digital-wellbeing.github.io/steam-lock...,https://github.com/digital-wellbeing/steam-loc...,HTML,2020-05-27T15:10:55Z,2021-02-10T14:26:15Z,2021-02-16T13:48:34Z,https://github.com/digital-wellbeing/steam-loc...,digital-wellbeing
178047,267575087,Smart-Food-Storage,m-boutaleb/Smart-Food-Storage,An intelligent application (very useful during...,https://api.github.com/repos/m-boutaleb/Smart-...,False,False,0,0,0,...,True,True,,https://github.com/m-boutaleb/Smart-Food-Storage,Java,2020-05-28T11:45:23Z,2020-10-04T15:25:00Z,2021-03-23T08:43:26Z,https://github.com/m-boutaleb/Smart-Food-Storage,m-boutaleb
178050,246130172,Airplanes,cgettings/Airplanes,Visualizing flights into LGA at the start of t...,https://api.github.com/repos/cgettings/Airplan...,False,False,0,0,0,...,True,False,,https://github.com/cgettings/Airplanes,HTML,2020-03-09T20:01:16Z,2020-08-10T19:11:27Z,2020-08-10T19:11:31Z,https://github.com/cgettings/Airplanes,cgettings
178051,267858096,face_mask_detection_openvino,mmphego/face_mask_detection_openvino,Detect faces and determine whether people are ...,https://api.github.com/repos/mmphego/face_mask...,False,False,35,35,11,...,True,True,https://blog.mphomphego.co.za/blog/2020/06/02/...,https://github.com/mmphego/face_mask_detection...,Python,2020-05-29T12:56:47Z,2021-02-16T12:18:40Z,2021-05-25T21:53:17Z,https://github.com/mmphego/face_mask_detection...,mmphego


Actualizamos en *df_res_final* con los repositorios que tienen palabras relacionadas en la descripción:

In [47]:
df_res_final = df_res_final.append(df_in_description)
df_res_final

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178037,267352452,steam-lockdown,digital-wellbeing/steam-lockdown,How has gaming changed during lockdown? Analys...,https://api.github.com/repos/digital-wellbeing...,False,False,0,0,0,...,True,True,https://digital-wellbeing.github.io/steam-lock...,https://github.com/digital-wellbeing/steam-loc...,HTML,2020-05-27T15:10:55Z,2021-02-10T14:26:15Z,2021-02-16T13:48:34Z,https://github.com/digital-wellbeing/steam-loc...,digital-wellbeing
178047,267575087,Smart-Food-Storage,m-boutaleb/Smart-Food-Storage,An intelligent application (very useful during...,https://api.github.com/repos/m-boutaleb/Smart-...,False,False,0,0,0,...,True,True,,https://github.com/m-boutaleb/Smart-Food-Storage,Java,2020-05-28T11:45:23Z,2020-10-04T15:25:00Z,2021-03-23T08:43:26Z,https://github.com/m-boutaleb/Smart-Food-Storage,m-boutaleb
178050,246130172,Airplanes,cgettings/Airplanes,Visualizing flights into LGA at the start of t...,https://api.github.com/repos/cgettings/Airplan...,False,False,0,0,0,...,True,False,,https://github.com/cgettings/Airplanes,HTML,2020-03-09T20:01:16Z,2020-08-10T19:11:27Z,2020-08-10T19:11:31Z,https://github.com/cgettings/Airplanes,cgettings
178051,267858096,face_mask_detection_openvino,mmphego/face_mask_detection_openvino,Detect faces and determine whether people are ...,https://api.github.com/repos/mmphego/face_mask...,False,False,35,35,11,...,True,True,https://blog.mphomphego.co.za/blog/2020/06/02/...,https://github.com/mmphego/face_mask_detection...,Python,2020-05-29T12:56:47Z,2021-02-16T12:18:40Z,2021-05-25T21:53:17Z,https://github.com/mmphego/face_mask_detection...,mmphego


A continuación filtramos, quedándonos con los que NO tienen palabras relacionadas en la descripción:

In [45]:
df_not_in_description = df_not_in_name[~df_in_description_boolean]
df_not_in_description

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
79,229502804,data-visualisation-scripts,Perishleaf/data-visualisation-scripts,Collection of scripts for data visualisation a...,https://api.github.com/repos/Perishleaf/data-v...,False,False,80,80,90,...,True,True,,https://github.com/Perishleaf/data-visualisati...,Jupyter Notebook,2019-12-22T01:12:45Z,2020-08-06T12:06:03Z,2021-06-10T12:17:53Z,https://github.com/Perishleaf/data-visualisati...,Perishleaf
140,141635301,Water-Monitoring-System,vinitshahdeo/Water-Monitoring-System,Water Monitoring System is an IOT based Liquid...,https://api.github.com/repos/vinitshahdeo/Wate...,False,False,128,128,245,...,True,True,https://vinitshahdeo.github.io/Water-Monitorin...,https://github.com/vinitshahdeo/Water-Monitori...,HTML,2018-07-19T22:06:05Z,2021-06-09T18:42:30Z,2021-08-06T15:01:36Z,https://github.com/vinitshahdeo/Water-Monitori...,vinitshahdeo
157,237388368,red-cross,WeileiZeng/red-cross,官方公示数据概览,https://api.github.com/repos/WeileiZeng/red-cr...,False,False,19,19,4,...,True,True,https://weileizeng.github.io/red-cross/,https://github.com/WeileiZeng/red-cross,Python,2020-01-31T08:22:45Z,2021-07-13T07:01:42Z,2020-05-27T05:59:00Z,https://github.com/WeileiZeng/red-cross,WeileiZeng
458,170732312,limnoria-plugins,oddluck/limnoria-plugins,Limnoria plugins I wrote or forked.,https://api.github.com/repos/oddluck/limnoria-...,False,False,33,33,8,...,True,True,,https://github.com/oddluck/limnoria-plugins,Python,2019-02-14T17:40:29Z,2021-06-23T19:44:25Z,2021-08-14T00:12:47Z,https://github.com/oddluck/limnoria-plugins,oddluck
842,155705971,alm-api-backend,Beor18/alm-api-backend,Comenzo como desafio almundo.com y ahora estoy...,https://api.github.com/repos/Beor18/alm-api-ba...,False,False,11,11,2,...,True,True,https://almundo-examen.herokuapp.com/api/v1/ho...,https://github.com/Beor18/alm-api-backend,JavaScript,2018-11-01T11:37:25Z,2020-07-13T22:22:44Z,2020-09-20T03:56:50Z,https://github.com/Beor18/alm-api-backend,Beor18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178062,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


In [46]:
df_not_in_description.shape[0]

1347

Nos quedan 1347 repositorios que habrá que analizar individualmente para saber si están relacionados con la COVID-19 o no. Para ello consultaremos los *topics* que los componen, a fin de encontrar tópicos relacionados con la pandemia.

In [221]:
# show(df_not_in_description)

### Comprobación de *topics* en repositorios

In [48]:
token = "" # Rellenar con token generado en GitHub
header = {'Authorization': 'token %s' % token, 'Accept':'application/vnd.github.mercy-preview+json'}

In [49]:
df_topics = pd.DataFrame(columns=['id', 'topics'])

for ind in df_not_in_description.index:
    print("\n\n -- OBTENIENDO TOPICS DEL REPOSITORIO {} --".format(df_not_in_description['name'][ind]))
    
    query = "?q=per_page=100"

    response = requests.get('https://api.github.com/repos/' + df_not_in_description['full_name'][ind] + "/topics" + query, 
                            headers=header)
    response_json = response.json()
    
    if "names" in response_json:
        print(response_json["names"])

        df_topics.loc[ind] = [df_not_in_description['id'][ind], response_json["names"]]
    else:
        df_topics.loc[ind] = [df_not_in_description['id'][ind], None]
        
    print("Esperando 0,5 segundos...")
    time.sleep(0.5)



 -- OBTENIENDO TOPICS DEL REPOSITORIO data-visualisation-scripts --
['datavisualization', 'python3', 'covid-19', 'dashborad', 'matplotlib', 'plotly-dash', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Water-Monitoring-System --
['iot', 'water-monitoring', 'jquery', 'javascript', 'arduino', 'ultrasonic-sensors', 'hardware', 'nodejs', 'django', 'api', 'rgsoc', 'gssoc20', 'gssoc', 'covid-19', 'coronavirus-tracking', 'coronavirus', 'coronavirus-real-time', 'hacktoberfest', 'hacktoberfest2020']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO red-cross --
['red-cross', 'github-pages', 'coronavirus', 'covid-19', 'open-source-wuhan', 'ncov']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO limnoria-plugins --
['limnoria', 'irc', 'ascii-art', 'jeopardy', 'uno', 'interactive-fiction', 'lyrics', 'irc-art', 'ansi-art', 'cards-against-humanity', 'zork', 'limnoria-plugins', 'trivia', 'supybot', 'supybot-plugin', 'dice-roller', '



 -- OBTENIENDO TOPICS DEL REPOSITORIO Many-Api --
['api', 'garfield', 'covid19', 'coronavirus', 'c19', 'pokemon', 'pokemondata', 'pokemonapi', 'garfieldapi', 'atla', 'coronavirus-data', 'mcu', 'marvel', 'marvelquotes', 'twitter', 'tweets', 'marvel-api', 'covidapi', 'jokes', 'jokes-api']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO status --
['vkontakte', 'vk-api', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO awesome-confinement --
['awesome-list', 'awesome-lists', 'coronavirus', 'confinement']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO OpenVentilator --
['ventilator', 'arduino', 'covid-19', 'covid', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO KoronaKartta --
['coronavirus', 'map', 'finland', 'history']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO attestation-deplacement-derogatoire --
['france', 'confinement', 'covid-19', 'coronavirus']
Esperando 0



 -- OBTENIENDO TOPICS DEL REPOSITORIO super-simple-ventilator --
['ventilator', 'ventilator-firmware', 'platformio', 'coronavirus', 'covid-19', 'arduino']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO orange-cxr-classifier --
['covid-19', 'coronavirus', 'xray', 'chest', 'machine-learning', 'orange']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO RIST --
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO 7dayserlangen --
['hacktoberfest', 'coronavirus', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO pyross --
['covid-19', 'infectious-disease-models', 'epidemiological-predictions', 'epidemic-simulations', 'coronavirus', 'sars-cov-2', 'compartment-models', 'epidemiological-models', 'pyross']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO camara-descontaminacao-ar --
['healtcare', 'coronavirus', 'covid-19', 'open-source']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORI

['delivery', 'bem', 'covid-19', 'coronavirus', 'small', 'producers', 'brazilian']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Crawling --
['crawling', 'crawling-python', 'selenium', 'beautifulsoup', 'lda', 'lda-model', 'algorithms', 'wordcloud', 'coronavirus', 'covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Prioritize-Vaccine-Delivery --
['python', 'ai-ml', 'covid-19', 'coronavirus', 'vaccine', 'ai-model', 'ml-model', 'covidindia', 'vaccine-distribution', 'ey-hackathon']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO CO2_traffic_light --
['co2', 'sensor', 'arduino', 'python', 'stl', '7segment', 'rgb-led', 'arduinonano', 'coronavirus', 'air-quality']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO datamuseum.github.io --
['coronavirus', 'dashboard', 'covid-19', 'coronavirus-real-time']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO kryzysowa-kawa --
['crisis-related', 'coronavirus'



 -- OBTENIENDO TOPICS DEL REPOSITORIO Hackathon2021 --
['hackathon', 'iot', 'octopus', 'esp8266', 'co2', 'coronavirus', 'regression', 'physcomp']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO ExpCowin --
['nodejs', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO swaas-bot --
['nodejs', 'expressjs', 'javascript', 'mongodb', 'coronavirus', 'vaccination', 'india', 'whastappbot']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO bored-af --
['quarantine', 'quarantine-project', 'covid-19', 'python', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Amazon-Whole-Foods-Delivery-Slots-Finder --
['whole-foods', 'coronavirus', 'delivery-slots', 'covid-19', 'stay-home', 'amazon', 'chrome-extension', 'grocery-delivery', 'food-delivery', 'mac', 'windows', 'google-chrome', 'delivery-slot-alert', 'quarantine', 'alert']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO cowin-tracker --



 -- OBTENIENDO TOPICS DEL REPOSITORIO 6-ft-apart --
['greenfoot', 'java', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO M5Stack_Healthy_code_tracer --
['m5stack', 'micropython', 'arduino', 'onenet', 'uiflow', 'maixpy', 'coronavirus']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO OKorona --
['coronavirus', 'javascript', 'nodejs', 'mapbox-gl', 'api', 'covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO ProximityApp --
['coronavirus', 'mobile']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO not-busy-backend --
['covid', 'covid-19', 'coronavirus', 'express', 'node', 'typescript']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO not-busy-frontend --
['covid-19', 'covid19', 'coronavirus', 'businesses', 'popularity', 'wait-times']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO software-donation --
['covid-19', 'coronavirus', 'remote', 'remote-work', 'donating

['covid-19', 'covid19', 'covid', 'covid19-data', 'covid-data']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO immuni-tek-sender --
['covid', 'telegram', 'immuni', 'tek']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO TwiCo --
['python3', 'flask', 'microservices', 'soa', 'pm2', 'vue', 'vuetify', 'amcharts', 'sentiment-analysis', 'twitter', 'covid', 'mapbox', 'locationiq', 'typescript', 'nodejs', 'google-oauth2', 'artificial-intelligence', 'github-oauth-authentation']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Projeto-Banco-de-Dados --
['projeto', 'mysql', 'covid']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO r-package-assessment-msobanq --
['covid', 'shiny', 'shiny-apps', 'packages']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO helpthemidlands --
['hacktoberfest', 'covid', 'hackthemidlands']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO TeamReispar-C3DataHackathonSo

['education', 'edtech', 'schools', 'covid']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO vaccipy --
['corona', 'impfterminservice', 'vaccination', 'covid', 'covid-19', 'bot', 'python', 'selenium', 'impftermin', 'vaccine']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO miq-nz-booking-tools --
['miq', 'new-zealand', 'covid', 'nz-managed-isolation', 'quarantine', 'booking']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO check-the-fridge --
['covid', 'vaccines', 'ontario', 'api']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO essentialsapp --
['covid', 'essentials', 'flutter']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO impf-bot --
['bot', 'covid', 'vaccination', 'covid19', 'impftermin', 'kotlin', 'selenium', 'java']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO GetUsPPE --
['ppe', 'getusppe', 'covid-19', 'covid19', 'covid']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPI



 -- OBTENIENDO TOPICS DEL REPOSITORIO BERTweet --
['python3', 'bert', 'roberta', 'transformers', 'fairseq', 'language-model', 'english', 'part-of-speech-tagging', 'ner', 'named-entity-recognition', 'text-classification', 'sentiment-analysis', 'irony-detection', 'english-tweets', 'covid-19', 'covid19', 'covid', 'bertweet', 'bertweet-covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO where-can-we-meet --
['covid', 'covid-19', 'covid19', 'map', 'maps']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO uniforme-bonja --
['covid-19', 'covid', 'react', 'reactjs', 'uniforme', 'bonja', 'uniformebonja', 'terceirao', 'escola']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO BUZZ --
['covid', 'covid-19', 'voucher', 'vouchers', 'react', 'store', 'market', 'opensource']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO vueground --
['xstate', 'kanban', 'covid', 'vue']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL R

['covid19', 'placetracing']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO CareQuest --
['covid19', 'hospital-management-system', 'plasma-bank']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO folding-at-home --
['foldingathome', 'fah', 'docker', 'docker-compose', 'cancer', 'covid-19', 'covid-data', 'covid19', 'distributed-computing', 'distributed-systems', 'howtocodewell']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO BravoOscarTango --
['discord-py', 'discord-bot', 'flight-simulation', 'utility', 'covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO people-registry --
['covid-19', 'covid19', 'visitor-tracking', 'visitor-management', 'contactless-devices', 'contactless', 'vue']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Machine-Learning-Exercises --
['machine-learning-algorithms', 'machine-learning', 'jupyter-notebook', 'labs', 'ipynb-jupyter-notebook', 'ml', 'scikitlearn-machine-lea

['ohif', 'medical-imaging', 'machine-learning', 'covid19', 'covid-19', 'chest-xray']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO ninjam --
['ninjam', 'ninjamserver', 'covid19', 'jam', 'music']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO project-maskwearing --
['covid19', 'r', 'rmarkdown']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO gagopa --
['react', 'amplify', 'graphql', 'appsync', 'aws', 'travel', 'covid19', 'japan']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO project-catdensity --
['covid19', 'r', 'rmarkdown']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO project-excessmortality --
['covid19', 'r', 'rmarkdown']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO project-nonpharmainterventions --
['covid19', 'r', 'rmarkdown']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO project-youthunemployment --
['covid19', 'r', 'rmarkdown']
Esperando 0,5 



 -- OBTENIENDO TOPICS DEL REPOSITORIO KeggDrugVirtualScreening --
['covid19', 'virtual-screening', 'candidates', 'descriptors', 'molecular-modeling', 'electrostatics', 'kegg', 'atc', 'drug-discovery', 'cheminformatics']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO delivery-slot-notify --
['delivery-window', 'amazon', 'amazon-fresh', 'wholefoods', 'covid-19', 'covid19', 'instacart', 'primenow', 'grocery-delivery', 'groceries', 'groceries-online', 'asda', 'usa', 'uk', 'france', 'india', 'canada']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO india-vaccine-frontend --
['covid19', 'vaccine-availability']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO cowin18plus --
['india', 'covid19', 'vaccination', 'vaccine-availability']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO api-service --
['covid-19', 'covid19', 'pandemic', 'crowdsourcing', 'covid-19-india', 'kotlin', 'ktor', 'postgresql', 'sql', 'sqldelight', 'gr



 -- OBTENIENDO TOPICS DEL REPOSITORIO ventilator --
['ventilator', 'ventilator-designs', 'emergency-ventilator', 'covid-19', 'covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Bioinformatics --
['bioinformatics', 'python', 'covid19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Data_analysis_with_Python --
['data-analysis', 'python', 'python3', 'covid19', 'covid19-data']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO KeepWorkingFromHome --
['workfromhome', 'wfh', 'powershell', 'basic', 'chrome', 'covid-19', 'covid19', 'dashboard']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO flask_chartjs_drilldown_example_project --
['flask', 'chartjs', 'drilldown', 'covid19', 'csv', 'data-visualization', 'chart', 'time-series', 'line-chart', 'public-dataset']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO hub-for-good-list --
['digitalocean', 'hub-for-good', 'covid19', 'covid-19', 'digitalocean

['emergency-response', 'covid-19', 'healthcare', 'baltimore-city', 'code-for-baltimore', 'code-for-america', 'civic-tech', 'vuejs', 'rapid-response']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO catch --
['science', 'genomics', 'genome', 'viral', 'sequencing', 'ngs', 'dna', 'bioinformatics', 'metagenomics', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Bmore-Responsive --
['emergency-response', 'covid-19', 'api', 'contact-management', 'civic-tech', 'code-for-america', 'code-for-baltimore', 'nodejs', 'rapid-response']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO frontend --
['vue', 'vuejs', 'wuhan-support', 'science', 'openness', 'support', 'covid-19', 'covid-19-data']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO monoloco --
['3d-object-detection', '3d-detection', '3d-deep-learning', 'pytorch', 'computer-vision', 'deep-learning', 'machine-learning', 'pose-estimation', 'human-pose-estimation', 

['covid-19', 'computer-vision', 'artificial-intelligence', 'dataset', 'machine-learning', 'deep-learning', 'blockchain']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO BiLevel-Graph-Neural-Network --
['graph-convolutional-networks', 'graph-attention-networks', 'drug-drug-interaction', 'icml', 'covid-19', 'drugbank', 'graph-neural-networks', 'deep-learning', 'representation-learning']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO iwashere --
['hacktoberfest', 'covid-19', 'backend', 'spring-boot', 'kotlin', 'self-hosted', 'docker', 'contact-tracing']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO estudos_plotly_chartstudio --
['dados', 'pandas-python', 'matplotlib', 'plotly', 'plotly-dash', 'covid-19', 'bitcoin', 'podcasts']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Air-Traffic --
['jet-fuel', 'demand', 'air-traffic', 'covid-19', 'oil']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO mask-



 -- OBTENIENDO TOPICS DEL REPOSITORIO envirology.github.io --
['sars-cov-2', 'sewage', 'imperial-college-london', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO unics_social --
['react', 'social-network', 'covid-19', 'hacktoberfest']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO My-Projects --
['project', 'python', 'html', 'covid-19', 'covid19-data', 'coursera']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO koodikoira.github.io --
['nlp', 'nlp-machine-learning', 'covid-19', 'blog', 'cv', 'research', 'python3', 'jupyter-notebook']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Python_GUI --
['python', 'gui', 'covid-19', 'tracker']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO agri-ticket-map --
['agriculture', 'taiwan', 'coupons', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Adv_DS_Capstone_Project --
['python3', 'maskdetection', 'covid-19', 'r



 -- OBTENIENDO TOPICS DEL REPOSITORIO li-wenliang --
['covid-19', 'freecodecamp']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO social-data --
['risk', 'data-gathering', 'evictions', 'housing', 'county-level', 'compare-counties', 'burdened-households', 'python-scripts', 'fred', 'preventing-evictions', 'eviction', 'database', 'data-analysis', 'rent', 'cost', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO cdmx_challenge --
['python', 'covid-19', 'mexico', 'sars-cov-2', 'cdmx-challenge']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO teamlovelace --
['machine-learning', 'python3', 'covid-19', 'data-science', 'sagemaker', 'jupyter-notebook', 'aws-s3', 'mobility-data']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO goCrawler --
['golang', 'crawler', 'nlp', 'ngrams', 'text-mining', 'textual-analysis', 'corpus-linguistics', 'covid-19', 'health-data', 'clinical-research', 'redis', 'redis-cache', 'yaml', 



 -- OBTENIENDO TOPICS DEL REPOSITORIO Blockchain-Managed-Social-Distancing-Detector --
['computer-vision', 'blockchain', 'database', 'covid-19', 'social-distancing-detection', 'social-distance-monitoring']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO real-time-twitter-sentiment-analysis --
['python', 'tweepy-api', 'elasticsearch', 'logstash', 'kibana', 'kafka', 'docker', 'streaming', 'sentiment-analysis', 'deep-learning', 'covid-19', 'mongodb', 'docker-container', 'tweets', 'pipeline', 'visualisation']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Documentation --
['covid-19', 'epidemiological-predictions']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO All_Python --
['python3', 'python', 'zodiac', 'tkinter-gui', 'covid-19', 'network-programming', 'network-automation']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO 1_011_a_infektionsfall_uebermittellung --
['covid-19', 'covid-api', 'healthcare', 'pilot', '



 -- OBTENIENDO TOPICS DEL REPOSITORIO inzidenzcom --
['covid-19', 'hacktoberfest', 'hacktoberfest2020', 'germany']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Room-Counter --
['python', 'covid-19', 'machine-learning', 'opencv', 'pytorch']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO flow --
['lims', 'covid-19', 'swabseq', 'labgrid', 'laboratory']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO DarknetMod --
['yolov3', 'darknet53', 'computer-vision', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO BeeSafe --
['tracing-applications', 'covid-19', 'crowd-analysis', 'contact-tracing']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO coburg19 --
['corona', 'covid-19', 'coburg', 'svelte', 'api', 'rki', 'sapper']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO co2ampel --
['co2-sensor', 'co2-ampel', 'co2', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS 

['lighthouse', 'charts', 'johns-hopkins-university', 'dashboard', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO MedVista --
['nodejs', 'mongodb', 'covid-19', 'api-rest']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO kakao-qr --
['nodejs', 'typescript', 'kakaotalk', 'covid-19', 'qrcode', 'cheerio', 'got', 'korean']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Teamwork-API --
['teamwork', 'jwt-bearer-tokens', 'auth0', 'swagger-ui', 'spring-boot', 'covid-19', 'remote-work']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO MoroccoGeoMap2020 --
['maps', 'morocco', 'maghreb', 'world-map', 'geamap', 'covid-19', 'data', 'python']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO higan --
['covid-19', 'blockchain', 'golang']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO 100meter-app --
['covid-19', 'safety', 'location-based']
Esperando 0,5 segundos...


 -- OBTENIENDO TO

['covid-19', 'school', 'education']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Exploratory_Data_Analysis --
['machine-learning', 'machine-learning-algorithms', 'covid-19', 'flowers-classification', 'jupyter-notebook']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Semi-autonomous-UV-sterilization-bot --
['sterilization', 'uva-solutions', 'semi-autonomous', 'esp32', 'arduino', 'bot', 'coppeliasim', 'bldc-motor-controller', 'spiffs-memory', 'path-planning', 'obstacle-avoidance', 'ultrasonic-sensor', 'docking', 'freertos', 'iot-application', 'uv', 'uv-radiation', 'uv-exposure', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO AmboVent --
['covid-19', 'ventilator', 'rmvs', 'ambu']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO srijan-gsoc-2020 --
['federated-learning', 'bioinformatics', 'gsoc-2020', 'pygrid', 'pysyft', 'openmined', 'differential-privacy', 'multiparty-computation', 'gtex', 'covid-19', 



 -- OBTENIENDO TOPICS DEL REPOSITORIO mask-up-js --
['javascript', 'php', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO zurich_walks --
['pedestrian-tracking', 'covid-19', 'zurich', 'gis', 'r', 'analysis']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO pandemia-parliament --
['government', 'politics', 'online-conferences', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO keepyourdistanceclub.github.io --
['covid-19', 'stay-home', 'social-distancing']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Face-Shield --
['covid-19', 'face-sheild', 'face-mask']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO QuarantineFacility --
['covid-19', 'quarantine', 'quarantine-p', 'admin-dashboard', 'django-rest-framework', 'reactjs']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO book --
['covid-19', 'research', 'booking-system', 'university', 'hackaton', 'lauzhackaga



 -- OBTENIENDO TOPICS DEL REPOSITORIO python_assignment --
['python', 'covid-19', 'data', 'pandas', 'numpy', 'matphotlib', 'pyplot', 'datascience', 'tnl']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO C2N3-C3N3 --
['css', 'html', 'javascript', 'css3', 'flexbox', 'sass', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO EvdeKal --
['covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO EpiJS --
['epidemiology', 'modelling', 'covid-19', 'disease-modeling']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO WeiBoMonitor --
['covid-19', 'coolpush']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO picce_recircula --
['covid-19', 'sustainability', 'python', 'streaming']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Offen-in-5-Flowchart --
['oeffnungsperspektive', 'deutschland', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO leitos --
['co

['covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO hospitals-hifld --
['aws-data-exchange', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Equipool --
['android', 'java', 'covid-19', 'xml']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO e-yantra_competition --
['healthcare', 'iot', 'hardware', 'e-yantra', 'covid-19', 'raspberry-pi']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO saapshot --
['camera', 'toolkit', 'tools', 'demo', 'developer', 'testing', 'termux', 'terminal', 'game-engine', 'developer-tools', 'deface', 'wp', 'plugins', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO xrays-and-gradcam --
['fine-tuning', 'vgg16', 'resnet18', 'densenet121', 'gradcam', 'pytorch-implementation', 'covid-19', 'xrays', 'transfer-learning', 'imbalanced-data', 'oversampling', 'radiographs', 'pneumonia', 'deep-learning', 'cnn', 'early-stopping', 'localization']
Esperando 0,



 -- OBTENIENDO TOPICS DEL REPOSITORIO GetMeSlot --
['react-native', 'covid-19', 'vaccination', 'vaccine', 'dose-slot-booking', 'vaccine-availability']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Performed-text-mining-on-tweets-using-keywords-to-find-the-availability-of-hospital-beds-in-the-stat --
['ec2-instance', 'amazon-kinesis-console', 'beds', 'kinesis', 'oxygen', 'covid-19', 'life', 'save']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO vnb --
['telegram', 'cowin', 'rust', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO CowinVaccineNotifier --
['cowin', 'vaccination', 'vaccination-centers', 'covid-19', 'cowin-api', 'cowin-notifier']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Basic-Machine-Learning-Projects --
['machine-learning', 'machine-learning-algorithms', 'machine-learning-projects', 'data-science', 'database', 'deep-learning', 'csv-files', 'linear-regression', 'artificial-intellig



 -- OBTENIENDO TOPICS DEL REPOSITORIO aiwondergirls-icu-ops --
['healthcare', 'covid-19', 'machine-learning', 'mlrun', 'iguazio', 'diabetes-prediction', 'mlops-for-good-hackathon']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO 2021-esos-nces --
['r', 'rstats', 'covid-19', 'nces']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO data-analysis-projects --
['data-analysis', 'covid-19', 'netflix', 'pandas', 'python', 'tableau']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO EO2HO --
['companies-house', 'eat-out-to-help-out', 'covid-19', 'firm-creation']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO InfoG --
['api', 'ip', 'ddd', 'bank-account', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO oxygen --
['covid-19', 'oxymeter', 'hospital']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Discord-Obecnosciobot --
['discord', 'covid-19', 'online-classroom']
Esperando 0,5 

['covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO vund-karte --
['vaccine', 'vaccine-friendly', 'map', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO VaxPass --
['javascript', 'covid-19', 'reactnative', 'expo', 'firebase', 'crypto']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Cowin-Scrape --
['cowin', 'cowin-vaccine-tracker', 'vaccine', 'vaccine-availability', 'python', 'webscraping', 'covid-19', 'covid19-data', 'vaccine-tracker', 'covid-19-vaccination']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO realtime_r0_brazil --
['covid-19', 'coronavirus-analysis', 'hacktoberfest']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Python-Projects --
['python', 'python-project', 'sorting-algorithms', 'sorting-algorithm-visualizations', 'steganography', 'steganography-algorithms', 'sudokusolver', 'pong-game', 'covid-19', 'coronavirus-tracking', 'coronavirus-real-time', 'coronavir

['music', 'choir', 'virtualchoir', 'multi-track', 'multi-track-audio', 'recording', 'recording-app', 'corona', 'covid-19', 'webapp']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO greenpoint-face-shields --
['brooklyn', '3d-printing', 'diy', 'face-shields', 'covid-19', 'greenpoint', 'new-york', 'nyc']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO VirtQueue --
['social-distancing', 'queueing', 'virtual-queue', 'virtualqueue', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Cerberus-Multivent --
['ventilation', 'medicine', 'medical-device', 'covid-19', 'toronto']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO CCCSL-Codes --
['non-pharmaceutical-interventions', 'complexity-science-hub', 'npi', 'covid-19', 'codes', 'visualization', 'clustering', 'heatmap', 'cccsl']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO exposure-notifications-android --
['android', 'covid-19']
Esperando 0,5 se

['yolo', 'yolov3', 'mfc', 'darknet53', 'covid-19']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO geo-heatmap-android --
['worldmap', 'heatmap', 'android-application', 'covid-19', 'kotlin-android', 'data-visualization', 'map-heatmap-android', 'geo-heatmap']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO REMOTE-EDUCATION-APP---EMAGISTER --
['android', 'android-application', 'online-classroom', 'video-conferencing', 'covid-19', 'classroom', 'online-invigilator', 'secure-examination-environment', 'chat', 'chat-room-android', 'assignment-submission-system', 'appointment-management-system', 'appointment-scheduling', 'education', 'teachers', 'video', 'invigilate-students', 'blocking-students']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS DEL REPOSITORIO Data-Analysis-Projects --
['data-visualization', 'data-analysis', 'covid-19', 'text-analysis', 'seaborn', 'matplo', 'plotly', 'cufflinks', 'python']
Esperando 0,5 segundos...


 -- OBTENIENDO TOPICS 

In [50]:
df_topics

Unnamed: 0,id,topics
79,229502804,"[datavisualization, python3, covid-19, dashbor..."
140,141635301,"[iot, water-monitoring, jquery, javascript, ar..."
157,237388368,"[red-cross, github-pages, coronavirus, covid-1..."
458,170732312,"[limnoria, irc, ascii-art, jeopardy, uno, inte..."
842,155705971,"[mongodb, nodejs, api, apirest, javascript, gr..."
...,...,...
178062,268856502,"[contact-tracing, covid-19, iot, smartphone]"
178063,269479371,"[jupyterlab, jupyterhub, covid-19]"
178065,269527538,"[eosc, eosc-secretariat, openweathermap-api, o..."
178068,270013759,"[covid-19, qrcode, frontend]"


Exportamos el resultado a CSV:

In [51]:
df_topics.to_csv(r'repositorios_restantes_con_topics.csv', index = False, encoding="utf-8-sig")

Lo leemos:

In [73]:
df_topics = pd.read_csv('repositorios_restantes_con_topics.csv')
df_topics

Unnamed: 0,id,topics
0,229502804.0,"['datavisualization', 'python3', 'covid-19', '..."
1,141635301.0,"['iot', 'water-monitoring', 'jquery', 'javascr..."
2,237388368.0,"['red-cross', 'github-pages', 'coronavirus', '..."
3,170732312.0,"['limnoria', 'irc', 'ascii-art', 'jeopardy', '..."
4,155705971.0,"['mongodb', 'nodejs', 'api', 'apirest', 'javas..."
...,...,...
1342,268856502.0,"['contact-tracing', 'covid-19', 'iot', 'smartp..."
1343,269479371.0,"['jupyterlab', 'jupyterhub', 'covid-19']"
1344,269527538.0,"['eosc', 'eosc-secretariat', 'openweathermap-a..."
1345,270013759.0,"['covid-19', 'qrcode', 'frontend']"


El campo *id* se ha guardado como *float*. Lo convertimos a *int*:

In [74]:
df_topics.id = df_topics.id.astype(int)
df_topics

Unnamed: 0,id,topics
0,229502804,"['datavisualization', 'python3', 'covid-19', '..."
1,141635301,"['iot', 'water-monitoring', 'jquery', 'javascr..."
2,237388368,"['red-cross', 'github-pages', 'coronavirus', '..."
3,170732312,"['limnoria', 'irc', 'ascii-art', 'jeopardy', '..."
4,155705971,"['mongodb', 'nodejs', 'api', 'apirest', 'javas..."
...,...,...
1342,268856502,"['contact-tracing', 'covid-19', 'iot', 'smartp..."
1343,269479371,"['jupyterlab', 'jupyterhub', 'covid-19']"
1344,269527538,"['eosc', 'eosc-secretariat', 'openweathermap-a..."
1345,270013759,"['covid-19', 'qrcode', 'frontend']"


Repositorios que no tienen *topics*:

In [75]:
df_topics.topics.isnull().sum()

4

Unimos este dataframe con el que tiene todo el contenido de los repositorios:

In [76]:
df_not_in_description_new = df_not_in_description.join(df_topics.set_index('id'), on='id')
df_not_in_description_new

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login,topics
79,229502804,data-visualisation-scripts,Perishleaf/data-visualisation-scripts,Collection of scripts for data visualisation a...,https://api.github.com/repos/Perishleaf/data-v...,False,False,80,80,90,...,True,,https://github.com/Perishleaf/data-visualisati...,Jupyter Notebook,2019-12-22T01:12:45Z,2020-08-06T12:06:03Z,2021-06-10T12:17:53Z,https://github.com/Perishleaf/data-visualisati...,Perishleaf,"['datavisualization', 'python3', 'covid-19', '..."
140,141635301,Water-Monitoring-System,vinitshahdeo/Water-Monitoring-System,Water Monitoring System is an IOT based Liquid...,https://api.github.com/repos/vinitshahdeo/Wate...,False,False,128,128,245,...,True,https://vinitshahdeo.github.io/Water-Monitorin...,https://github.com/vinitshahdeo/Water-Monitori...,HTML,2018-07-19T22:06:05Z,2021-06-09T18:42:30Z,2021-08-06T15:01:36Z,https://github.com/vinitshahdeo/Water-Monitori...,vinitshahdeo,"['iot', 'water-monitoring', 'jquery', 'javascr..."
157,237388368,red-cross,WeileiZeng/red-cross,官方公示数据概览,https://api.github.com/repos/WeileiZeng/red-cr...,False,False,19,19,4,...,True,https://weileizeng.github.io/red-cross/,https://github.com/WeileiZeng/red-cross,Python,2020-01-31T08:22:45Z,2021-07-13T07:01:42Z,2020-05-27T05:59:00Z,https://github.com/WeileiZeng/red-cross,WeileiZeng,"['red-cross', 'github-pages', 'coronavirus', '..."
458,170732312,limnoria-plugins,oddluck/limnoria-plugins,Limnoria plugins I wrote or forked.,https://api.github.com/repos/oddluck/limnoria-...,False,False,33,33,8,...,True,,https://github.com/oddluck/limnoria-plugins,Python,2019-02-14T17:40:29Z,2021-06-23T19:44:25Z,2021-08-14T00:12:47Z,https://github.com/oddluck/limnoria-plugins,oddluck,"['limnoria', 'irc', 'ascii-art', 'jeopardy', '..."
842,155705971,alm-api-backend,Beor18/alm-api-backend,Comenzo como desafio almundo.com y ahora estoy...,https://api.github.com/repos/Beor18/alm-api-ba...,False,False,11,11,2,...,True,https://almundo-examen.herokuapp.com/api/v1/ho...,https://github.com/Beor18/alm-api-backend,JavaScript,2018-11-01T11:37:25Z,2020-07-13T22:22:44Z,2020-09-20T03:56:50Z,https://github.com/Beor18/alm-api-backend,Beor18,"['mongodb', 'nodejs', 'api', 'apirest', 'javas..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178062,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd,"['contact-tracing', 'covid-19', 'iot', 'smartp..."
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi,"['jupyterlab', 'jupyterhub', 'covid-19']"
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1,"['eosc', 'eosc-secretariat', 'openweathermap-a..."
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode,"['covid-19', 'qrcode', 'frontend']"


In [77]:
df_not_in_description_new.topics.isnull().sum()

4

In [57]:
show(df_not_in_description_new)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x18fc5296430>

Lo exportamos a CSV:

In [78]:
df_not_in_description_new.to_csv(r'repositorios_restantes_con_topics_filtrado.csv', index = False, encoding="utf-8-sig")

In [80]:
df_not_in_description_new = pd.read_csv('repositorios_restantes_con_topics_filtrado.csv')
df_not_in_description_new

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login,topics
0,229502804,data-visualisation-scripts,Perishleaf/data-visualisation-scripts,Collection of scripts for data visualisation a...,https://api.github.com/repos/Perishleaf/data-v...,False,False,80,80,90,...,True,,https://github.com/Perishleaf/data-visualisati...,Jupyter Notebook,2019-12-22T01:12:45Z,2020-08-06T12:06:03Z,2021-06-10T12:17:53Z,https://github.com/Perishleaf/data-visualisati...,Perishleaf,"['datavisualization', 'python3', 'covid-19', '..."
1,141635301,Water-Monitoring-System,vinitshahdeo/Water-Monitoring-System,Water Monitoring System is an IOT based Liquid...,https://api.github.com/repos/vinitshahdeo/Wate...,False,False,128,128,245,...,True,https://vinitshahdeo.github.io/Water-Monitorin...,https://github.com/vinitshahdeo/Water-Monitori...,HTML,2018-07-19T22:06:05Z,2021-06-09T18:42:30Z,2021-08-06T15:01:36Z,https://github.com/vinitshahdeo/Water-Monitori...,vinitshahdeo,"['iot', 'water-monitoring', 'jquery', 'javascr..."
2,237388368,red-cross,WeileiZeng/red-cross,官方公示数据概览,https://api.github.com/repos/WeileiZeng/red-cr...,False,False,19,19,4,...,True,https://weileizeng.github.io/red-cross/,https://github.com/WeileiZeng/red-cross,Python,2020-01-31T08:22:45Z,2021-07-13T07:01:42Z,2020-05-27T05:59:00Z,https://github.com/WeileiZeng/red-cross,WeileiZeng,"['red-cross', 'github-pages', 'coronavirus', '..."
3,170732312,limnoria-plugins,oddluck/limnoria-plugins,Limnoria plugins I wrote or forked.,https://api.github.com/repos/oddluck/limnoria-...,False,False,33,33,8,...,True,,https://github.com/oddluck/limnoria-plugins,Python,2019-02-14T17:40:29Z,2021-06-23T19:44:25Z,2021-08-14T00:12:47Z,https://github.com/oddluck/limnoria-plugins,oddluck,"['limnoria', 'irc', 'ascii-art', 'jeopardy', '..."
4,155705971,alm-api-backend,Beor18/alm-api-backend,Comenzo como desafio almundo.com y ahora estoy...,https://api.github.com/repos/Beor18/alm-api-ba...,False,False,11,11,2,...,True,https://almundo-examen.herokuapp.com/api/v1/ho...,https://github.com/Beor18/alm-api-backend,JavaScript,2018-11-01T11:37:25Z,2020-07-13T22:22:44Z,2020-09-20T03:56:50Z,https://github.com/Beor18/alm-api-backend,Beor18,"['mongodb', 'nodejs', 'api', 'apirest', 'javas..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1342,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd,"['contact-tracing', 'covid-19', 'iot', 'smartp..."
1343,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi,"['jupyterlab', 'jupyterhub', 'covid-19']"
1344,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1,"['eosc', 'eosc-secretariat', 'openweathermap-a..."
1345,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode,"['covid-19', 'qrcode', 'frontend']"


Comprobamos cuántos repositorios tienen en sus topics palabras relacionadas:

In [81]:
df_not_in_description_new_boolean = df_not_in_description_new.topics.str.contains(pattern_description)
df_not_in_description_new_boolean

0       True
1       True
2       True
3       True
4       True
        ... 
1342    True
1343    True
1344    True
1345    True
1346    True
Name: topics, Length: 1347, dtype: object

Vemos cuántos son:

In [82]:
df_not_in_description_new_boolean.sum()

1343

In [83]:
df_not_in_description_new_filtrado = df_not_in_description_new[df_not_in_description_new_boolean.fillna(False)]
df_not_in_description_new_filtrado.shape[0]

1343

In [84]:
df_not_in_description_new_filtrado

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login,topics
0,229502804,data-visualisation-scripts,Perishleaf/data-visualisation-scripts,Collection of scripts for data visualisation a...,https://api.github.com/repos/Perishleaf/data-v...,False,False,80,80,90,...,True,,https://github.com/Perishleaf/data-visualisati...,Jupyter Notebook,2019-12-22T01:12:45Z,2020-08-06T12:06:03Z,2021-06-10T12:17:53Z,https://github.com/Perishleaf/data-visualisati...,Perishleaf,"['datavisualization', 'python3', 'covid-19', '..."
1,141635301,Water-Monitoring-System,vinitshahdeo/Water-Monitoring-System,Water Monitoring System is an IOT based Liquid...,https://api.github.com/repos/vinitshahdeo/Wate...,False,False,128,128,245,...,True,https://vinitshahdeo.github.io/Water-Monitorin...,https://github.com/vinitshahdeo/Water-Monitori...,HTML,2018-07-19T22:06:05Z,2021-06-09T18:42:30Z,2021-08-06T15:01:36Z,https://github.com/vinitshahdeo/Water-Monitori...,vinitshahdeo,"['iot', 'water-monitoring', 'jquery', 'javascr..."
2,237388368,red-cross,WeileiZeng/red-cross,官方公示数据概览,https://api.github.com/repos/WeileiZeng/red-cr...,False,False,19,19,4,...,True,https://weileizeng.github.io/red-cross/,https://github.com/WeileiZeng/red-cross,Python,2020-01-31T08:22:45Z,2021-07-13T07:01:42Z,2020-05-27T05:59:00Z,https://github.com/WeileiZeng/red-cross,WeileiZeng,"['red-cross', 'github-pages', 'coronavirus', '..."
3,170732312,limnoria-plugins,oddluck/limnoria-plugins,Limnoria plugins I wrote or forked.,https://api.github.com/repos/oddluck/limnoria-...,False,False,33,33,8,...,True,,https://github.com/oddluck/limnoria-plugins,Python,2019-02-14T17:40:29Z,2021-06-23T19:44:25Z,2021-08-14T00:12:47Z,https://github.com/oddluck/limnoria-plugins,oddluck,"['limnoria', 'irc', 'ascii-art', 'jeopardy', '..."
4,155705971,alm-api-backend,Beor18/alm-api-backend,Comenzo como desafio almundo.com y ahora estoy...,https://api.github.com/repos/Beor18/alm-api-ba...,False,False,11,11,2,...,True,https://almundo-examen.herokuapp.com/api/v1/ho...,https://github.com/Beor18/alm-api-backend,JavaScript,2018-11-01T11:37:25Z,2020-07-13T22:22:44Z,2020-09-20T03:56:50Z,https://github.com/Beor18/alm-api-backend,Beor18,"['mongodb', 'nodejs', 'api', 'apirest', 'javas..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1342,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd,"['contact-tracing', 'covid-19', 'iot', 'smartp..."
1343,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi,"['jupyterlab', 'jupyterhub', 'covid-19']"
1344,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1,"['eosc', 'eosc-secretariat', 'openweathermap-a..."
1345,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode,"['covid-19', 'qrcode', 'frontend']"


Como vemos, todos los repositorios que no tenían palabras claves en el nombre ni en la descripción, tienen topics relacionados con la COVID-19.

---

In [62]:
df_res_final

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178037,267352452,steam-lockdown,digital-wellbeing/steam-lockdown,How has gaming changed during lockdown? Analys...,https://api.github.com/repos/digital-wellbeing...,False,False,0,0,0,...,True,True,https://digital-wellbeing.github.io/steam-lock...,https://github.com/digital-wellbeing/steam-loc...,HTML,2020-05-27T15:10:55Z,2021-02-10T14:26:15Z,2021-02-16T13:48:34Z,https://github.com/digital-wellbeing/steam-loc...,digital-wellbeing
178047,267575087,Smart-Food-Storage,m-boutaleb/Smart-Food-Storage,An intelligent application (very useful during...,https://api.github.com/repos/m-boutaleb/Smart-...,False,False,0,0,0,...,True,True,,https://github.com/m-boutaleb/Smart-Food-Storage,Java,2020-05-28T11:45:23Z,2020-10-04T15:25:00Z,2021-03-23T08:43:26Z,https://github.com/m-boutaleb/Smart-Food-Storage,m-boutaleb
178050,246130172,Airplanes,cgettings/Airplanes,Visualizing flights into LGA at the start of t...,https://api.github.com/repos/cgettings/Airplan...,False,False,0,0,0,...,True,False,,https://github.com/cgettings/Airplanes,HTML,2020-03-09T20:01:16Z,2020-08-10T19:11:27Z,2020-08-10T19:11:31Z,https://github.com/cgettings/Airplanes,cgettings
178051,267858096,face_mask_detection_openvino,mmphego/face_mask_detection_openvino,Detect faces and determine whether people are ...,https://api.github.com/repos/mmphego/face_mask...,False,False,35,35,11,...,True,True,https://blog.mphomphego.co.za/blog/2020/06/02/...,https://github.com/mmphego/face_mask_detection...,Python,2020-05-29T12:56:47Z,2021-02-16T12:18:40Z,2021-05-25T21:53:17Z,https://github.com/mmphego/face_mask_detection...,mmphego


Finalmente, volvemos a actualizar *df_res_final* con los 1.343 repositorios anteriores:

In [70]:
df_res_final = df_res_final.append(df_not_in_description_new_filtrado.drop(columns=['topics']))
df_res_final

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178062,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd
178063,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178065,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178068,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


Y lo exportamos a CSV:

In [71]:
df_res_final.to_csv(r'dataset_repositorios_filtrados.csv', index=False, encoding="utf-8-sig")

--------

### Repositorios con keywords relacionadas creados antes de 2020

Como se vio en el análisis manual del Notebook de Visualizaciones en la sección "Repositorios creados antes de declararse pandemia", la lista de repositorios no relacionados con creación previa a enero de 2020 es la siguiente:

In [2]:
invalidos = [2618529, 7797677, 34158241, 42319556, 42321742, 46790113, 63896251, 64085813, 106683801, 105979787, 
             183196269, 196332961, 225646868, 195876578,  209656058, 231134405, 216056228, 140326771, 172798234, 
             229122289, 21244714, 93610140, 175701328, 194225243, 150090997, 197573508, 213540232]

In [22]:
df = pd.read_csv('dataset_repositorios_filtrados.csv')

In [23]:
df_sin_invalidos = df[~df['id'].isin(invalidos)]
df_sin_invalidos

Unnamed: 0,id,name,full_name,description,contributors_url,archived,disabled,stargazers_count,watchers_count,forks_count,...,has_projects,has_wiki,homepage,html_url,language,created_at,pushed_at,updated_at,svn_url,owner.login
0,238316428,COVID-19,CSSEGISandData/COVID-19,"Novel Coronavirus (COVID-19) Cases, provided b...",https://api.github.com/repos/CSSEGISandData/CO...,False,False,27211,27211,17432,...,True,True,https://systems.jhu.edu/research/public-health...,https://github.com/CSSEGISandData/COVID-19,,2020-02-04T22:03:53Z,2021-08-14T11:35:08Z,2021-08-14T11:56:01Z,https://github.com/CSSEGISandData/COVID-19,CSSEGISandData
1,235374822,COVID-19,midas-network/COVID-19,2019 novel coronavirus repository,https://api.github.com/repos/midas-network/COV...,False,False,642,642,221,...,True,True,,https://github.com/midas-network/COVID-19,TeX,2020-01-21T15:27:23Z,2021-08-12T20:15:14Z,2021-08-12T20:15:21Z,https://github.com/midas-network/COVID-19,midas-network
2,239777191,coronavirus,RamiKrispin/coronavirus,The coronavirus dataset,https://api.github.com/repos/RamiKrispin/coron...,False,False,394,394,175,...,True,True,https://ramikrispin.github.io/coronavirus/,https://github.com/RamiKrispin/coronavirus,R,2020-02-11T14:08:06Z,2021-08-14T08:19:49Z,2021-08-14T08:19:52Z,https://github.com/RamiKrispin/coronavirus,RamiKrispin
3,237129709,coronavirus,FoldingAtHome/coronavirus,Folding@home COVID-19 efforts,https://api.github.com/repos/FoldingAtHome/cor...,False,False,1115,1115,87,...,True,True,https://foldingathome.org,https://github.com/FoldingAtHome/coronavirus,Python,2020-01-30T03:12:19Z,2020-10-27T03:29:18Z,2021-08-09T21:17:36Z,https://github.com/FoldingAtHome/coronavirus,FoldingAtHome
4,234936988,ncov,nextstrain/ncov,Nextstrain build for novel coronavirus SARS-CoV-2,https://api.github.com/repos/nextstrain/ncov/c...,False,False,1208,1208,343,...,True,True,https://nextstrain.org/ncov,https://github.com/nextstrain/ncov,Python,2020-01-19T17:23:17Z,2021-08-13T12:36:06Z,2021-08-13T12:36:10Z,https://github.com/nextstrain/ncov,nextstrain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178061,268856502,iot-contact-tracer,fwcd/iot-contact-tracer,Decentralized contact tracing system supportin...,https://api.github.com/repos/fwcd/iot-contact-...,False,False,1,1,0,...,True,True,,https://github.com/fwcd/iot-contact-tracer,Dart,2020-06-02T16:46:15Z,2020-09-04T13:14:05Z,2020-09-04T15:03:54Z,https://github.com/fwcd/iot-contact-tracer,fwcd
178062,269479371,jupyter,kclhi/jupyter,:whale2: JupyterHub Docker configuration,https://api.github.com/repos/kclhi/jupyter/con...,False,False,2,2,0,...,True,True,https://kclhi.org,https://github.com/kclhi/jupyter,Python,2020-06-04T22:35:16Z,2021-06-11T18:23:39Z,2021-08-14T14:32:53Z,https://github.com/kclhi/jupyter,kclhi
178063,269527538,OpenWeatherMap-concat-histories,ALT-F1/OpenWeatherMap-concat-histories,Concatenate histories weather data collected b...,https://api.github.com/repos/ALT-F1/OpenWeathe...,False,False,1,1,0,...,True,True,http://www.alt-f1.be,https://github.com/ALT-F1/OpenWeatherMap-conca...,Python,2020-06-05T04:04:46Z,2021-08-12T13:30:11Z,2020-06-08T12:44:27Z,https://github.com/ALT-F1/OpenWeatherMap-conca...,ALT-F1
178064,270013759,frontend,Tackode/frontend,Frontend for Tackode,https://api.github.com/repos/Tackode/frontend/...,False,False,0,0,0,...,True,True,https://tackode.com,https://github.com/Tackode/frontend,Vue,2020-06-06T14:57:50Z,2021-08-12T04:05:02Z,2021-01-11T08:38:35Z,https://github.com/Tackode/frontend,Tackode


Exportamos con el dataset actualizado:

In [24]:
df_sin_invalidos.to_csv(r'dataset_repositorios_filtrados_final.csv', index = False, encoding="utf-8-sig")