# Dependencies loader

Given the names of a set of <b>organizations</b> already stored in the database, this notebook builds the <b>manifest</b> and <b>parsed</b> SBoM for each repository, then stores the resulting <b>dependencies</b> and <b>packages</b> in the database.

<hr>

In [None]:

# GitHub usernames of the organizations to analyse.
organizations = [
    "italia",
    "opendatatrentino",
]

# IMPORTANT: set the flag below to True to collect the dependencies of the
# organizations obtained from the GitHub & Government list
# (notebook_1 (gov_org_loader).ipynb). In that case only those organizations
# are analysed and the list above is ignored.
only_orgs_from_government_list = True

#### Requirements

In [None]:
#! npm install check-imports # For collecting JavaScript and TypeScript parsed dependencies
#! pip install pipreqsnb # For collecting Python and Jupyter Notebook parsed dependencies

# Moreover, in order to detect all the packages, the following are required (from https://github.com/microsoft/component-detection/blob/main/docs/feature-overview.md):
# Conda v4.10.2+        -> for Conda dependencies
# Gradle 7              -> for Java dependencies (Gradle)
# Go 1.11+              -> for Go dependencies
# Maven                 -> for Java dependencies (Maven)
# Internet connection   -> for Python (PyPI) transitive dependencies

<hr>

#### Logger set up

In [None]:
import logging, os, datetime,sys
from pathlib import Path
# Make sure the folder that receives the log files exists.
Path('logs').mkdir(parents=True,exist_ok=True)

# The log file is named after the current date (dd-mm-YYYY).
log_file = os.path.join('logs', 'log-' + datetime.datetime.now().strftime("%d-%m-%Y") + '.log')

# Logging levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
# Every record is written both to the log file and to standard output.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s |:| LEVEL:%(levelname)-2s |:| FILE:notebook_2 (dep_loader).ipynb:%(lineno)-s |:| %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stdout),
    ],
)

# Stop urllib3 records from propagating to the root logger's handlers.
logging.getLogger("urllib3").propagate = False

#### Get organizations from database

In [None]:
from lib.sqlite_utils import DBConnection 

# Location of the SQLite database; it must already exist before this notebook runs.
database_file = os.path.join('database','database.sqlite')

if not os.path.exists(database_file):
    # Log and raise the same message so the failure is visible in both places.
    missing_db_message = 'Database does not exists! You need to create it first (db_builder.ipynb)'
    logging.critical(missing_db_message)
    raise Exception(missing_db_message)

# Connection shared by all the following cells.
conn = DBConnection(database_file)

# Turn `organizations` into a list of organization records (dict-like rows
# exposing at least 'url' and 'user_name'), which is what the following cells
# index into.
if only_orgs_from_government_list:
    # Use every organization whose section and category are set, ignoring the
    # user-provided list of names.
    rows = conn.query('SELECT url,name,user_name FROM organization WHERE section!="NULL" and category!="NULL"')
    organizations = [dict(zip(['url','name','user_name'], row)) for row in rows]
else:
    # Look up each requested GitHub username. The result is built in a new
    # list: removing items from `organizations` while iterating over its
    # indexes skipped the entry following each removal and raised an uncaught
    # IndexError on the last index.
    found_organizations = []
    for requested_name in organizations:
        try:
            record = conn.get_rows('organization',{'url':'https://github.com/{}'.format(requested_name)})[0]
        except IndexError:
            logging.warning('Cannot find organization "{}" in the database!'.format(requested_name))
            continue
        logging.info('Found organization "{}" in the database!'.format(record['user_name']))
        found_organizations.append(record)
    organizations = found_organizations

 

#### SBoM creation and dependencies collection

In [None]:
from lib.github import download_repo
from lib.deps_utils import js_parsed_deps,py_parsed_deps,get_deps_from_sbom
from pathlib import Path
import os.path
from lib.system_utils import exc,delete_file_with,unique_dictionaries
import shutil

# The sbom-tool executable is required further down; stop early when it is missing.
sbom_tool_executable = os.path.join('sbom-tool','sbom-tool.exe')
if not os.path.exists(sbom_tool_executable):
    missing_tool_message = 'Cannot find sbom-tool.exe executable! You need to provide sbom-tool.exe inside the sbom-tool folder!'
    logging.critical(missing_tool_message)
    raise Exception(missing_tool_message)

# Tokens passed to delete_file_with() before the source files are parsed.
# List from paper: https://arxiv.org/abs/1710.04936 + 'example'
tokens = [
    'development',
    'optional',
    'enhances',
    'suggests',
    'build',
    'configure',
    'test',
    'develop',
    'dev',
    'example',
    'doc',
]

# Output folders: sbom/ and sbom/manifest/
sbom_root = Path('sbom')
sbom_root.mkdir(parents=True,exist_ok=True)
(sbom_root / 'manifest').mkdir(parents=True,exist_ok=True)

# Accumulators filled by the collection loop and stored in the database afterwards.
manifest_dependencies = []
parsed_dependencies = []
packages = []

# For every repository of every selected organization:
#   1. download the repository into sbom/manifest/<organization>/<repository>;
#   2. run sbom-tool on it and keep the SPDX manifest as
#      man_<organization>_<repository>_sbom.json;
#   3. read the manifest dependencies from that file;
#   4. for Python/Jupyter and JavaScript/TypeScript repositories, also collect
#      the dependencies parsed from the source files;
#   5. append the results to `packages`, `manifest_dependencies` and
#      `parsed_dependencies`, then delete the downloaded sources.
# Repositories whose SBoM file already exists are skipped, and a failure on one
# repository never stops the processing of the others.
for organization in organizations:
    org_name = organization['user_name']
    repositories = conn.get_rows('repository', {'organization':organization['url']})
    org_folder = os.path.join('sbom','manifest',org_name)

    logging.info('Starting dependencies collection for organization "{}"'.format(org_name))
    for repo in repositories:
        repo_name = repo['name']
        repo_folder = os.path.join(org_folder,repo_name)
        sbom_path = os.path.join(org_folder,'man_{}_{}_sbom.json'.format(org_name,repo_name))

        if os.path.exists(sbom_path):
            logging.warning('A SBoM file for repository "{}" already exists. Skipping repository...'.format(repo_name))
            continue
        # parents=True also creates the organization folder when needed.
        Path(repo_folder).mkdir(parents=True,exist_ok=True)

        logging.info('Downloading repository "{}" ...'.format(repo_name))
        try:
            download_repo(org_name,repo_name,repo_folder,branch=repo['default_branch'])
        except Exception:
            logging.warning('Cannot download repository "{}" of organization "{}". Continue with others ...'.format(repo_name,org_name))
            shutil.rmtree(repo_folder, ignore_errors=True)
            # Best-effort removal of a partially downloaded archive; only
            # file-system errors are ignored (a bare except would also hide
            # KeyboardInterrupt).
            try:
                os.remove(os.path.join(org_folder,repo_name+'.zip'))
            except OSError:
                pass
            continue

        logging.info('Executing sbom-tool on repository "{}" ...'.format(repo_name))
        try:
            # Raw string: the Windows path contains backslashes that must not be
            # read as escape sequences. The line breaks only serve readability
            # and are stripped before the command is executed.
            exc(r""".\sbom-tool\sbom-tool.exe generate -b {0}
                                         -bc {0} 
                                         -pn {1} 
                                         -pv 1.0 
                                         -ps {2} 
                                         -nsb https://github.com/{2} 
                                         -m {3}""".format(repo_folder,
                                                          repo_name,
                                                          org_name,
                                                          org_folder).replace('\n',''))
        except Exception:
            shutil.rmtree(repo_folder, ignore_errors=True)
            logging.warning('An error occurred while running sbom-tool for repository "{}". The execution proceeds skipping this repository.'.format(repo_name))
            continue

        # Keep only the SPDX manifest, renamed after the repository, and drop
        # the rest of the _manifest folder.
        shutil.move(os.path.join(org_folder,'_manifest','spdx_2.2','manifest.spdx.json'), sbom_path)
        shutil.rmtree(os.path.join(org_folder,'_manifest'), ignore_errors=True)

        dependencies = get_deps_from_sbom(sbom_path,repo_name)

        js_parsed_dependencies,py_parsed_dependencies = [],[]

        if repo['language'] in ['Python','Jupyter Notebook']:
            delete_file_with(repo_folder,tokens) # Delete file with 'dev,example,...' tokens
            # This message used to be logged in the JavaScript/TypeScript
            # branch, after check-imports had already run.
            logging.info('Executing pipreqsnb on repository "{}" ...'.format(repo_name))
            try:
                py_parsed_dependencies = py_parsed_deps(org_folder,org_name,repo_name,dependencies)
            except Exception as e:
                shutil.rmtree(repo_folder, ignore_errors=True)
                logging.warning('An error occurred while running pipreqsnb for the "{}" repository. The execution proceeds without getting Python parsed_dependencies for the repository. Error:\n {}'.format(repo_name,e))

        if repo['language'] in ['JavaScript','TypeScript']:
            delete_file_with(repo_folder,tokens) # Delete file with 'dev,example,...' tokens
            logging.info('Executing check-imports on repository "{}" ...'.format(repo_name))
            try:
                js_parsed_dependencies = js_parsed_deps(repo_folder)
            except Exception as e:
                shutil.rmtree(repo_folder, ignore_errors=True)
                logging.warning('An error occurred while running check-imports for the "{}" repository. The execution proceeds without getting JavaScript parsed_dependencies for the repository. Error:\n {}'.format(repo_name,e))

        repo_parsed_dependencies = js_parsed_dependencies + py_parsed_dependencies

        # Packages are de-duplicated per repository before being accumulated.
        packages.extend(unique_dictionaries(dependencies + repo_parsed_dependencies))

        manifest_dependencies.extend({'package':dep['purl'],'repository':repo['url']} for dep in dependencies)
        parsed_dependencies.extend({'package':dep['purl'],'repository':repo['url']} for dep in repo_parsed_dependencies)

        # The downloaded sources are no longer needed once parsed.
        shutil.rmtree(repo_folder, ignore_errors=True)

    logging.info('Dependencies collection completed for organization "{}"!'.format(org_name))
 

#### Store packages and dependencies in the database

In [None]:
# Persist everything gathered by the collection cell. The rows belong to all
# the analysed organizations, so the log messages report how many there were
# rather than naming one: the leftover loop variable `organization` only held
# the last organization and was undefined when none had been processed.
organization_count = len(organizations)
logging.info('Store collected packages and dependencies in the database for {} organization(s) ...'.format(organization_count))

# Tables are filled in this order: packages first, then the dependency rows
# that reference them by purl.
for table, rows in (('package', packages),
                    ('manifest_dependency', manifest_dependencies),
                    ('parsed_dependency', parsed_dependencies)):
    for row in rows:
        conn.add_or_update(table, row)

logging.info('Storing of packages and dependencies in the database for {} organization(s) completed!'.format(organization_count))

### Close database connection

In [None]:
# Close the database connection opened earlier in this notebook.
conn.close()