# Helper Functions

This notebook contains all the helper functions that you can access and use from any other notebook in the project.

## Access

Copy and paste this import into the notebook where you want to use the functions:
   - from ipynb.fs.full.functions import *
   
Or, when you only want to import a specific function:
   - from ipynb.fs.full.functions import GetSpecificColumns

## Functions

<b>Note!</b> If a function doesn't work, check whether the column names and file paths it relies on are still up to date.

### Imports

In [5]:
import pandas as pd
import requests
import hashlib
import os.path, time
from bs4 import BeautifulSoup

### Data

In [6]:
# Load the CSV below once, when this cell/module is executed, into the
# module-level DataFrame that GetWebsites, GetAllColumns and
# GetSpecificColumns read from.
testdata = pd.read_csv("calls/call-c85aac6308a09d9112b7235ab11f329c3178fd29f457deda02aec99a924e4c81.csv")

### Functions

In [7]:
def GetWebsites():
    """Return the column labelled "1" of the module-level ``testdata``.

    According to the original note this is the website column. The result is
    whatever ``testdata["1"]`` yields (a pandas Series when that label is
    unique in the frame).
    """
    website_column = testdata["1"]
    return website_column

def GetAllColumns():
    """Return the module-level ``testdata`` DataFrame with all its columns.

    No copy is made: the caller receives the very same object, so in-place
    changes to the result are visible to every other user of ``testdata``.
    """
    return testdata

def GetSpecificColumns(column_list):
    """Return a new DataFrame holding only the requested ``testdata`` columns.

    Args:
        column_list: Iterable of column labels to pick from ``testdata``.
            A label that occurs more than once yields a single column, kept
            at the position of its first occurrence.

    Returns:
        A DataFrame with one column per distinct label; an empty DataFrame
        when ``column_list`` is empty.

    Raises:
        KeyError: If a label is not a column of ``testdata``.
    """
    selected = {label: testdata[label] for label in column_list}
    return pd.DataFrame(selected)

def RequestSoup(webpage, timeout=30, parser="html.parser"):
    """Download ``webpage`` and return its body parsed as a BeautifulSoup tree.

    Args:
        webpage: URL handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.
        parser: Parser name passed to ``BeautifulSoup``. Naming it explicitly
            avoids the "no parser was explicitly specified" warning and keeps
            the parse result independent of which optional parsers happen to
            be installed on the machine.

    Returns:
        The ``BeautifulSoup`` object built from the raw response bytes.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.

    Note:
        The HTTP status code is not checked, so error pages (404, 500, ...)
        are parsed and returned like any other page.
    """
    response = requests.get(webpage, timeout=timeout)
    return BeautifulSoup(response.content, parser)

def hash_variable(variable):
    """Return the SHA-256 hex digest of ``str(variable)``.

    The value is first converted with ``str`` and encoded as UTF-8, so any
    object with a string representation can be hashed. Two values with the
    same string form (e.g. ``123`` and ``"123"``) produce the same digest.
    """
    encoded = str(variable).encode('utf-8')
    return hashlib.sha256(encoded).hexdigest()

def existing_file(params_hashed, filename, potential_overwrite):
    """Tell whether a previously stored result file should be reused.

    The file looked for is ``calls/<filename>-<params_hashed>.csv``, relative
    to the current working directory. The path is built with ``os.path.join``
    so the same file is found on every operating system, and the same path is
    used for both the existence check and the modification-time lookup.

    Args:
        params_hashed: Hash of the query parameters (e.g. the output of
            ``hash_variable``); forms the second part of the file name.
        filename: Prefix of the file name, for example ``"call"``.
        potential_overwrite: When true, an existing file is reported as not
            reusable so that the caller produces fresh results.

    Returns:
        ``True`` only when the file exists and ``potential_overwrite`` is
        false; ``False`` when the file is missing or has to be overwritten.
    """
    csv_path = os.path.join("calls", f"{filename}-{params_hashed}.csv")

    # Nothing stored yet for these parameters: the caller has to run the query.
    if not os.path.isfile(csv_path):
        return False

    print("Deze call is al een keer uitgevoerd, ", end='')
    if potential_overwrite:
        print("we maken echter nieuwe resultaten aan.")
        return False

    print("we gaan verder met de bestaande resultaten.")
    last_modified = time.ctime(os.path.getmtime(csv_path))
    print(f"Dit bestand is laatst gewijzigd op: {last_modified}")
    return True