### A prototyping notebook for quickly testing and troubleshooting new functions or issues

#### Imports

In [None]:
import os
import src.api_call as ac
import pandas as pd
import src.text_process as tp
from sklearn.model_selection import train_test_split
import src.bert_model as bm
import src.model_evaluation as me
from pathlib import Path

#### API test section

In [None]:
# Set to True when exercising the API code path instead of the local files.
testing_api = False

# Topic name -> integer class code.
# Defined outside the branch so that the later cells, which iterate over it
# and take its length, still work when `testing_api` is switched to True.
test_set = {
    'Blockchain': 0,
    'Cryptocurrency': 1,
    'Genetic engineering': 2,
    'Machine learning': 3,
    'Nanotechnology': 4,
    'Quantum computing': 5,
    'Robotics': 6,
    'Social engineering': 7,
    'Space exploration': 8,
    'Virtual reality': 9,
}

if testing_api:
    # Placeholder: include API test code here.
    pass
else:
    # Default to locally saved text data
    print('Using local test files')
    


#### Text processing

In [None]:
# Set to True when exercising experimental text-processing code.
testing_text_process = False

# Stays an empty frame unless the default branch below fills it.
all_text = pd.DataFrame()

if testing_text_process:
    '''
    text processing tests here
    '''
else:
    # Default to text processing used in main.
    # Per-topic frames are collected and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    topic_frames = []
    for key, value in test_set.items():
        # One CSV per topic, named after the topic, under raw_data/.
        data = pd.read_csv(Path('raw_data') / f'{key}.csv')
        # NOTE(review): presumably keeps only English-language papers and
        # strips hyperlinks from the text; confirm in src.text_process.
        data = tp.english_papers(data, 'English')
        data = tp.remove_hyperlinks(data)

        # tokenize text into sentences and convert to dataframe
        data = pd.DataFrame(tp.text_clean(data['fullText']))

        # add column for encoding
        data['Code'] = value

        # rename columns (the unnamed column 0 holds the cleaned text)
        data.rename(columns={0: 'Text'}, inplace=True)

        topic_frames.append(data)

    # pd.concat raises on an empty list, so keep the empty frame in that case.
    if topic_frames:
        all_text = pd.concat(topic_frames, ignore_index=True)
    all_text.drop_duplicates(inplace=True)

#### Model building

In [None]:
# Flip to True to run experimental model code instead of the default pipeline.
testing_model = False

if not testing_model:
    # Default path: the same model structure/functions used in main.
    features = all_text['Text']
    labels = all_text['Code']
    x_train, x_test, y_train, y_test = train_test_split(features, labels)

    # Build the model, sized by the number of topics in test_set.
    n_topics = len(test_set)
    model = bm.generate_model(n_topics)

    # Train and evaluate in one call.
    # NOTE: `eval` shadows the builtin; the name is kept because the
    # evaluation cell reads it.
    model_history, train_time, eval = bm.compile_fit_evaluate(
        model, x_train, y_train, x_test, y_test
    )
else:
    '''
    model tests here
    '''

#### Evaluation

In [None]:
# Flip to True to run experimental evaluation code instead of the default.
testing_evaluation = False

if testing_evaluation:
    '''
    evaluation tests here
    '''
else:
    # Default to evaluation used in main

    # `model_name` is passed to both calls below but is not assigned anywhere
    # in this notebook, so a clean run would stop with a NameError. Fall back
    # to a placeholder unless the session has already defined it.
    # NOTE(review): the expected format of the name is decided by
    # src.model_evaluation; confirm and replace the placeholder if needed.
    if 'model_name' not in globals():
        model_name = 'prototype_model'

    #generate confusion matrix, save to local file
    me.confusion_matrix(model, x_test, y_test, model_name)

    # save text and model information
    me.save_model_data(model, eval, model_history, model_name)