## Question Answering Pipeline Tests using ML-CHAMP

Now that we have an initial working QA pipeline, we can begin fine-tuning its components. ML-CHAMP lets us test different changes and settings while documenting each run's generated code, FAISS indexes, and related artifacts.

In [1]:
import json
import os
import pprint
import sys

# If you haven't pip-installed ML-CHAMP, you can load it into memory by uncommenting the lines below.
# They temporarily simulate a pip-installed ML-CHAMP library by adding it to sys.path.
#  - Be sure to set ml_champ_home to the location where you downloaded your copy of ML-CHAMP.
# This section can be removed once there is a stable Python library build.
# ml_champ_home = 'D:/JupyterPrograms/00-ML-CHAMP/ML-CHAMP'  
# print('ml_champ_home:', ml_champ_home)
# sys.path.append(ml_champ_home)

from ml_champ import Project
from ml_champ import Ensemble

%load_ext autoreload
%autoreload 2

In [2]:
# Instantiate the ML-CHAMP project used throughout this notebook.
project_name = 'QA_Pipeline_Testing'
project = Project(project_name)

# Show which attributes and methods the project's model object exposes.
# print(dir(project))
print(dir(project.model))

# Alternative two-step construction, kept for reference:
# project = Project()
# project.create_project(project_name=project_name)

# Notebook-level cache of the loaded models/tokenizers. They start out empty
# and are filled in by set_models_as_needed() after the first training run.
summary_model = summary_tokenizer = None
question_answer_model = question_answer_tokenizer = None

	working_directory: D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing
Importing modules...
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'get_ai_project', 'get_history_record', 'postprocessing_commands', 'preprocessing_commands', 'run_postprocessing', 'run_preprocessing', 'run_preprocessing_NO', 'test_model', 'train_model']


In [3]:
def preload_models_as_needed():
    """Push models cached in the notebook into the pipeline's ``model_settings``.

    Reads the notebook-level globals ``summary_model`` / ``summary_tokenizer``
    and ``question_answer_model`` / ``question_answer_tokenizer``. For each
    pair that is fully populated in the notebook, the pair is handed to
    ``model_settings`` whenever ``model_settings`` is missing the model, the
    tokenizer, or both.

    Returns:
        None. The only effect is the call(s) to the ``model_settings`` setters.
    """
    # The return value is not used here.
    # NOTE(review): presumably this call is what makes the ``ai_project``
    # package importable on the next line -- confirm against ML-CHAMP.
    project.model.get_ai_project()
    from ai_project.qa_pipeline.models import model_settings

    if summary_model and summary_tokenizer:
        # "or", not "and": a half-populated pair in model_settings is still
        # incomplete and must be replaced by the cached pair. This matches
        # the incompleteness test used in set_models_as_needed().
        if not model_settings.summarization_model or not model_settings.summarization_tokenizer:
            model_settings.set_summarization_model_and_tokenizer(summary_model, summary_tokenizer)

    if question_answer_model and question_answer_tokenizer:
        if not model_settings.qa_model or not model_settings.qa_tokenizer:
            model_settings.set_qa_model_and_tokenizer(question_answer_model, question_answer_tokenizer)
    
def set_models_as_needed():
    """Fill the notebook-level model cache from ``model_settings``.

    For the summarization pair and for the question/answer pair, if either the
    model or the tokenizer global is missing (falsy), both are replaced by
    whatever the matching ``model_settings`` getter returns. Pairs that are
    already complete are left alone.

    Returns:
        None. Results are stored in the globals ``summary_model``,
        ``summary_tokenizer``, ``question_answer_model`` and
        ``question_answer_tokenizer``.
    """
    global summary_model, summary_tokenizer
    global question_answer_model, question_answer_tokenizer

    # The return value is not used here.
    # NOTE(review): presumably this call is what makes the ``ai_project``
    # package importable on the next line -- confirm against ML-CHAMP.
    project.model.get_ai_project()
    from ai_project.qa_pipeline.models import model_settings

    summarizer_cached = summary_model and summary_tokenizer
    if not summarizer_cached:
        fetched = model_settings.get_summarization_model_and_tokenizer()
        summary_model, summary_tokenizer = fetched

    qa_cached = question_answer_model and question_answer_tokenizer
    if not qa_cached:
        fetched = model_settings.get_qa_model_and_tokenizer()
        question_answer_model, question_answer_tokenizer = fetched

In [4]:
# Re-instantiate the project before the run (the log below reports
# "CLEARED previous modules." at this point), then hand any models already
# cached in this notebook back to model_settings.
project = Project(project_name)
preload_models_as_needed()
    
# Command passed to the preprocessing step; the commented line is an
# alternative command.
preprocessing_command = {"number_of_qa_sentences_to_generate": 3}
# preprocessing_command = {"check_for_new_docs": True}

# Set the command and train from the previous run
project.model.run_preprocessing(preprocessing_command)
project.model.train_model()
# Cache any model/tokenizer pair that is still missing in the notebook globals.
set_models_as_needed()

	working_directory: D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing
Importing modules...
CLEARED previous modules.


  from .autonotebook import tqdm as notebook_tqdm


Called AI_Project.AI_Model.run_preprocessing()...
Loaded index from D:/JupyterPrograms/0-CHAT_GPT/EXPERIMENTS/ML_CHAMP/data/doc_index.index
Loaded sentence_to_index_mapping from D:/JupyterPrograms/0-CHAT_GPT/EXPERIMENTS/ML_CHAMP/data/sentence_to_index_mapping.json
Final length of the index: 831
Called AI_Project.AI_Model.train_model()
Initialized History_Record for project_directory:
	 D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing

History record BUILDING RUN:
	project_name: Default
	version: None
	experiment_name: None
	mlflow_experiment_id: Default
tracking_directory set to: D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing\build
Loading experiment_location in historical record...
Generating a run in order to determine its guid name
TRACKING - GENERATING RUN...

  GENERATING RUN RECORD...
Backing up code to the artifact directory.
Forcing reload of config file...
Running LMM Tests...
Sim score is 0.5060945749282837

In [5]:
# Start the MLflow server for this project's build directory; the log below
# reports it on port 5001 (http://127.0.0.1:5001).
project.start_server()
# Start the ML-CHAMP server; the log below checks port 5000 for it.
project.start_ml_champ_server()

Checking for running WINDOWS server on port 5001 using command: netstat -aon | findstr ":5001 "
project_directory: D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing
build_uri_directory: file:///D:/JupyterPrograms/0-CHAT_GPT/EXPERIMENTS/ML_CHAMP/QA_Pipeline/QA_Pipeline_Testing/build
Need the following:
	 mlflow ui --backend-store-uri file:///D:/JupyterPrograms/0-CHAT_GPT/EXPERIMENTS/ML_CHAMP/QA_Pipeline/QA_Pipeline_Testing/build
Server started at file:///D:/JupyterPrograms/0-CHAT_GPT/EXPERIMENTS/ML_CHAMP/QA_Pipeline/QA_Pipeline_Testing/build 
	Log in at http://127.0.0.1:5001
Checking for running WINDOWS server on port 5000 using command: netstat -aon | findstr ":5000 "
project_directory: D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline_Testing
Checking for running WINDOWS server on port 5000 using command: netstat -aon | findstr ":5000 "
Server started at D:\JupyterPrograms\0-CHAT_GPT\EXPERIMENTS\ML_CHAMP\QA_Pipeline\QA_Pipeline

In [7]:
# Uncomment the relevant line(s) to shut down a running server by port.
# project.stop_server(5001)  # MLflow server (default port 5001)
# project.stop_server(5000)  # ML-CHAMP server (default port 5000)
# project.stop_server(5002)

Checking for running WINDOWS server on port 5001 using command: netstat -aon | findstr ":5001 "
Checking for running WINDOWS server on port 5000 using command: netstat -aon | findstr ":5000 "

Found running server on pid 25884 from
	   TCP    0.0.0.0:5000           0.0.0.0:0              LISTENING       25884
Shutting down server on port 5000 and pid 25884
Checking for running WINDOWS server on port 5002 using command: netstat -aon | findstr ":5002 "
