### Objective

Use LangChain to create a bot that knows how to call an ML surrogate model to calculate motor loss.

#### 1. Preprocessing

##### 1.1 Loading packages

In [1]:
# import necessary packages 
from collections import defaultdict
import numpy as np
import sklearn.gaussian_process as gp
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from pathlib import Path
from scipy.stats import qmc
import os
import sys
import utilities

# Machine learning models
from sklearn.linear_model import LinearRegression
import sklearn.gaussian_process as gp
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, MinMaxScaler 
from sklearn.pipeline import Pipeline

# Custom Gaussian Process model
# Directory one level above the notebook's working directory.
module_path = os.path.abspath(os.path.join('../'))
# Folder that holds GPInterpolator.py; os.path.join picks the right separator on
# every OS (the previous hard-coded "\\" only produced a usable path on Windows).
gp_module_path = os.path.join(module_path, 'GaussianProcess')
# Test for the entry that is actually appended, so re-running the cell never
# adds the same directory to sys.path twice.
if gp_module_path not in sys.path:
    sys.path.append(gp_module_path)
from GPInterpolator import GPInterpolator

np.set_printoptions(precision=4,suppress=True)

##### 1.2 Loading data

In [2]:
# Read the two operating-mode tables from the Camp_1 folder.
normal_df = pd.read_csv('./full_grid_exploration/Camp_1/normal_op.csv')
FW_df = pd.read_csv('./full_grid_exploration/Camp_1/field_weakening_op.csv')

# Look-up from operating-mode name to its table.
ops_dict = dict(zip(('normal-op', 'field-weakening'), (normal_df, FW_df)))

# Distinct psi_ref values, taken from the normal-operation table only.
total_psi_levels = normal_df['psi_ref'].unique().tolist()
print(f"Available Psi_ref levels: {total_psi_levels}")

Available Psi_ref levels: [50, 70, 90, 100, 110]


#### 2. ML analysis

In [3]:
# Keyword arguments for each supported model, keyed by the model-type name that
# model_fit() dispatches on. Every inner dict is unpacked into the matching constructor.
model_hyperparams = {
    'linear regression': dict(),
    'polynomial regression': dict(degree=2),
    'random forest': dict(
        n_estimators=800,
        random_state=42,
        n_jobs=-1,
        max_features=0.5,
        max_samples=0.8,
        max_depth=12,
    ),
    'XGBoost': dict(
        n_estimators=1000,
        max_depth=4,
        learning_rate=0.05,
        reg_lambda=0,
        gamma=0,
        subsample=0.2,
        colsample_bytree=1,
        random_state=10,
        n_jobs=-1,
    ),
    'Gaussian Process (sklearn)': dict(
        # Constant kernel times RBF kernel; each takes an initial value of 1.0
        # and bounds of (1e-3, 1e3).
        kernel=gp.kernels.ConstantKernel(1.0, (1e-3, 1e3)) * gp.kernels.RBF(1.0, (1e-3, 1e3)),
        optimizer='fmin_l_bfgs_b',
        n_restarts_optimizer=100,
        alpha=1e-10,
        normalize_y=True,
        random_state=10,
    ),
    # Settings for the project's own GPInterpolator class.
    'Gaussian Process': dict(
        n_restarts=100,
        kernel='Gaussian',
        trend='Const',
        opt=dict(optimizer='L-BFGS-B', jac=True),
    ),
}

In [4]:
def model_fit(model_type, X_train, y_train, hyperparams):
    """Build the estimator named by ``model_type`` and fit it on the training data.

    Parameters
    ----------
    model_type : str
        One of 'linear regression', 'polynomial regression',
        'Gaussian Process (sklearn)', 'Gaussian Process', 'random forest'
        or 'XGBoost'.
    X_train, y_train
        Training inputs and targets; handed unchanged to the estimator's ``fit``.
    hyperparams : dict
        Maps model-type names to keyword-argument dicts. Only the entry for
        ``model_type`` is read, and none is read for 'linear regression'.

    Returns
    -------
    The fitted estimator: a ``Pipeline`` for the regression and Gaussian-process
    variants, the bare regressor for 'random forest' and 'XGBoost'.

    Raises
    ------
    KeyError
        If ``model_type`` is not one of the names listed above.
    """

    def settings():
        # Looked up lazily so that model types without an entry are never queried.
        return hyperparams[model_type]

    # One zero-argument factory per model type; nothing is constructed until the
    # selected factory is called.
    factories = {
        'linear regression': lambda: Pipeline([
            ('scaler', StandardScaler()),
            ('regressor', LinearRegression()),
        ]),
        'polynomial regression': lambda: Pipeline([
            ('poly', PolynomialFeatures(**settings())),
            ('scaler', StandardScaler()),
            ('regressor', LinearRegression()),
        ]),
        # scikit-learn's Gaussian process regressor
        'Gaussian Process (sklearn)': lambda: Pipeline([
            ('scaler', StandardScaler()),
            ('regressor', gp.GaussianProcessRegressor(**settings())),
        ]),
        # Project-specific Gaussian process implementation
        'Gaussian Process': lambda: Pipeline([
            ('scaler', MinMaxScaler()),
            ('regressor', GPInterpolator(**settings())),
        ]),
        'random forest': lambda: RandomForestRegressor(**settings()),
        'XGBoost': lambda: xgb.XGBRegressor(**settings()),
    }

    make_estimator = factories.get(model_type)
    if make_estimator is None:
        raise KeyError('Unrecognized model type!')

    estimator = make_estimator()
    estimator.fit(X_train, y_train)
    return estimator

Training ML surrogate model

In [5]:
# --- Experiment configuration ---
df_dict = ops_dict
input_col = ['input: SF', 'input: FN', 'psi_ref']
mode = 'combo-op'
target = 'loss ratio'
model_type = 'XGBoost'
test_ratio = 0.2
repeat_exp = 20  # not used anywhere in this cell

# --- Step 1: select the dataset for all psi_ref levels in the chosen mode ---
X, CIMAC_loss, CIMTD_loss = utilities.data_selector(
    psi_ref=total_psi_levels,
    mode=mode,
    df_dict=df_dict,
    input_col=input_col,
)

# --- Step 2: create the train/test data with a fixed seed ---
X_train, y_train, X_test, y_test, CIMAC_loss, CIMTD_loss = utilities.train_test_data_creator(
    X,
    CIMAC_loss,
    CIMTD_loss,
    target,
    seed=42,
    test_ratio=test_ratio,
    verbose=False,
)

# --- Step 3: fit the surrogate model (no evaluation is done in this cell) ---
ML_model = model_fit(model_type, X_train, y_train, model_hyperparams)

#### 3. LangChain agent

##### 3.1 Define custom tool

In [6]:
from langchain.tools import tool

@tool("motor_loss_prediction")
def motor_loss_predict(Torque: float, Supply_frequency: float, Flux_level: float) -> float:
    """Calculate the motor loss based on the given motor operating condition, which includes 
    torque, supply frequency, and flux level."""
    # The docstring above doubles as the tool description shown to the agent.
    # Arrange the three scalars as one row with three columns for predict().
    # NOTE(review): ML_model is fitted with input_col = ['input: SF', 'input: FN',
    # 'psi_ref']; confirm that (Torque, Supply_frequency, Flux_level) matches that
    # column order.
    operating_point = np.array([[Torque, Supply_frequency, Flux_level]])
    prediction = ML_model.predict(operating_point)

    # Single input row, so the first element is the only prediction.
    return prediction[0]

In [None]:
@tool("plot_figure")
def plot_figure(x: np.ndarray, y: np.ndarray) -> plt.Figure:
    """Draw a line plot of y against x and return the figure. x and y must contain the same
    number of values, for example supply frequencies in x and the corresponding motor losses in y."""
    # The previous body was a copy of the prediction tool: it referenced undefined
    # names (Torque, Supply_frequency, Flux_level, fig) and never used x or y.
    # NOTE(review): the np.ndarray annotations are kept for interface stability;
    # confirm the agent's argument schema can supply them, since an LLM normally
    # passes plain lists.
    x_values = np.asarray(x, dtype=float).ravel()
    y_values = np.asarray(y, dtype=float).ravel()
    if x_values.size != y_values.size:
        raise ValueError(
            f"x and y must have the same length, got {x_values.size} and {y_values.size}"
        )

    fig, ax = plt.subplots()
    ax.plot(x_values, y_values, marker='o')
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    return fig

In [None]:
import os
import openai
from langchain.llms import AzureOpenAI
from langchain.agents import load_tools, Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType

# Configure the openai module for the Azure endpoint; the key comes from the
# OPENAI_API_KEY environment variable.
openai.api_type = "azure"
openai.api_base = "https://abb-chcrc.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")

# Language model that controls the agent.
llm = AzureOpenAI(
    model_name="text-davinci-003",
    deployment_name='deployment-5af509f3323342ee919481751c6f8b7d',
)

# Agent whose only tool is the surrogate-model loss predictor.
agent = initialize_agent(
    [motor_loss_predict],
    llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Frequency sweep at fixed torque and flux level, filtered by a loss threshold.
agent.run("""Please identify all the supply frequencies whose associated loss value is lower than 1.1. 
Say supply frequency can vary from 10 to 70, at a step of 10.
The torque value is kept to 1 and the flux level value is kept to 100. 
Please calculate of different parameter combinations individually.
""")

In [None]:
# Spot check: prediction of the fitted surrogate for the first test sample only.
ML_model.predict(X_test[:1])[0]

In [None]:
# Display the training inputs that ML_model was fitted on.
X_train

## Repository

In [None]:
# Templates
# Example agent prompts; none of them is passed to an agent in this cell.

# Q1: compute the loss for an explicit list of operating points.
Q1 = """I need you to calculate the motor loss given different operating conditions. 
Each operating condition is defined by a tuple (torque, supply frequency, flux level).
Here are the operating conditions: (0.2, 10, 50), (0.2, 30, 50), (1, 30, 70)."""

# Q2: same operating points as Q1, plus a request for the minimum-loss condition.
Q2 = """I need you to calculate the motor loss given different operating conditions. 
Each operating condition is defined by a tuple (torque, supply frequency, flux level). Based
on your calculated losses, please tell me under which operating condition the machine has
the minimum loss.

Here is the operating conditions: (0.2, 10, 50), (0.2, 30, 50), (1, 30, 70)."""

# Q3: the agent must enumerate all torque/frequency combinations itself, then pick the minimum.
Q3 = """I need you to evaluate the motor loss under different operating conditions. 
The possible motor torque value is 0.2 and 0.4. The possible supply frequency value is 
20, and 50. And the flux level value can only be 100. Please calculate motor losses given 
all possible combinations of operating conditions, and tell me which operating condition
has the minimum loss."""

# Q4: supply-frequency sweep at fixed torque and flux level.
Q4 = """I want to know how the motor loss varies with supply frequency. Say supply frequency varies from 
10 to 75, at a step of 5. For different supply frequency values, the torque value is kept to 1, 
and the flux level value is kept to 100. Please calculate different parameter combination individually. 
"""

In [None]:
from langchain.chat_models import AzureChatOpenAI
from langchain.agents import load_tools, Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
# from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# Load the chat model used to control the agent.
llm = AzureChatOpenAI(
        openai_api_base="https://abb-chcrc.openai.azure.com/",
        openai_api_version="2023-03-15-preview",
        # Read the key from the OPENAI_API_KEY environment variable instead of
        # committing it to the notebook. The key that used to be hard-coded here
        # should be treated as leaked and rotated.
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        openai_api_type="azure",
        deployment_name="gpt-35-turbo-0301", streaming=True,)

# Initialize an agent with the surrogate-prediction tool, the language model,
# and the agent type to use.
agent = initialize_agent([motor_loss_predict], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, 
                         verbose=True)

# Test the agent on a single operating condition.
agent.run("""I need you to calculate the motor loss under the following operating condition: 
the motor torque is 0.2, the supply frequency is 10, and the flux level is 50.""")

In [None]:
# Useful code
from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
from langchain import OpenAI, SerpAPIWrapper, LLMChain

# Reference example, unrelated to the motor-loss tools: a zero-shot agent with a
# web-search tool and a custom prompt.
search = SerpAPIWrapper()
tools = [
    Tool(
        name="Search",
        func=search.run,
        description="useful for when you need to answer questions about current events",
    ),
]

# Prompt text placed before and after the tool descriptions.
prefix = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:"""
suffix = """Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Args"

Question: {input}
{agent_scratchpad}"""

prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "agent_scratchpad"],
)

# Chain the LLM with the custom prompt, then restrict the agent to the listed tools.
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
tool_names = [entry.name for entry in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
)

agent_executor.run("How many people live in canada as of 2023?")