## Import Libraries

In [None]:
!pip install ibm-watson
!pip install seaborn
!pip install pandas
!pip install tqdm
!pip install matplotlib

In [None]:
from IPython.display import HTML
from ibm_watson import AssistantV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import pandas as pd
import json
import time
from tqdm import tqdm
import seaborn as sns
# `plt` conventionally names the pyplot interface; binding it to the top-level
# matplotlib package would leave calls such as plt.figure() unavailable.
import matplotlib.pyplot as plt
from datetime import date
import statistics

# Date stamp stored with this run's summary row in the regression log.
today = date.today()

## API Details

In [None]:
import os

# Connection details are read from the environment so that secrets never have
# to be saved inside the notebook. Each one falls back to the original empty
# placeholder when its variable is not set.
WATSON_API_KEY = os.environ.get('WATSON_ASSISTANT_APIKEY', '')
WATSON_SERVICE_URL = os.environ.get('WATSON_ASSISTANT_URL', '')

authenticator = IAMAuthenticator(WATSON_API_KEY)
assistant = AssistantV2(
    version='2020-04-01',
    authenticator=authenticator
)
assistant.set_service_url(WATSON_SERVICE_URL)
assistant_id = os.environ.get('WATSON_ASSISTANT_ID', '')

In [None]:
# Top two intents (and their confidences) per question; Sender() appends to these.
IntentList, ConfidenceList = [], []
SecondIntent, SecondConfidence = [], []

# 'Matched' / 'Unmatched' outcome per question.
MatchList = []

# Not populated anywhere in the visible notebook; kept so the names stay defined.
ResponseList, ConfidenceChangeList = [], []

# Placeholder only: replaced by the 'Expected' column of the regression file.
ExpectedList = []

# Summary values for this run; together they form the newest regression-log row.
LatestHistoricMatchConfidence = []
LatestHistoricUnmatchConfidence = []
LatestHistoricAverageConfidence = []
LatestHistoricCorrectResponse = []
LatestHistoricDate = []

# Inputs: the question set to replay and the log of earlier runs.
RegressionFile = pd.read_csv('regression.csv')
HistoricDF = pd.read_csv('RegressionLog.csv')

## Load the last set of results and questions into a list

In [None]:
# Pull the three regression columns out as plain Python lists:
# the utterance, the intent it should hit, and the confidence from the last run.
QuestionList, ExpectedList, ScoreList = (
    RegressionFile[column].tolist()
    for column in ('Question', 'Expected', 'Confidence')
)

## Send questions to Watson and log the response

In [None]:
def Sender(text=None):
    """Send one question to Watson Assistant and record its top two intents.

    Args:
        text: The utterance to send. When omitted, the module-level loop
            variable ``x`` is used, so existing ``Sender()`` calls keep working.

    Side effects:
        Advances the module-level progress bar ``pbar`` by one, then appends
        one entry each to ``IntentList``, ``ConfidenceList``, ``SecondIntent``
        and ``SecondConfidence``.

    Raises:
        KeyError: If the returned context has no ``Intent`` / ``Intent1``
            entry (or one lacks ``intent`` / ``confidence``). Nothing is
            appended in that case, so the four lists stay the same length.
    """
    if text is None:
        text = x  # fall back to the loop variable used by the calling cell

    response = assistant.message_stateless(
        assistant_id,
        input={
            'message_type': 'text',
            'text': text,
            'options': {
                'return_context': True
            }
        },
        # NOTE(review): presumably this flag tells the dialog skill to expose
        # Intent / Intent1 as context variables -- confirm against the skill.
        context={
            'skills': {
                'main skill': {
                    'user_defined': {
                        'ConfidenceTester': 'Yes'
                    }
                }
            }
        }
    ).get_result()
    pbar.update(1)  # Updates the progress bar

    user_defined = response['context']['skills']['main skill']['user_defined']
    first, second = user_defined['Intent'], user_defined['Intent1']

    # Read every value before appending anything: a missing key must not leave
    # the result lists with different lengths.
    first_intent, first_confidence = first['intent'], first['confidence']
    second_intent, second_confidence = second['intent'], second['confidence']

    IntentList.append(first_intent)
    ConfidenceList.append(first_confidence)
    SecondIntent.append(second_intent)
    SecondConfidence.append(second_confidence)

## Loop through and show progress bar

In [None]:
# Start from empty result lists so that re-running this cell does not append a
# second copy of every response (Sender() only ever appends).
for _results in (IntentList, ConfidenceList, SecondIntent, SecondConfidence):
    _results.clear()

# The context manager closes the bar even if a request fails part-way through.
# Sender() reads the module-level names `x` and `pbar`, so both names must stay.
with tqdm(total=len(QuestionList), desc="Percentage complete") as pbar:
    for x in QuestionList:
        Sender()

## Case Match

In [None]:
# Compare intents case-insensitively: lower-case both sides up front.
IntentList = [intent.lower() for intent in IntentList]
ExpectedList = [expected.lower() for expected in ExpectedList]

# Distinct intent names in first-seen order (dict keys keep insertion order).
UniqueList = list(dict.fromkeys(IntentList))

## Differential Calculation

In [None]:
# Gap between Watson's first and second choice, one value per question.
confidence_pair = pd.DataFrame({
    "Confidence 1": ConfidenceList,
    "Confidence 2": SecondConfidence,
})
Differential = confidence_pair["Confidence 1"].sub(confidence_pair["Confidence 2"])

# Per-question table: both candidate intents, their confidences and the gap.
DifferentialDict = {
    "Intent 1": IntentList,
    "Confidence 1": ConfidenceList,
    "Intent 2": SecondIntent,
    "Confidence 2": SecondConfidence,
    "Difference": Differential,
}
DifferentialDF = pd.DataFrame(DifferentialDict)

# Clarity for the log: mean first-choice confidence minus mean second-choice
# confidence, expressed in percentage points.
Difference1 = statistics.mean(ConfidenceList)
Difference2 = statistics.mean(SecondConfidence)
Clarity = round((Difference1 - Difference2) * 100, 2)

## Low Confidence and Match count

In [None]:
# Answers whose confidence is below this value count as low confidence.
LOW_CONFIDENCE_THRESHOLD = 0.2

# A mismatch means the responses and the expectations no longer line up
# question-by-question; fail loudly instead of scoring nothing and hitting a
# division by zero further down.
if len(IntentList) != len(ExpectedList):
    raise ValueError(
        "Got {} intents for {} expected values; re-run the cell that sends "
        "the questions before scoring.".format(len(IntentList), len(ExpectedList))
    )

# Rebuilt from scratch (not appended to) so re-running this cell cannot double
# the number of entries.
MatchList = [
    'Matched' if returned == expected else 'Unmatched'
    for returned, expected in zip(IntentList, ExpectedList)
]

CorrectCount = MatchList.count("Matched")
IncorrectCount = MatchList.count("Unmatched")

LowConf = sum(1 for score in ConfidenceList if score < LOW_CONFIDENCE_THRESHOLD)

NumExamples = len(MatchList)

# Percentage of questions that returned the expected intent.
Total = round(CorrectCount / NumExamples * 100, 2)

## Calculate the Averages

In [None]:
# Attach this run's outcome to the question table so it can be split by result.
RegressionFile['MatchList'] = MatchList
RegressionFile['ConfidenceList'] = ConfidenceList  # TODO (original note): look at this before release

MatchedQuestion = RegressionFile[RegressionFile['MatchList'] == 'Matched']
UnmatchedQuestion = RegressionFile[RegressionFile['MatchList'] == 'Unmatched']

# Mean confidence as a percentage; NaN when the corresponding group is empty.
AverageForMatched = round(MatchedQuestion['ConfidenceList'].mean() * 100, 2)
AverageForUnmatched = round(UnmatchedQuestion['ConfidenceList'].mean() * 100, 2)
AverageConfidence = round(RegressionFile['ConfidenceList'].mean() * 100, 2)

# Rebind to fresh single-element lists instead of appending: re-running this
# cell would otherwise add a duplicate row to the regression log.
LatestHistoricMatchConfidence = [AverageForMatched]
LatestHistoricUnmatchConfidence = [AverageForUnmatched]
LatestHistoricAverageConfidence = [AverageConfidence]
LatestHistoricCorrectResponse = [Total]
LatestHistoricDate = [today]

In [None]:
# Summary row for this run. The scalar entries are broadcast by pandas to the
# length of the list-valued entries.
LatestDict = {
    'Match Confidence': LatestHistoricMatchConfidence,
    'Unmatched Confidence': LatestHistoricUnmatchConfidence,
    'Average Confidence': LatestHistoricAverageConfidence,
    'Correct Response %': LatestHistoricCorrectResponse,
    'Clarity': Clarity,
    'Low Confidence': LowConf,
    'Number of Examples': NumExamples,
    'Date': LatestHistoricDate,
}
LatestDF = pd.DataFrame(LatestDict)

# Per-question table, now including the match outcome. The column must hold
# MatchList: filling it with the NumExamples count put the same number on
# every row and made the 'Matched' filter below select everything.
DifferentialDict = {
    "Intent 1": IntentList,
    "Confidence 1": ConfidenceList,
    "Intent 2": SecondIntent,
    "Confidence 2": SecondConfidence,
    "Difference": Differential,
    'Matched': MatchList,
}
DifferentialDF = pd.DataFrame(DifferentialDict)

# Questions that did not return the expected intent, kept under a name so the
# selection can be inspected instead of being computed and thrown away.
UnmatchedDifferentialDF = DifferentialDF[DifferentialDF['Matched'] != 'Matched']

# join="inner" keeps only the columns present in both the history and this run.
RegressionLogDF = pd.concat([HistoricDF, LatestDF], join="inner")

## Print the last 5 results

In [None]:
# Most recent five runs, this one included as the last row.
RegressionLogDF.tail(n=5)

## Confusion Matrix

In [None]:
# The expected intent is the ground truth ("actual"); the intent Watson
# returned is the prediction. The two were labelled the other way round.
ConfusionMatrixData = {
    'x_Actual': ExpectedList,
    'y_Predicted': IntentList,
}

DFCM = pd.DataFrame(ConfusionMatrixData, columns=['x_Actual', 'y_Predicted'])

# Rows are expected intents, columns are returned intents. normalize='index'
# scales each row to sum to 1, so a cell is the share of that expected intent's
# questions that came back as the column's intent.
confusion_matrix = pd.crosstab(
    DFCM['x_Actual'],
    DFCM['y_Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
    normalize='index',
)
sns.set(rc={'figure.figsize': (20, 20)})
confusionmat = sns.heatmap(confusion_matrix, cmap='viridis', linewidths=.2, square=True)

## Create a box plot for the average per intent

In [None]:
# One row per question: the intent Watson returned and its confidence.
BoxPlot = pd.DataFrame({"Intent": IntentList, "Confidence": ConfidenceList})

# Horizontal boxes, one per intent, showing the spread of confidence.
sns.boxplot(data=BoxPlot, x="Confidence", y="Intent", fliersize=0.1);

## Definition Calculation

In [None]:
# Mean gap between first and second choice per intent, largest first.
# (A Series despite the name; the name is kept for compatibility.)
DefinitionDF = (
    DifferentialDF
    .groupby('Intent 1')['Difference']
    .mean()
    .sort_values(ascending=False)
)

# barplot draws the per-intent mean itself; passing the sorted intent names as
# `order` makes the bars follow the ranking computed above instead of the
# order in which intents happen to appear in the data.
BarPlot = sns.barplot(
    y=DifferentialDF['Intent 1'],
    x=DifferentialDF['Difference'],
    order=DefinitionDF.index.tolist(),
    errwidth=0,
)

## Save the updated files to include the latest scores

In [None]:
# Per-question results of this run, with the previous run's confidence kept
# alongside the new one for reference.
RegressionDict = {
    'Question': QuestionList,
    'Expected': ExpectedList,
    'Intent': IntentList,
    'Confidence': ConfidenceList,
    'Matched': MatchList,
    'Previous Score': ScoreList,
}
finaldf = pd.DataFrame(RegressionDict)
# NOTE(review): finaldf is built but never written to disk in this cell --
# confirm whether it was meant to be saved as the next regression file.

# Persist the updated run history and the per-question differential table.
for frame, target in (
    (RegressionLogDF, 'RegressionLog.csv'),
    (DifferentialDF, 'Differential.csv'),
):
    frame.to_csv(target, index=False)