## Import Libraries

In [None]:
from IPython.display import HTML
from ibm_watson import AssistantV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import pandas as pd    
import json
import time
from tqdm import tqdm
import seaborn as sns
import matplotlib as plt
import matplotlib.pyplot as plt
from datetime import date

## API Details

In [None]:
# IAM authenticator for the Watson service. The API key is an empty
# placeholder in this file -- presumably filled in before running.
authenticator = IAMAuthenticator('')

# Assistant V2 client pinned to API version 2020-04-01 and pointed at the
# eu-gb service URL.
assistant = AssistantV2(authenticator=authenticator, version='2020-04-01')
assistant.set_service_url('https://api.eu-gb.assistant.watson.cloud.ibm.com')

# ID of the assistant the questions are sent to (empty placeholder as well).
assistant_id = ''

In [None]:
# Module-level accumulators; every one starts out as its own empty list.
ResponseList, IntentList, QuestionList = [], [], []
ConfidenceList, ExpectedList = [], []
SecondIntent, SecondConfidence = [], []

# Date stamp stored alongside this run's summary.
today = date.today()

# Inputs, read from the current working directory.
RegressionFile = pd.read_csv("Regression.csv")
RegressionLog = pd.read_csv("RegressionLog.csv")


## Load Historic files into lists

In [None]:
# Pull the regression file's columns out as plain Python lists.
QuestionList, ExpectedList, ScoreList = (
    RegressionFile[column].tolist()
    for column in ('Question', 'Expected', 'Previous Score')
)

# Historic results stay as pandas Series, one per column of the log file.
(
    HistoricMatchConfidence,
    HistoricUnmatchConfidence,
    HistoricAverageConfidence,
    HistoricCorrectResponse,
    HistoricLowConfidence,
    HistoricDate,
) = (
    RegressionLog[column]
    for column in (
        'Match Confidence',
        'Unmatched Confidence',
        'Average Confidence',
        'Correct Response %',
        'Low Confidence',
        'Date',
    )
)

## Send questions to Watson and log the response

In [None]:
 
def Sender(question=None):
    """Send one question to Watson Assistant and record its top two intents.

    Args:
        question: Text to send. When omitted, the module-level loop variable
            ``x`` is used, so existing ``Sender()`` calls keep working.

    Side effects:
        Advances the module-level progress bar ``pbar`` by one and appends one
        entry to each of ``IntentList``, ``ConfidenceList``, ``SecondIntent``
        and ``SecondConfidence``.

    Raises:
        KeyError: If the returned context lacks the expected ``Intent`` /
            ``Intent1`` entries. In that case none of the four lists is
            modified, so they stay the same length.
    """
    text = x if question is None else question

    response = assistant.message_stateless(
        assistant_id,
        input={
            'message_type': 'text',
            'text': text,
            'options': {
                'return_context': True
            }
        },
        context={
            'skills': {
                'main skill': {
                    'user_defined': {
                        'ConfidenceTester': 'Yes'
                    }
                }
            }
        }
    ).get_result()
    pbar.update(1)  # Updates the progress bar

    # NOTE(review): 'Intent' and 'Intent1' are presumably context variables
    # the dialog skill sets when 'ConfidenceTester' is 'Yes' -- confirm
    # against the skill definition.
    user_defined = response['context']['skills']['main skill']['user_defined']
    winner = user_defined['Intent']
    runner_up = user_defined['Intent1']

    # Read all four values before appending anything: a missing key must not
    # leave the result lists with different lengths.
    winner_intent, winner_confidence = winner['intent'], winner['confidence']
    second_intent, second_confidence = runner_up['intent'], runner_up['confidence']

    IntentList.append(winner_intent)
    ConfidenceList.append(winner_confidence)
    SecondIntent.append(second_intent)
    SecondConfidence.append(second_confidence)

## Loop through and show progress bar

In [None]:

# One tick per question; Sender() advances the bar itself.
question_count = len(QuestionList)
pbar = tqdm(desc="Percentage complete", total=question_count)

# The loop variable has to be named ``x``: Sender() reads it as a global.
for x in QuestionList:
    Sender()

pbar.close()

## Turn all the data into something usable

In [None]:
MatchList = []
ConfidenceChangeList = []

# First pass: the raw top-two intents and their confidences.
DifferentialDict = {
    "Intent 1": IntentList,
    "Confidence 1": ConfidenceList,
    "Intent 2": SecondIntent,
    "Confidence 2": SecondConfidence,
}
DifferentialDF = pd.DataFrame(DifferentialDict)

# Gap between the winning intent's confidence and the runner-up's.
Differential = DifferentialDF["Confidence 1"].sub(DifferentialDF["Confidence 2"])

# Second pass: the same columns plus the gap.
DifferentialDict = {**DifferentialDict, "Difference": Differential}
DifferentialDF = pd.DataFrame(DifferentialDict)



# Lower-case both sides so the comparison below is case-insensitive.
IntentList = [item.lower() for item in IntentList]
ExpectedList = [item.lower() for item in ExpectedList]

# Fail loudly on a length mismatch instead of silently skipping the
# comparison and crashing later with an unrelated ZeroDivisionError.
if len(IntentList) != len(ExpectedList):
    raise ValueError(
        "Received {} intents for {} expected answers".format(
            len(IntentList), len(ExpectedList)
        )
    )

# One 'Matched' / 'Unmatched' label per question, in question order.
MatchList.extend(
    'Matched' if actual == expected else 'Unmatched'
    for actual, expected in zip(IntentList, ExpectedList)
)

CorrectCount = MatchList.count("Matched")
IncorrectCount = MatchList.count("Unmatched")
LowconfidenceCount = IntentList.count('zlowconf')

# Percentage of correct responses, rounded to two decimals. An empty
# question list yields 0.0 rather than dividing by zero.
AnsweredCount = CorrectCount + IncorrectCount
Total = round(CorrectCount / AnsweredCount * 100, 2) if AnsweredCount else 0.0


# Attach this run's results to the regression frame.
RegressionFile['MatchList'] = MatchList
# TODO (carried over from the original author): look at this before release.
RegressionFile['ConfidenceList'] = ConfidenceList

# Split the questions by outcome.
MatchedQuestion = RegressionFile[RegressionFile['MatchList'].eq('Matched')]
UnmatchedQuestion = RegressionFile[RegressionFile['MatchList'].eq('Unmatched')]

# Mean confidence as a percentage rounded to two decimals, stored as text.
AverageForMatched = str(round(MatchedQuestion['ConfidenceList'].mean() * 100, 2))
AverageForUnmatched = str(round(UnmatchedQuestion['ConfidenceList'].mean() * 100, 2))
AverageConfidence = str(round(RegressionFile['ConfidenceList'].mean() * 100, 2))

# The remaining summary figures are converted to text as well.
Total = str(Total)
LowconfidenceCount = str(LowconfidenceCount)

# This run's summary, one single-element list per column of the log file.
# The low-confidence count now gets its own list like its siblings; before,
# LatestHistoricLowConfidence stayed empty and the dict relied on pandas
# broadcasting a bare scalar.
LatestHistoricMatchConfidence = [AverageForMatched]
LatestHistoricUnmatchConfidence = [AverageForUnmatched]
LatestHistoricAverageConfidence = [AverageConfidence]
LatestHistoricCorrectResponse = [Total]
LatestHistoricLowConfidence = [LowconfidenceCount]
LatestHistoricDate = [today]

LatestDict = {
    'Match Confidence': LatestHistoricMatchConfidence,
    'Unmatched Confidence': LatestHistoricUnmatchConfidence,
    'Average Confidence': LatestHistoricAverageConfidence,
    'Correct Response %': LatestHistoricCorrectResponse,
    'Low Confidence': LatestHistoricLowConfidence,
    'Date': LatestHistoricDate,
}

# Single-row frame with the same column labels as the historic log.
LatestDF = pd.DataFrame(LatestDict)

# Historic results, keyed by the log file's column labels.
HistoricDict = {
    'Match Confidence': HistoricMatchConfidence,
    'Unmatched Confidence': HistoricUnmatchConfidence,
    'Average Confidence': HistoricAverageConfidence,
    'Correct Response %': HistoricCorrectResponse,
    'Low Confidence': HistoricLowConfidence,
    'Date': HistoricDate,
}

# Final per-question breakdown: top-two intents, their confidences, the gap
# between them, and whether the winning intent matched the expected one.
DifferentialDict = {
    "Intent 1": IntentList,
    "Confidence 1": ConfidenceList,
    "Intent 2": SecondIntent,
    "Confidence 2": SecondConfidence,
    "Difference": Differential,
    'Matched': MatchList,
}
DifferentialDF = pd.DataFrame(DifferentialDict)

# Save Differential raw data to a file.
DifferentialDF.to_csv('DifferentialBreakdown.csv', index=False)

# Rows whose winning intent did not match. The original evaluated this filter
# and discarded the result; it is kept under a name so it can be inspected.
UnmatchedBreakdownDF = DifferentialDF[DifferentialDF.Matched != 'Matched']

# Historic log with this run's row appended. ignore_index gives the new row a
# fresh index label instead of a duplicate 0.
HistoricDF = pd.DataFrame(HistoricDict)
JoinedDF = pd.concat([HistoricDF, LatestDF], join="inner", ignore_index=True)



# Inputs for the per-intent averages.
pd.set_option('display.max_rows', 500)

# NOTE: this is an alias of IntentList, not a copy -- nothing is removed here.
LowConfidenceRemoved = IntentList

# Distinct intents in first-seen order.
UniqueList = [*dict.fromkeys(IntentList)]

# One row per question: the winning intent and its confidence.
AverageDict = dict(Intent=IntentList, Confidence=ConfidenceList)
AverageDF = pd.DataFrame(AverageDict)



In [None]:
# Show the historic log with this run's summary row appended.
print(JoinedDF)

In [None]:
# Mean confidence for each winning intent.
MeanDF = AverageDF.groupby(by=['Intent']).mean()
print(MeanDF)

In [None]:
# Number of questions that resolved to each winning intent.
IntentCountDF = AverageDF.groupby(by=['Intent']).count()
print(IntentCountDF)

## Definition Calculation

In [None]:
# Definition score: for every question, take the difference between the
# confidence of the first (winning) intent and that of the second intent, its
# nearest competitor. Averaging that difference per winning intent shows how
# clearly each intent is separated from the alternatives.
# Note: every row is included, whether or not the winning intent matched the
# expected one. The result is sorted from best to worst separated.
DefinitionDF = (
    DifferentialDF
    .groupby('Intent 1')['Difference']
    .mean()
    .sort_values(ascending=False)
)

print(DefinitionDF)




## Confusion Matrix

In [None]:
# Each column now holds what its name says: the expected intents are the
# actual labels and Watson's winning intents are the predictions. The original
# stored them the other way round and swapped them back in the crosstab call;
# the resulting matrix is the same.
ConfusionMatrixData = {
    'y_Actual': ExpectedList,
    'y_Predicted': LowConfidenceRemoved,
}

DFCM = pd.DataFrame(ConfusionMatrixData, columns=['y_Actual', 'y_Predicted'])

# Rows are expected intents, columns are returned intents. normalize='index'
# turns each row into proportions.
confusion_matrix = pd.crosstab(
    DFCM['y_Actual'],
    DFCM['y_Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
    normalize='index',
)

sns.set(rc={'figure.figsize': (20, 20)})
confusionmat = sns.heatmap(confusion_matrix, cmap='viridis', linewidths=.2, square=True)
# NOTE(review): the figure handle is fetched but never saved in this cell.
fig = confusionmat.get_figure()

## Save the updated files to include the latest scores

In [None]:
# Write this run's per-question results back to Regression.csv, overwriting
# the file that was read at the start.
RegressionDict = dict([
    ('Question', QuestionList),
    ('Expected', ExpectedList),
    ('Intent', IntentList),
    ('Confidence', ConfidenceList),
    ('Matched', MatchList),
    ('Previous Score', ScoreList),
])
finaldf = pd.DataFrame(RegressionDict)
finaldf.to_csv('Regression.csv', index=False)

# Rebuild the historic log, append this run's summary row, and write it back
# to RegressionLog.csv.
Historic = dict([
    ('Match Confidence', HistoricMatchConfidence),
    ('Unmatched Confidence', HistoricUnmatchConfidence),
    ('Average Confidence', HistoricAverageConfidence),
    ('Correct Response %', HistoricCorrectResponse),
    ('Low Confidence', HistoricLowConfidence),
    ('Date', HistoricDate),
])
HistoricDF = pd.DataFrame(Historic)

frames_to_join = [HistoricDF, LatestDF]
RegressionLogDF = pd.concat(frames_to_join, join="inner")
RegressionLogDF.to_csv('RegressionLog.csv', index=False)

## Create a box plot for the average per intent

In [None]:
# One row per question: the winning intent and its confidence.
BoxPlot = pd.DataFrame({"Intent": IntentList, "Confidence": ConfidenceList})

intent_axis = BoxPlot["Intent"]
confidence_axis = BoxPlot["Confidence"]

fig = plt.figure(figsize=(25, 10))

# Horizontal box per intent, with the individual points drawn on top.
sns.boxplot(x=confidence_axis, y=intent_axis)
sns.stripplot(
    x=confidence_axis,
    y=intent_axis,
    size=4,
    color=".1",
    linewidth=0,
)

fig.savefig('boxplot.jpg', bbox_inches='tight', dpi=150)
plt.show()
