# Automatically labelling GitHub issues

In [None]:
#r "nuget: Azure.AI.OpenAI, *-*"

In [None]:
#r "nuget: Octokit, 9.0.0"
#r "nuget: Octokit.Reactive, 9.0.0"

In [None]:
#r "nuget:Microsoft.DotNet.Interactive.AIUtilities, 1.0.0-beta.23552.1"

In [None]:
using Azure;
using Azure.AI.OpenAI;
using Microsoft.DotNet.Interactive;
using Microsoft.DotNet.Interactive.AIUtilities;
using Octokit;

In [None]:
// Azure OpenAI connection settings, requested interactively from the notebook user.
var azureOpenAIKey = await Kernel.GetPasswordAsync("Provide your OPEN_AI_KEY");
var azureOpenAIEndpoint = await Kernel.GetInputAsync("Provide the OPEN_AI_ENDPOINT");
var chatDeployment = await Kernel.GetInputAsync("Provide chat deployment name");
// Deployment name passed to the embeddings call. The prompt now asks for the embedding
// deployment explicitly; the identifier's spelling is kept because a later cell refers to it.
var ebmeddingDeployment = await Kernel.GetInputAsync("Provide embedding deployment name");

## Access to GitHub
You will need an access token with rights to query and update issues.

In [None]:
// GitHub connection details, requested interactively: the API token first,
// then the repository name and the organisation that owns it.
var githubKey = await Kernel.GetPasswordAsync("Provide your Github api key");
string repoName = await Kernel.GetInputAsync("Provide repo");
string org = await Kernel.GetInputAsync("Provide org");

In [None]:
// Client for the Azure OpenAI endpoint, authenticated with the key entered above.
var openAIClient = new OpenAIClient(
    new Uri(azureOpenAIEndpoint),
    new AzureKeyCredential(azureOpenAIKey.GetClearTextPassword()));

In [None]:
var options = new ApiOptions();
var gitHubClient = new GitHubClient(new ProductHeaderValue("notebook"));

// Attach credentials only when a token was entered; with an empty token the
// client is left without credentials.
if (string.IsNullOrEmpty(githubKey.GetClearTextPassword())) {
    Console.WriteLine("Using anonymous GitHub API");
} else {
    Console.WriteLine("Using GitHub API token");
    gitHubClient.Credentials = new Credentials(githubKey.GetClearTextPassword());
}

In [None]:
// Fetch every label defined on the target repository.
IReadOnlyList<Label> allLabels =
    await gitHubClient.Issue.Labels.GetAllForRepository(org, repoName);

In [None]:
// Render the fetched labels as a table in the notebook output.
allLabels.DisplayTable();

In [None]:
// Request describing which issues to fetch: any issue filter, limited by `Since`
// to roughly the last six months (180 days) counted back from now (UTC).
var last6Months = new RepositoryIssueRequest
{
    Filter = IssueFilter.All,
    Since = DateTimeOffset.UtcNow.Subtract(TimeSpan.FromDays(30*6))
};
// The request has to be handed to the call; previously it was built but never
// used, so the time window was silently ignored.
var allIssues = await gitHubClient.Issue.GetAllForRepository(org, repoName, last6Months);

In [None]:
// Tell the reader when every fetched issue already carries at least one label.
// Any() stops at the first unlabelled issue instead of counting all of them.
if (!allIssues.Any(issue => issue.Labels.Count == 0)) {
    "No issues without labels, no need to proceed!".Display();
}

In [None]:
/// <summary>
/// Pairs a GitHub issue with the embedding vector computed for it.
/// </summary>
/// <param name="Issue">The issue as returned by the GitHub client.</param>
/// <param name="Embedding">The embedding vector associated with the issue.</param>
public record IssueWithEmbedding(
    Issue Issue,
    float[] Embedding);

In [None]:
var issuesWithEmbeddings = new List<IssueWithEmbedding>();

var tokenizer = await Tokenizer.CreateAsync(TokenizerModel.ada2);

// Process the issues in batches of 16, issuing one embeddings request per batch.
foreach (var batch in allIssues.Chunk(16)) {
    // One input per issue: title and body joined by a newline, then passed through
    // TruncateByTokenCount with a limit of 8191.
    var inputs = batch
        .Select(issue => tokenizer.TruncateByTokenCount(issue.Title + "\n" + issue.Body, 8191))
        .ToArray();

    var result = await openAIClient.GetEmbeddingsAsync(ebmeddingDeployment, new EmbeddingsOptions(inputs));
    var vectors = result.Value.Data.Select(item => item.Embedding.ToArray()).ToArray();

    // Pair each issue with the vector found at the same position in the response.
    issuesWithEmbeddings.AddRange(
        Enumerable.Range(0, batch.Length)
            .Select(position => new IssueWithEmbedding(batch[position], vectors[position])));
}

In [None]:
// Split the embedded issues into those without any label and those that already have one.
var noLabels = issuesWithEmbeddings.FindAll(entry => entry.Issue.Labels.Count == 0);
var labelled = issuesWithEmbeddings.FindAll(entry => entry.Issue.Labels.Count > 0);

In [None]:
/// <summary>
/// A repository label together with the issues that carry it and an embedding
/// vector assigned to the label.
/// </summary>
public class LabelWithEmbeddings
{
    /// <summary>The GitHub label.</summary>
    public Label Label { get; set; }

    /// <summary>Embedding vector assigned to the label; not set at construction.</summary>
    public float[] Embedding { get; set; }

    /// <summary>Issues associated with the label; defaults to an empty list.</summary>
    public List<IssueWithEmbedding> Issues { get; init; } = new();
}

In [None]:
// Labels paired with their issues; starts out empty.
List<LabelWithEmbeddings> labelsWithEmbeddings = new();

In [None]:
// For every label whose name contains "Area-", collect the labelled issues that
// carry a label of the same name, and keep only the labels that have at least one such issue.
labelsWithEmbeddings.AddRange(
    allLabels
        .Where(candidate => candidate.Name.Contains("Area-"))
        .Select(candidate => new LabelWithEmbeddings {
            Label = candidate,
            Issues = labelled
                .Where(entry => entry.Issue.Labels.Any(applied => applied.Name == candidate.Name))
                .ToList()
        })
        .Where(group => group.Issues.Count > 0));

In [None]:
// Give every label the centroid (element-wise mean) of the embeddings of its issues.
foreach (var label in labelsWithEmbeddings) {
    var vectors = label.Issues.Select(i => i.Embedding).ToList();

    // Size the accumulator from the data rather than a hard-coded 1536, so that an
    // embedding model with a different dimensionality is not silently truncated.
    // Entries are only added to the list when they have at least one issue, so
    // the first vector is expected to exist.
    var dimensions = vectors[0].Length;

    // Accumulate the element-wise sum in issue order.
    var sums = new float[dimensions];
    foreach (var vector in vectors) {
        for (var d = 0; d < dimensions; d++) {
            sums[d] += vector[d];
        }
    }

    // Divide by the number of issues to obtain the mean vector.
    var centroid = new float[dimensions];
    for (var d = 0; d < dimensions; d++) {
        centroid[d] = sums[d] / vectors.Count;
    }

    label.Embedding = centroid;
}

In [None]:
// For each unlabelled issue, keep at most five labels whose similarity score is
// above 0.85, ordered from the highest score down.
var suggestions = noLabels.ToDictionary(
    issue => issue,
    issue => labelsWithEmbeddings
        .ScoreBySimilarityTo(issue.Embedding, new CosineSimilarityComparer<float[]>(f => f), l => l.Embedding)
        .Where(scored => scored.Value > 0.85)
        .OrderByDescending(scored => scored.Value)
        .Take(5)
        .Select(scored => scored.Key)
        .ToArray());

In [None]:
// Show one row per unlabelled issue: its title and the names of the suggested labels.
(from pair in suggestions
 select new {
     Issue = pair.Key.Issue.Title,
     SuggestedLabels = pair.Value.Select(suggested => suggested.Label.Name).ToArray()
 }).DisplayTable();