In [None]:
!pip install pydriller
!pip install -q -U google-generativeai pydriller
import pydriller
import pandas as pd
import os
import json

Collecting pydriller
  Downloading PyDriller-2.6-py3-none-any.whl (33 kB)
Collecting gitpython (from pydriller)
  Downloading GitPython-3.1.42-py3-none-any.whl (195 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting lizard (from pydriller)
  Downloading lizard-1.17.10-py2.py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.0/66.0 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gitdb<5,>=4.0.1 (from gitpython->pydriller)
  Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython->pydriller)
  Downloading smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected packages: lizard, smmap, gitdb, gitpython, pydriller
Successfully installed gitdb-4.0.11 gitpython-3.1.42 lizard-

In [None]:
import requests
import time

class GitHubHandler:
    """Small GitHub API client that rotates tokens and retries failed GETs.

    The retry and wait settings are ordinary instance attributes, so they can
    be tuned after construction.
    """

    # Response header GitHub sends for tokens that carry an expiry date.
    _EXPIRATION_HEADER = "GitHub-Authentication-Token-Expiration"

    def __init__(self, api_keys):
        """Store the token pool and the default retry/rate-limit settings.

        Args:
            api_keys: List of token strings. The same list object is kept and
                expired tokens are deleted from it in place.
        """
        self.api_keys = api_keys
        self.current_key_index = 0
        # Seconds passed to time.sleep() once the quota reports 0 remaining.
        self.rate_limit_wait_time = 60
        self.max_retries = 3
        self.retryable_exceptions = (
            requests.exceptions.RequestException,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError,
        )
        # token -> expiry as epoch seconds, filled from response headers.
        self.reset_times = {}

    def _key_handler(self):
        """Return the next token in round-robin order, or None if the pool is empty."""
        if len(self.api_keys) == 0:
            print("No API keys available")
            return None

        # Re-normalise first so a pool that shrank since the last call cannot
        # push the cursor out of range.
        index = self.current_key_index % len(self.api_keys)
        key = self.api_keys[index]
        self.current_key_index = (index + 1) % len(self.api_keys)
        return key

    def _drop_key(self, index, key):
        """Delete the token at ``index`` and keep the rotation cursor valid."""
        del self.api_keys[index]
        self.reset_times.pop(key, None)
        remaining = len(self.api_keys)
        # The element that followed the deleted one now sits at ``index``.
        self.current_key_index = index % remaining if remaining else 0

    @staticmethod
    def _parse_expiration(raw_value):
        """Convert an expiration header value to epoch seconds.

        Accepts a plain integer or a ``YYYY-MM-DD HH:MM:SS`` timestamp followed
        by ``UTC`` or a numeric offset. Returns None when the value matches
        neither form, so an unexpected format never aborts a request.
        """
        from datetime import datetime

        text = str(raw_value).strip()
        try:
            return int(text)
        except ValueError:
            pass
        if text.upper().endswith(" UTC"):
            text = text[:-3] + "+0000"
        try:
            return datetime.strptime(text, "%Y-%m-%d %H:%M:%S %z").timestamp()
        except ValueError:
            return None

    def get_rate_limit_info(self, response):
        """Read the three ``X-RateLimit-*`` headers of ``response`` as integers.

        Raises:
            KeyError: If any of the headers is missing.
        """
        rate_limit_info = {
            "limit": int(response.headers["X-RateLimit-Limit"]),
            "remaining": int(response.headers["X-RateLimit-Remaining"]),
            "reset": int(response.headers["X-RateLimit-Reset"]),
        }
        return rate_limit_info

    def get(self, url, headers=None, params=None):
        """GET ``url`` with the next token, retrying on request errors.

        Args:
            url: Address to request.
            headers: Optional extra headers; the dict is copied, never mutated.
            params: Optional query parameters passed to ``requests.get``.

        Returns:
            The successful response. None is only returned when
            ``max_retries`` is smaller than 1.

        Raises:
            requests.exceptions.RequestException: The error of the final
                attempt, once every retry has failed.
        """
        base_headers = dict(headers) if headers else {}

        for attempt in range(1, self.max_retries + 1):
            # Reset per attempt so the except block never sees an unbound or
            # stale response from an earlier iteration.
            response = None
            try:
                pool_size = len(self.api_keys)
                used_index = self.current_key_index % pool_size if pool_size else None
                key = self._key_handler()

                request_headers = dict(base_headers)
                if key:
                    request_headers['Authorization'] = f"token {key}"
                # With no usable token the request goes out without our
                # Authorization header instead of the literal "token None".

                response = requests.get(url, headers=request_headers, params=params)
                response.raise_for_status()

                rate_limit_info = self.get_rate_limit_info(response)
                print(f"Rate limit info: {rate_limit_info}")
                if rate_limit_info['remaining'] == 0:
                    # This request succeeded; pause to protect the next call,
                    # then still hand the data back instead of discarding it.
                    print(f"Rate limit reached. Waiting for {self.rate_limit_wait_time} seconds...")
                    time.sleep(self.rate_limit_wait_time)

                if key:
                    if self._EXPIRATION_HEADER in response.headers:
                        expiration_time = self._parse_expiration(
                            response.headers[self._EXPIRATION_HEADER]
                        )
                        if expiration_time is not None:
                            self.reset_times[key] = expiration_time
                            print(f"Key expires at {expiration_time}")

                    if key in self.reset_times and time.time() > self.reset_times[key]:
                        # Prune the token for future calls; the response we
                        # already hold is still valid and is returned below.
                        self._drop_key(used_index, key)
                        print(f"Key {key} expired. Removing.")

                return response
            except self.retryable_exceptions as e:
                if attempt == self.max_retries:
                    print(f"Attempt {attempt}/{self.max_retries}: {e.__class__.__name__}")
                    # Compare with None: a 4xx/5xx Response is falsy, which
                    # would hide exactly the error body we want to show.
                    if response is not None:
                        print(f"Error details: {response.text}")
                    raise
                else:
                    print(f"Attempt {attempt}/{self.max_retries}: {e.__class__.__name__} occurred. Retrying...")

        return None

# Tokens are read from the GITHUB_TOKENS environment variable (comma-separated)
# so real credentials never need to be typed into the notebook. When it is
# unset or empty, the original three blank placeholders are used.
api_keys = [
    token.strip()
    for token in os.environ.get("GITHUB_TOKENS", "").split(",")
    if token.strip()
] or ["", "", ""]
# Shared client used by every cell that talks to the GitHub API.
gh = GitHubHandler(api_keys)


In [None]:

def get_all_issues(url, client=None):
    """Collect every closed issue of a repository, following pagination.

    Args:
        url: Repository API URL; "/issues" is appended to it.
        client: Object exposing ``get(url, params=...)`` that returns a
            requests-style response. Defaults to the notebook-level ``gh``.

    Returns:
        A list with the decoded JSON items of all pages, in the order received.

    Raises:
        RuntimeError: If the client returns no response for a page.
    """
    # NOTE(review): GitHub documents that this endpoint also lists pull
    # requests; filter on the "pull_request" key if only issues are wanted.
    if client is None:
        client = gh
    items = []
    page = 1
    while True:
        # "page" has to be sent explicitly: without it every request returns
        # page 1 again and the loop only ends when the rate limit is exhausted.
        params = {"per_page": 100, "state": "closed", "page": page}
        response = client.get(url + "/issues", params=params)
        if response is None:
            raise RuntimeError(f"No response received for page {page} of {url}/issues")
        response.raise_for_status()  # Raise an exception for non-200 status codes
        data = response.json()
        if not data:
            break
        print(f"Fetched {len(data)} issues from page {page}")
        items.extend(data)
        page += 1
    return items
# Repository API root; get_all_issues appends "/issues" to it.
url = "https://api.github.com/repos/shosetsuorg/shosetsu"
# Later cells read gh_issues, so this cell has to run successfully first.
gh_issues = get_all_issues(url)


Attempt 1/3: HTTPError occurred. Retrying...
Attempt 2/3: HTTPError occurred. Retrying...
Attempt 3/3: HTTPError


HTTPError: 403 Client Error: Forbidden for url: https://api.github.com/repos/shosetsuorg/shosetsu/issues?per_page=100&state=closed

In [None]:
# Save the issue list exactly as fetched (comments are added in the next cell).
with open("shosetsu_issues.json", "w") as f:
    json.dump(gh_issues, f)

In [None]:
# Attach the complete comment thread to every issue, then persist the result.
for issue_gh in gh_issues:
    comment_data = []
    if issue_gh["comments"] > 0:
        # The comments endpoint is paginated, so a single un-paged request
        # silently truncates long threads. Walk the pages until a short one.
        comment_page = 1
        while True:
            comments = gh.get(
                issue_gh["comments_url"],
                params={"per_page": 100, "page": comment_page},
            )
            if comments is None:
                raise RuntimeError(
                    f"No response received for {issue_gh['comments_url']}"
                )
            batch = comments.json()
            comment_data.extend(batch)
            if len(batch) < 100:
                break
            comment_page += 1
    # Issues without comments get an empty list so later cells can iterate
    # "comment_data" unconditionally.
    issue_gh["comment_data"] = comment_data
with open("shosetsu_issues_w_comments.json", "w") as f:
    json.dump(gh_issues, f)

Rate limit info: {'limit': 60, 'remaining': 58, 'reset': 1710436269}
Rate limit info: {'limit': 60, 'remaining': 58, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 58, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 57, 'reset': 1710436269}
Rate limit info: {'limit': 60, 'remaining': 57, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 57, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 56, 'reset': 1710436269}
Rate limit info: {'limit': 60, 'remaining': 56, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 56, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 55, 'reset': 1710436269}
Rate limit info: {'limit': 60, 'remaining': 55, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 55, 'reset': 1710436270}
Rate limit info: {'limit': 60, 'remaining': 54, 'reset': 1710436269}
Rate limit info: {'limit': 60, 'remaining': 54, 'reset': 1710436270}
Rate limit info: {'limit': 60, 're

In [None]:
# Build one text blob per issue: "<title> <body>" followed by one line per comment.
issue_body_w_comments = {}  # issue number -> combined text
issue_body_array = []       # [issue number, combined text] pairs, in fetch order
for issue in gh_issues:
    # A missing title or body becomes a single space, as before.
    title = issue["title"] if issue["title"] is not None else " "
    body = issue["body"] if issue["body"] is not None else " "
    parts = [title + " " + body]
    for comment in issue["comment_data"]:
        # A null comment body would raise TypeError on concatenation; treat it
        # as empty text instead.
        parts.append(comment["body"] or "")
    combined_text = "\n".join(parts)
    issue_body_w_comments[issue["number"]] = combined_text
    issue_body_array.append([issue['number'], combined_text])

# JSON object keys are always strings, so the integer issue numbers come back
# as str if this file is reloaded.
with open("shosetsu_issues_w_comments_body.json", "w") as f:
    json.dump(issue_body_w_comments, f)


NameError: name 'gh_issues' is not defined

# Gemini issue-issue mapping

# AI class

In [None]:
from abc import ABC, abstractmethod
from IPython.display import display
from IPython.display import Markdown
import os
import google.generativeai as genai
def to_markdown(text):
  """Render ``text`` as a Markdown block quote.

  Bullet characters are turned into Markdown list markers and every line,
  including blank ones, is prefixed with "> ".
  """
  # Imported here because the notebook never imports textwrap elsewhere;
  # without it the call below fails with NameError.
  import textwrap

  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))


class AIModel(ABC):
  """Common interface for the model wrappers defined in this notebook.

  Deriving from ABC is what makes @abstractmethod take effect: the base class
  can no longer be instantiated, and neither can a subclass that leaves
  ``train`` or ``prompt`` undefined.
  """

  @abstractmethod
  def train(self):
    """Prepare the model for use; what that means is up to the subclass."""
    # NOTE(review): GeminiAIModel.train takes an extra argument; ABC only
    # checks that the method exists, not its signature.
    pass

  @abstractmethod
  def prompt(self,text):
    """Send ``text`` to the model; subclasses return the model's answer."""
    pass



def save_text_to_file(file_path, text):
    """Write ``text`` to ``file_path``, creating parent directories if needed.

    Args:
        file_path: Destination path; an existing file is overwritten.
        text: String content to write.
    """
    directory = os.path.dirname(file_path)
    # A bare filename has no directory part, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Explicit UTF-8 so non-ASCII text is written the same way on every platform.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(text)

# # Example usage:
# file_path = "path/to/your/file.txt"  # Replace with the desired file path
# text = "This is the text to be saved."

# save_text_to_file(file_path, text)

# print("Text saved successfully!")





class OpenAIModel(AIModel):
    """AIModel implementation that sends one system + user exchange per call."""

    def __init__(self, keys, model_type):
        """Remember the API keys and the model name passed to each request."""
        self.keys = keys
        self.model_type = model_type

    def train(self):
        """No-op apart from printing a placeholder message."""
        print('Nothing')

    def prompt(self, text):
        """Return the reply for ``text['prompt1']`` (system) and ``text['prompt2']`` (user).

        Only the first entry of ``self.keys`` is used.
        """
        # NOTE(review): ``OpenAI`` is not imported anywhere in the visible
        # notebook; confirm it is brought into scope before this runs.
        client = OpenAI(api_key=self.keys[0])
        conversation = [
            {"role": "system", "content": text['prompt1']},
            {"role": "user", "content": text['prompt2']},
        ]
        completion = client.chat.completions.create(
            model=self.model_type,
            messages=conversation,
        )
        return completion.choices[0].message.content


class GeminiAIModel(AIModel):
  """AIModel implementation that spreads prompts over one Gemini handle per key.

  With ``retention=True`` each handle is a chat session that accumulates
  history. With ``retention=False`` each handle is the bare model and every
  prompt is an independent ``generate_content`` call.
  """

  def __init__(self,keys,model_type,retention=True):
    """Create one model handle per API key.

    Args:
      keys: Iterable of API keys.
      model_type: Model name passed to ``genai.GenerativeModel``.
      retention: Whether prompts share chat history (see class docstring).
    """
    self.keys = keys
    self.model_type = model_type
    self.retention = retention
    self.models=[]
    self.iter = 0
    for api_key in self.keys:
      # NOTE(review): genai.configure looks like process-wide configuration;
      # confirm each handle really keeps using the key it was created under.
      genai.configure(api_key=api_key)
      model = genai.GenerativeModel(self.model_type)
      # Without retention, keep the model itself. The previous code called
      # generate_content() with no content here, which fails immediately and
      # would not have produced an object with send_message anyway.
      handle = model.start_chat(history=[]) if self.retention else model
      self.models.append(handle)

  def _ask(self, handle, text):
    """Send ``text`` through ``handle`` using whichever call it supports."""
    if hasattr(handle, "send_message"):
      return handle.send_message(text)
    return handle.generate_content(text)

  def train(self,one_shot_prompt):
    """Send ``one_shot_prompt`` to every handle, printing each handle's index.

    Only chat sessions remember the prompt; stateless handles answer it and
    forget it.
    """
    for index, handle in enumerate(self.models):
      self._ask(handle, one_shot_prompt)
      print(index)

  def prompt(self,text):
    """Send ``text`` to the next handle in round-robin order and return the reply text."""
    handle = self.models[self.iter % len(self.models)]
    response = self._ask(handle, text)
    # Advance only after a successful call so a failed prompt retries on the
    # same handle.
    self.iter += 1
    return response.text

# Mapping code

In [None]:
# One chat-backed handle per key in the list (retention defaults to True).
GeminiModel = GeminiAIModel([""], 'gemini-pro')
# Prime every chat session with the scoring instructions before any issue pair is sent.
GeminiModel.train("You'll receive two issues with their number and body (body of the issue and the comments in it), you have to compare and find how similar(related) they are, and return a number from 1-100 representing the same ")

0


In [None]:
# Ask Gemini for a similarity score for pairs of issues and log one line per pair.
# Open the file in append mode so that existing content is not overwritten
with open('issue_mapping.txt', 'a') as file:
    for i in range(len(issue_body_array)):
        # j starts after i, so each unordered pair is visited at most once.
        for j in range(i+1,len(issue_body_array)):
          issue1_no, issue1_content = issue_body_array[i]
          issue2_no, issue2_content = issue_body_array[j]
          # Skip a pair whose two entries carry the same issue number.
          if issue1_no == issue2_no:
              continue
          similarity_index = GeminiModel.prompt("Here are the two issues, Ist: Issue Number {} Body(with comments) {} IInd: Issue Number {} Body(with comments) {}, Just give me the % number (integer) as output, dont' give any explanation.".format(issue1_no, issue1_content, issue2_no, issue2_content))
          print(similarity_index)
          # The model's reply is written verbatim; it is not parsed or validated as a number.
          file.write("{},{}: {}\n".format(issue1_no, issue2_no, similarity_index))
        # NOTE(review): this break belongs to the outer loop, so only the first
        # issue is compared against the rest. Presumably a limit on API calls;
        # confirm before relying on the file as a full pairwise mapping.
        break


NameError: name 'issue_body_array' is not defined

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_index_matrix(issue_body_w_comments):
    """Compute pairwise TF-IDF cosine similarities for a mapping of texts.

    Args:
        issue_body_w_comments: Mapping whose values are the documents.

    Returns:
        A ``(similarity_matrix, keys)`` tuple. ``keys`` lists the mapping's
        keys in iteration order, which is the row/column order of the matrix.
    """
    tfidf = TfidfVectorizer().fit_transform(issue_body_w_comments.values())
    ordered_keys = list(issue_body_w_comments.keys())
    return cosine_similarity(tfidf, tfidf), ordered_keys



# issue_keys[k] is the dict key (issue number) behind row/column k of cosine_matrix.
cosine_matrix, issue_keys = cosine_index_matrix(issue_body_w_comments)

In [None]:
# Show the similarity matrix; the printed form elides the middle of large arrays.
print(cosine_matrix)

[[1.         0.00785506 0.10388417 ... 0.14472926 0.0839042  0.07344177]
 [0.00785506 1.         0.04640425 ... 0.05006066 0.0379262  0.10406602]
 [0.10388417 0.04640425 1.         ... 0.09194114 0.06464771 0.06525204]
 ...
 [0.14472926 0.05006066 0.09194114 ... 1.         0.06939879 0.09911186]
 [0.0839042  0.0379262  0.06464771 ... 0.06939879 1.         0.01210468]
 [0.07344177 0.10406602 0.06525204 ... 0.09911186 0.01210468 1.        ]]


In [None]:
# Issue numbers in the same order as the rows and columns of cosine_matrix.
print(issue_keys)

[242, 241, 240, 237, 236, 235, 234, 232, 231, 230, 224, 220, 217, 216, 215, 213, 212, 211, 210, 208, 206, 205, 204, 203, 192, 190, 187, 186, 180, 177, 175, 174, 172, 171, 169, 168, 167, 166, 164, 163, 161, 160, 159, 157, 155, 153, 151, 150, 149, 148, 147, 146, 145, 144, 143, 141, 140, 138, 137, 136, 133, 132, 131, 130, 129, 128, 126, 125, 124, 121, 120, 119, 118, 116, 115, 114, 112, 109, 108, 107, 106, 105, 104, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 89, 86, 85, 83, 82, 81, 80, 79, 77, 76, 75, 74, 73, 72, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 4, 3, 2, 1]


In [None]:
def get_similar_issues(cosine_matrix, issue_keys, issue_number, top_n=5):
    """Return matrix positions of the ``top_n`` issues most similar to one issue.

    Args:
        cosine_matrix: Square similarity matrix supporting ``row.argsort()``.
        issue_keys: Issue numbers in matrix row order.
        issue_number: Issue to find neighbours for.
        top_n: Maximum number of neighbours to return.

    Returns:
        A list of row indices into ``cosine_matrix`` (not issue numbers),
        most similar first. The queried issue itself is never included.

    Raises:
        ValueError: If ``issue_number`` is not in ``issue_keys``.
    """
    index = issue_keys.index(issue_number)
    print(index)
    # Rank every row position from most to least similar.
    ranked = cosine_matrix[index].argsort()[::-1]
    # Drop the query row by position rather than assuming it sorts last: an
    # all-zero row or an exact duplicate would otherwise let the issue match
    # itself and push a real neighbour out.
    similar_issues = [candidate for candidate in ranked if candidate != index]
    return similar_issues[:top_n]





In [None]:
# Issue whose nearest neighbours are printed below.
issue_number_we_want_to_find_similar_issues_for = 1
similar_issue = get_similar_issues(cosine_matrix, issue_keys, issue_number_we_want_to_find_similar_issues_for)

# issue_body_w_comments is keyed by issue number, not by matrix position.
# Looking it up with the row index printed the text of a different issue
# under this label.
print(
    f"issue #{issue_number_we_want_to_find_similar_issues_for}:",
    issue_body_w_comments[issue_number_we_want_to_find_similar_issues_for],
)

# get_similar_issues returns matrix positions; issue_keys maps them back to numbers.
for issue in similar_issue:
    print("Similar issues:", issue_keys[issue])
    print(issue_body_w_comments[issue_keys[issue]])

174
issue #1: [F-R] Compiling notifications **Is your feature request related to a problem? Please describe.**
When downloading chapters, each individual chapter gets its own notifcation .


**Describe the solution you'd like**
For all notifications coming from shosetsu to have one major notifcation tab, and it can have a drop down window to elaborate each notifcation

**Additional context**
This is what I mean by each download having it's own notifcation tab.
![Screenshot_20211219-105316_Shosetsu](https://user-images.githubusercontent.com/59666465/146662118-6f8cbd08-138f-4784-9024-9cb63a4b6857.jpg)

Similar issues: 163
[Bug] [2.0.0-1677] Offline extension install attempt blocks future online extension installs **Describe the bug**
Attempting to install an extension while offline will bring the app into a state where it is unable to install extensions, even after it's brought back online.

**To Reproduce**
Steps to reproduce the behavior:
0. optional: clear app data for a clean test
1.

In [None]:
# Sanity check: expect a square matrix with one row and column per issue.
print(cosine_matrix.shape)

(175, 175)


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import defaultdict

def preprocess_text(text, drop_punctuation=True):
    """Tokenise ``text`` into lower-cased, stopword-free, lemmatised tokens.

    Args:
        text: Raw document string.
        drop_punctuation: When True (default), discard tokens that contain no
            letter or digit, such as ".", ",", "**" or "[". Pass False to keep
            them.

    Returns:
        A list of token strings in document order.
    """
    # NOTE(review): the NLTK tokenizer, stopword list and WordNet data must be
    # available locally; no download step is visible in this notebook.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = []
    for token in word_tokenize(text):
        token = token.lower()
        if token in stop_words:
            continue
        # Pure-punctuation tokens carry no meaning, yet they inflate the
        # Jaccard overlap and end up as the top terms of the LDA topics.
        if drop_punctuation and not any(char.isalnum() for char in token):
            continue
        tokens.append(lemmatizer.lemmatize(token))
    return tokens

def context_matching_similarity(issue1, issue2):
    """Return the Jaccard similarity of the preprocessed token sets of two texts.

    The result is ``|A ∩ B| / |A ∪ B|``, or 0 when both token sets are empty.
    """
    first_vocab = set(preprocess_text(issue1))
    second_vocab = set(preprocess_text(issue2))

    combined = first_vocab | second_vocab
    if len(combined) == 0:
        # Nothing to compare: avoid dividing by zero.
        return 0

    shared = first_vocab & second_vocab
    return len(shared) / len(combined)

def build_context_matching_matrix(issues):
    """Compute the Jaccard similarity of every pair of issue texts.

    Args:
        issues: Sequence of raw issue texts.

    Returns:
        A ``defaultdict(dict)`` where ``matrix[i][j]`` is the similarity of
        the texts at positions ``i`` and ``j``. Both directions are stored;
        there is no ``matrix[i][i]`` entry.
    """
    # Preprocess each text once. Doing it per pair repeats the same
    # tokenising and lemmatising for every other issue in the list.
    token_sets = [set(preprocess_text(text)) for text in issues]
    num_issues = len(token_sets)

    matrix = defaultdict(dict)
    for i in range(num_issues):
        for j in range(i + 1, num_issues):
            union = len(token_sets[i] | token_sets[j])
            # Two empty token sets have no union; score them 0 as before.
            similarity = len(token_sets[i] & token_sets[j]) / union if union != 0 else 0
            matrix[i][j] = similarity
            matrix[j][i] = similarity
    return matrix

# Usage: the matrix is indexed by position in issue_texts, not by issue number.
issue_texts = list(issue_body_w_comments.values())
context_matching_matrix = build_context_matching_matrix(issue_texts)



In [None]:
# Dump the full pairwise structure: one inner dict per issue position (very large output).
print(context_matching_matrix)

defaultdict(<class 'dict'>, {0: {1: 0.0, 2: 0.016666666666666666, 3: 0.0, 4: 0.005405405405405406, 5: 0.005405405405405406, 6: 0.013157894736842105, 7: 0.01, 8: 0.75, 9: 0.0, 10: 0.0, 11: 0.018518518518518517, 12: 0.010526315789473684, 13: 0.013888888888888888, 14: 0.0, 15: 0.012345679012345678, 16: 0.05555555555555555, 17: 0.021739130434782608, 18: 0.018867924528301886, 19: 0.008403361344537815, 20: 0.0, 21: 0.016129032258064516, 22: 0.018518518518518517, 23: 0.023809523809523808, 24: 0.01, 25: 0.011904761904761904, 26: 0.017241379310344827, 27: 0.021739130434782608, 28: 0.019230769230769232, 29: 0.0, 30: 0.0, 31: 0.0, 32: 0.012048192771084338, 33: 0.0, 34: 0.0, 35: 0.0, 36: 0.009708737864077669, 37: 0.0, 38: 0.0, 39: 0.007633587786259542, 40: 0.0035335689045936395, 41: 0.012345679012345678, 42: 0.009708737864077669, 43: 0.006756756756756757, 44: 0.008733624454148471, 45: 0.005847953216374269, 46: 0.0, 47: 0.01, 48: 0.0, 49: 0.0, 50: 0.015384615384615385, 51: 0.0, 52: 0.0, 53: 0.0, 54

In [None]:
# Scores of the text at position 4 against every other position (no self entry is stored).
print(context_matching_matrix[4])

{0: 0.005405405405405406, 1: 0.018518518518518517, 2: 0.043668122270742356, 3: 0.0861244019138756, 5: 0.11314984709480122, 6: 0.08050847457627118, 7: 0.08560311284046693, 8: 0.0, 9: 0.02702702702702703, 10: 0.0410958904109589, 11: 0.07373271889400922, 12: 0.04580152671755725, 13: 0.07725321888412018, 14: 0.043859649122807015, 15: 0.061224489795918366, 16: 0.031413612565445025, 17: 0.056338028169014086, 18: 0.06422018348623854, 19: 0.07581227436823104, 20: 0.059113300492610835, 21: 0.04782608695652174, 22: 0.05909090909090909, 23: 0.03271028037383177, 24: 0.09411764705882353, 25: 0.0778688524590164, 26: 0.05803571428571429, 27: 0.06132075471698113, 28: 0.04524886877828054, 29: 0.03664921465968586, 30: 0.03626943005181347, 31: 0.06132075471698113, 32: 0.056451612903225805, 33: 0.055793991416309016, 34: 0.05092592592592592, 35: 0.07224334600760456, 36: 0.0888030888030888, 37: 0.051643192488262914, 38: 0.05092592592592592, 39: 0.08391608391608392, 40: 0.0794392523364486, 41: 0.069958847736

# Gensim

In [None]:
from gensim import corpora, models, similarities

def build_topic_model(issue_texts, num_topics=10, passes=15, random_state=None):
    """Train an LDA topic model on the given issue texts.

    Args:
        issue_texts: Iterable of raw document strings.
        num_topics: Number of topics to fit (default 10, as before).
        passes: Number of training passes over the corpus (default 15, as before).
        random_state: Seed forwarded to ``LdaModel``. Pass a fixed value to
            make the topics reproducible; None keeps the previous unseeded
            behaviour.

    Returns:
        The trained ``LdaModel``.
    """
    # Tokenize and preprocess texts
    tokenized_texts = [preprocess_text(text) for text in issue_texts]
    # Map each token to an integer id, then encode every document as a bag of words.
    dictionary = corpora.Dictionary(tokenized_texts)
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_texts]
    # Train LDA model
    lda_model = models.LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        passes=passes,
        random_state=random_state,
    )
    return lda_model

# Usage: reuses issue_texts from the Jaccard cell, so that cell has to run first.
lda_model = build_topic_model(issue_texts)


In [None]:
# Five highest-weighted terms of each topic.
print(lda_model.print_topics(num_words=5))

[(0, '0.053*"." + 0.032*"," + 0.023*":" + 0.019*"]" + 0.019*"["'), (1, '0.163*"*" + 0.055*"." + 0.037*":" + 0.020*"-" + 0.020*","'), (2, '0.066*"*" + 0.029*"." + 0.021*"update" + 0.020*"," + 0.015*"describe"'), (3, '0.042*"." + 0.025*"," + 0.017*":" + 0.012*">" + 0.010*"["'), (4, '0.055*"." + 0.042*"," + 0.024*">" + 0.024*"chapter" + 0.015*":"'), (5, '0.028*"," + 0.020*"." + 0.016*"shosetsu" + 0.008*"tried" + 0.007*"release"'), (6, '0.034*"," + 0.032*"(" + 0.031*")" + 0.026*"." + 0.023*":"'), (7, '0.068*")" + 0.068*"(" + 0.035*"#" + 0.031*":" + 0.017*"$"'), (8, '0.034*"|" + 0.029*":" + 0.027*"]" + 0.027*"[" + 0.023*"http"'), (9, '0.025*":" + 0.022*"`" + 0.017*"," + 0.017*"." + 0.013*"]"')]
