In [1]:
import pandas as pd
from transformers import pipeline, AutoTokenizer

In [2]:
# Load the Pre-trained Model and its Tokenizer
model_path = './model_local'
tokenizer = AutoTokenizer.from_pretrained(model_path)
classifier = pipeline("text-classification", model=model_path, tokenizer=tokenizer, truncation=True)

In [3]:
classifier("after user sing up the email must be confirmed prepare a goodlooking email template develop a be to send this template after user registration develop an endpoint to check confirmation token develop a fe page with confirmation status create a cron to check accounts which havent confirmed an email and remove them")

[{'label': 'LABEL_0', 'score': 0.7775368094444275}]

In [4]:
classifier("description of the problem when i try to perform file upload in sut via the browsers running in docker the file upload popup is not opening and neither accepts any files causing scripts to fail browser and version safari operating system ran on docker webdrivermanager version v503")

[{'label': 'LABEL_0', 'score': 0.9122573733329773}]

In [5]:
classifier("descriptive summary hyrax 302 makes rails 526 support official and rails 526 includes a security update for polymorphicpath which is a method used in all link generation across rails linkto urlfor etc all these methods no longer support string parameters which must be converted to symbols this is a problem in 526 and 5246 our current 525 is unaffected and upgrading may see problems on random pages we should check through the application for calls to polymorphicpath urlfor linkto and any other link generating methods you can think of all hyrax links should be fixed in 302 then we can safely update to hyrax 302 and rails 526 expected behavior all links on site call their generation method with symbols or objects not strings hyrax 302 rails 526 related work blocked 1578 from going to 302 accessibility concerns")

[{'label': 'LABEL_0', 'score': 0.9983401298522949}]

In [6]:
df = pd.read_csv("csv/TD_dataset_clean.csv" , index_col = 0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 127247 entries, 0 to 127246
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   text    126306 non-null  object
 1   label   127247 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 2.9+ MB


In [7]:
df['text'] = df['text'].astype(str)
type(df['text'].iloc[0])

str

In [8]:
df = df[df['label']==1]
df = df.reset_index(drop=True)
df

Unnamed: 0,text,label
0,we didnt have time to do automated tests or gh...,1
1,user storyas a sre i would like to know that ...,1
2,background a while ago we had to change our bu...,1
3,as a developer\ri need a badge in readmemd fil...,1
4,subtasks\r refactor the imports\r export th...,1
...,...,...
56026,id like for us to add at least one test where ...,1
56027,some of the spikes on kahuna graphs seem to be...,1
56028,in gitlab by warsaw on dec nice if youre g...,1
56029,we have pretty similar code in scattered acros...,1


In [9]:
df.rename(columns={'label': 'td_label'}, inplace=True)


In [10]:
df_small = df.iloc[:5000]
df_small

Unnamed: 0,text,td_label
0,we didnt have time to do automated tests or gh...,1
1,user storyas a sre i would like to know that ...,1
2,background a while ago we had to change our bu...,1
3,as a developer\ri need a badge in readmemd fil...,1
4,subtasks\r refactor the imports\r export th...,1
...,...,...
4995,for vue components see \rfor the plain js code...,1
4996,this issue is tracking obtaining benchmark dat...,1
4997,add make clean to clean the intermedia files g...,1
4998,currently pulling feed from webstame as an ifr...,1


In [11]:
# Function to get predictions and scores
def get_predictions(text):
    result = classifier(text)
    return result[0]['label'], result[0]['score']

# Apply the classifier to each row
df[['high_priority_label', 'high_priority_score']] = df.apply(lambda row: pd.Series(get_predictions(row['text'])), axis=1)
df

Unnamed: 0,text,td_label,high_priority_label,high_priority_score
0,we didnt have time to do automated tests or gh...,1,LABEL_0,0.539607
1,user storyas a sre i would like to know that ...,1,LABEL_1,0.932910
2,background a while ago we had to change our bu...,1,LABEL_0,0.956235
3,as a developer\ri need a badge in readmemd fil...,1,LABEL_1,0.970458
4,subtasks\r refactor the imports\r export th...,1,LABEL_0,0.931092
...,...,...,...,...
56026,id like for us to add at least one test where ...,1,LABEL_1,0.924757
56027,some of the spikes on kahuna graphs seem to be...,1,LABEL_1,0.939248
56028,in gitlab by warsaw on dec nice if youre g...,1,LABEL_1,0.739150
56029,we have pretty similar code in scattered acros...,1,LABEL_1,0.924680


In [15]:
# Function to assign the new name based on high_priority_label
def assign_priority_name(label):
    if label == 'LABEL_0':
        return 'High Priority'
    elif label == 'LABEL_1':
        return 'Not High Priority'
    else:
        return 'Unknown'  # You can change this to handle other unexpected labels

# Apply the function to each row
df['priority_name'] = df['high_priority_label'].apply(assign_priority_name)
df

Unnamed: 0,text,td_label,high_priority_label,high_priority_score,priority_name
0,we didnt have time to do automated tests or gh...,1,LABEL_0,0.539607,High Priority
1,user storyas a sre i would like to know that ...,1,LABEL_1,0.932910,Not High Priority
2,background a while ago we had to change our bu...,1,LABEL_0,0.956235,High Priority
3,as a developer\ri need a badge in readmemd fil...,1,LABEL_1,0.970458,Not High Priority
4,subtasks\r refactor the imports\r export th...,1,LABEL_0,0.931092,High Priority
...,...,...,...,...,...
56026,id like for us to add at least one test where ...,1,LABEL_1,0.924757,Not High Priority
56027,some of the spikes on kahuna graphs seem to be...,1,LABEL_1,0.939248,Not High Priority
56028,in gitlab by warsaw on dec nice if youre g...,1,LABEL_1,0.739150,Not High Priority
56029,we have pretty similar code in scattered acros...,1,LABEL_1,0.924680,Not High Priority


In [16]:
average_scores = df.groupby('priority_name')['high_priority_score'].mean()
average_scores

priority_name
High Priority        0.828186
Not High Priority    0.861253
Name: high_priority_score, dtype: float64