In [25]:
import string
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import  stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
import warnings
warnings.filterwarnings('ignore')
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spacy
# Instantiate the two NLP backends shared by the rest of the notebook:
# `model` produces sentence embeddings, `nlp` is the spaCy pipeline.
SENTENCE_MODEL_NAME = 'paraphrase-distilroberta-base-v1'
SPACY_PIPELINE_NAME = 'en_core_web_sm'

model = SentenceTransformer(SENTENCE_MODEL_NAME)
nlp = spacy.load(SPACY_PIPELINE_NAME)



In [26]:
# nltk.download('stopwords')
# nltk.download('wordnet')
# nltk.download('words')

**Preprocessing**

In [27]:
# Read the raw requirements table from the CSV that sits next to the notebook.
data = pd.read_csv(
    './Statedriven.csv',
    encoding='unicode-escape',
)

In [28]:
# Show the column labels of the loaded frame; as the cell's only expression,
# its value is what the notebook displays below the cell.
data.columns

Index(['Requirement with Standard Syntax'], dtype='object')

In [29]:
# Keep only the requirement text and seed the working column that the cleaning
# steps will overwrite. The explicit .copy() gives `data` its own storage, so
# adding a column does not trigger pandas' SettingWithCopy warning (which the
# notebook-wide warnings filter would otherwise hide).
data = data[['Requirement with Standard Syntax']].copy()
data['Cleaned Data'] = data['Requirement with Standard Syntax']

In [30]:
# Discard every row that has a missing value in at least one column.
data = data.dropna()


In [31]:

# Pre-create the result columns with genuinely missing values.
# The previous `.isnull()` initialisation produced boolean columns full of
# False, which then received strings and floats from the clause-splitting cell;
# object/float columns avoid that dtype clash.
data['sentence1'] = None            # object column, later holds the first clause
data['sentence2'] = None            # object column, later holds the second clause
data['similarity_score'] = np.nan   # float column, later holds the clause similarity
# 'SimilarSentence' is not written by any cell visible here, so it keeps its
# original initial value (boolean False) for compatibility.
data['SimilarSentence'] = False

# data


In [32]:
# Normalise the requirement text before clause splitting and embedding.
#
# regex=True is passed explicitly: since pandas 2.0 `Series.str.replace`
# treats the pattern as a literal string by default, which would silently turn
# every substitution below into a no-op.
#
# Patterns anchored with ^...$ only fire when the *whole* cell is an e-mail
# address, URL or phone number; the currency and number patterns match
# anywhere in the text. They are applied in this order.
placeholder_patterns = [
    (r'^.+@[^\.].*\.[a-z]{2,}$', 'emailaddress'),
    (r'^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$', 'webaddress'),
    (r'£|\$', 'dollers'),
    (r'^\(?[\d]{3}\)?[\s-]?[\d]{3}[\s-]?[\d]{4}$', 'phonenumber'),
    (r'\d+(\.\d+)?', 'numbr'),
]

cleaned_text = data['Cleaned Data'].str.lower()
for pattern, placeholder in placeholder_patterns:
    cleaned_text = cleaned_text.str.replace(pattern, placeholder, regex=True)

# Drop whitespace-separated tokens that are themselves a piece of
# string.punctuation (punctuation attached to a word is left untouched).
cleaned_text = cleaned_text.apply(
    lambda x: ' '.join(term for term in x.split() if term not in string.punctuation)
)

# English stop words plus a few informal abbreviations.
stop_words = set(stopwords.words('english') + ['u', 'ü', 'ur', '4', '2', 'im', 'dont', 'doin', 'ure'])
cleaned_text = cleaned_text.apply(
    lambda x: ' '.join(term for term in x.split() if term not in stop_words)
)

# Reduce every remaining token to its WordNet lemma.
lemmatizer = WordNetLemmatizer()
cleaned_text = cleaned_text.apply(
    lambda x: ' '.join(lemmatizer.lemmatize(t) for t in x.split())
)

data['Cleaned Data'] = cleaned_text

data['Cleaned Data']


0     user register system, system shall allow user ...
1           game numbrp resolution, game shall playable
2     user access his/her account, system shall allo...
3     user play tournament, system shall allow user ...
4     user participate respective tournament, system...
                            ...                        
65    player find hidden collectible, unlock additio...
66     player solves puzzle, new area pathway revealed.
67    player trigger in-game event, game world altered.
68            player activates switch, door gate opens.
69    player's character swim water, movement speed ...
Name: Cleaned Data, Length: 70, dtype: object

In [33]:

def calculate_similarity_score(sentence1, sentence2, model):
    """Return the cosine similarity between the embeddings of two sentences.

    Both sentences are encoded in a single ``model.encode`` call; each
    resulting vector is reshaped to a single-row matrix and the pair is passed
    to ``cosine_similarity``.

    Args:
        sentence1: First sentence to embed.
        sentence2: Second sentence to embed.
        model: Object exposing ``encode(list_of_sentences)`` that returns one
            embedding per sentence.

    Returns:
        The single similarity value, extracted as a Python scalar.
    """
    first_vec, second_vec = (vec.reshape(1, -1) for vec in model.encode([sentence1, sentence2]))
    return cosine_similarity(first_vec, second_vec).item()

In [34]:

# Split every cleaned requirement into two clauses at its FIRST comma and score
# how similar the clauses are.
#
# - `partition` keeps everything after the first comma, so requirements that
#   contain further commas no longer lose their tail (the previous
#   `split(',')[1]` silently dropped it).
# - Both clauses are stripped so no stray leading/trailing space reaches the
#   embedding model or the generated text.
# - Results are collected in lists and assigned as whole columns, which
#   replaces the placeholder columns instead of writing strings/floats into
#   them cell by cell.
first_clauses, second_clauses, clause_scores = [], [], []

for cleaned_requirement in data['Cleaned Data']:
    head, comma, tail = cleaned_requirement.partition(',')
    if comma:
        sentence1, sentence2 = head.strip(), tail.strip()
    else:
        # No comma: both clauses fall back to the whole requirement.
        sentence1 = sentence2 = cleaned_requirement

    first_clauses.append(sentence1)
    second_clauses.append(sentence2)
    clause_scores.append(calculate_similarity_score(sentence1, sentence2, model))

data['sentence1'] = first_clauses
data['sentence2'] = second_clauses
data['similarity_score'] = clause_scores


In [35]:
def DestructureSentece(sentence):
    """Print a sentence's nouns, verbs and adverbs plus every noun-verb similarity.

    The sentence is parsed with the spaCy pipeline ``nlp``, rewritten as its
    space-joined lemmas, and that lemmatised text is parsed a second time.
    Part-of-speech filtering runs on the second parse. Adjectives are not
    collected. Each (noun, verb) pair is scored with
    ``calculate_similarity_score`` using the global ``model``.

    Args:
        sentence: Text to analyse.

    Returns:
        None. All results are printed.
    """
    lemmatised = ' '.join([tok.lemma_ for tok in nlp(sentence)])
    parsed = nlp(lemmatised)

    # Bucket token texts by the three part-of-speech tags of interest.
    by_pos = {'NOUN': [], 'VERB': [], 'ADV': []}
    for tok in parsed:
        if tok.pos_ in by_pos:
            by_pos[tok.pos_].append(tok.text)
    nouns, verbs, adverbs = by_pos['NOUN'], by_pos['VERB'], by_pos['ADV']

    print(f'Nouns: {nouns}')
    print(f'Verbs: {verbs}')
    print(f'Adverbs: {adverbs}')

    # Score each noun against each verb, noun-major order.
    noun_verb_pairs = [(noun, verb) for noun in nouns for verb in verbs]
    for noun, verb in noun_verb_pairs:
        similarity_score = calculate_similarity_score(noun, verb, model)
        print(f'{noun} - {verb} = {similarity_score}')

    

In [36]:
import itertools

def generate_requirements(tokens):
    """Build "If ..., then ..." sentences for each known event found in ``tokens``.

    Only the events 'adjust' and 'take' are recognised. For every recognised
    event that is contained in ``tokens``, one sentence is produced for each
    combination of subject ('user', 'system') and object ('volume', 'slider'),
    regardless of whether those subjects or objects occur in ``tokens``.

    Args:
        tokens: Container supporting ``in`` that is checked for event names.

    Returns:
        List of requirement sentences, ordered by event, then subject, then
        object.
    """
    actors = ('user', 'system')
    # event -> object -> consequence; insertion order fixes the output order.
    consequences = {
        'adjust': {
            'volume': 'changes the sound level',
            'slider': 'moves smoothly',
        },
        'take': {
            'volume': 'captures the sound',
            'slider': 'grabs the control',
        },
    }

    return [
        f"If {actor} {event}s the {target}, then {actor} {outcome}."
        for event, outcomes in consequences.items()
        if event in tokens
        for actor in actors
        for target, outcome in outcomes.items()
    ]

# Demo: generate requirements from a hand-written token list and print them
# one per line.
input_tokens = [
    'user', 'volume', 'slider', 'system',
    'user', 'volume', 'slider',
    'adjust', 'take', 'then',
]
requirements = generate_requirements(input_tokens)

for requirement in requirements:
    print(requirement)


If user adjusts the volume, then user changes the sound level.
If user adjusts the slider, then user moves smoothly.
If system adjusts the volume, then system changes the sound level.
If system adjusts the slider, then system moves smoothly.
If user takes the volume, then user captures the sound.
If user takes the slider, then user grabs the control.
If system takes the volume, then system captures the sound.
If system takes the slider, then system grabs the control.


In [49]:
def compareWithall(sentence1, threshold=0.34):
    """Pair a condition clause with every sufficiently similar response clause.

    ``sentence1`` is compared with each entry of ``data['sentence2']``. For
    every entry whose cosine similarity is strictly greater than ``threshold``
    an "If <sentence1> then <sentence2>" requirement is printed and stored in
    ``data`` on the row of the matching entry, in a column named after
    ``sentence1``.

    Changes from the earlier version:
      * The requirement is written to the matched row (``index``) instead of
        to a running match counter used as a row label, which put results on
        unrelated rows and could append new rows when labels had gaps.
      * All sentences are embedded in one batched ``model.encode`` call rather
        than one call per row.
      * The response clause is stripped, so the generated text has no double
        space after "then".

    Args:
        sentence1: Condition clause to compare against every response clause.
        threshold: Minimum (exclusive) cosine similarity for a match.

    Returns:
        List of generated requirement strings, in row order.

    Side effects:
        Adds/updates the columns ``'Similarity'`` and ``sentence1`` on the
        global ``data`` frame and prints every generated requirement.
        Expects ``data['sentence2']`` to contain strings, i.e. the
        clause-splitting cell must have run first.
    """
    if data.empty:
        return []

    responses = data['sentence2']
    # Row 0 is the query; rows 1.. line up with `responses`.
    embeddings = model.encode([sentence1] + responses.tolist())
    scores = cosine_similarity(embeddings[:1], embeddings[1:])[0]

    generated = []
    for index, response, score in zip(responses.index, responses, scores):
        if score > threshold:
            new_req = f"If {sentence1} then {response.strip()}"
            generated.append(new_req)
            # NOTE(review): as before, 'Similarity' stores the running ordinal
            # of the match (1, 2, ...), not the similarity score - confirm
            # that this is the intended content of the column.
            data.at[index, 'Similarity'] = len(generated)
            data.at[index, sentence1] = new_req
            print(new_req)
            # DestructureSentece(new_req)

    return generated


# Alternative runs, kept for reference:
# for index, row in data.iterrows():
#     compareWithall(row['sentence1'])
#
# compareWithall('user wants to exit the game')

# Assigned to a name so the notebook does not echo the returned list on top of
# the lines already printed by the function.
generated_requirements = compareWithall('user finish the game')

# Writing the data to a CSV file.
# CAUTION: this path is the same file the notebook reads its input from, so
# enabling the line would overwrite the source data with the enriched frame.
# data.to_csv('Statedriven.csv', index=False)


If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.
If user finish the game then  game ends.


KeyboardInterrupt: 

In [38]:
# nltk is already imported in the first cell; the re-import is redundant but harmless.
import nltk
# Fetch the 'omw-1.4' NLTK data package. The call reports success with True,
# which is what the cell displays; when the package is already present NLTK
# only logs that it is up to date.
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\WB\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [39]:
# nltk is already imported in the first cell; the re-import is redundant but harmless.
import nltk
# 'punkt' is a pre-trained tokenizer for English and
# 'averaged_perceptron_tagger' is a pre-trained English part-of-speech tagger.
# Presumably these are the resources needed by word_tokenize and
# pos_tag_sents in the following cell - TODO confirm.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\WB\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\WB\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [40]:
from collections import Counter
from nltk import word_tokenize, pos_tag, pos_tag_sents
# Part-of-speech tag the original (uncleaned) requirement sentences with NLTK
# and keep the (token, tag) lists next to each requirement.
texts = data['Requirement with Standard Syntax'].tolist()
tagged_texts = pos_tag_sents([word_tokenize(text) for text in texts])

data['POS'] = tagged_texts

# Display the enriched frame.
data

Unnamed: 0,Requirement with Standard Syntax,Cleaned Data,sentence1,sentence2,similarity_score,SimilarSentence,Similarity,adjust volume slider,POS
0,"When user register into the system, the system...","user register system, system shall allow user ...",user register system,system shall allow user select username passw...,0.395485,False,,,"[(When, WRB), (user, JJ), (register, NN), (int..."
1,"while the game have 720p resolution, the game ...","game numbrp resolution, game shall playable",game numbrp resolution,game shall playable,0.358248,False,,If adjust volume slider then system shall tak...,"[(while, IN), (the, DT), (game, NN), (have, VB..."
2,"When user access his/her account, the system s...","user access his/her account, system shall allo...",user access his/her account,system shall allow user enter password username.,0.410697,False,,If adjust volume slider then system shall cha...,"[(When, WRB), (user, JJ), (access, NN), (his/h..."
3,"When user play tournament, the system shall al...","user play tournament, system shall allow user ...",user play tournament,system shall allow user create team tournament.,0.548651,False,,,"[(When, WRB), (user, JJ), (play, NN), (tournam..."
4,When user participate in the respective tourna...,"user participate respective tournament, system...",user participate respective tournament,system shall allow user select respective teams.,0.445075,False,,,"[(When, WRB), (user, JJ), (participate, NN), (..."
...,...,...,...,...,...,...,...,...,...
65,"If the player finds a hidden collectible, then...","player find hidden collectible, unlock additio...",player find hidden collectible,unlock additional in-game content.,0.125356,False,,,"[(If, IN), (the, DT), (player, NN), (finds, VB..."
66,"If the player solves a puzzle, then a new area...","player solves puzzle, new area pathway revealed.",player solves puzzle,new area pathway revealed.,0.019465,False,,,"[(If, IN), (the, DT), (player, NN), (solves, V..."
67,"If the player triggers an in-game event, then ...","player trigger in-game event, game world altered.",player trigger in-game event,game world altered.,0.319569,False,,,"[(If, IN), (the, DT), (player, NN), (triggers,..."
68,"If the player activates a switch, then a door ...","player activates switch, door gate opens.",player activates switch,door gate opens.,0.30145,False,,,"[(If, IN), (the, DT), (player, NN), (activates..."
