In [75]:
!pip install stanza
import torch
import stanza



In [76]:
print(torch.cuda.is_available())


True


In [77]:
# Download and set up the Stanza pipeline
stanza.download('en')  # for English
nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma,depparse')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …

2023-11-25 20:28:49 INFO: Downloading default packages for language: en (English) ...
2023-11-25 20:28:50 INFO: File exists: C:\Users\pabma\stanza_resources\en\default.zip
2023-11-25 20:28:53 INFO: Finished downloading models and saved to C:\Users\pabma\stanza_resources.
2023-11-25 20:28:53 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …

2023-11-25 20:28:54 INFO: Loading these models for language: en (English):
| Processor | Package           |
---------------------------------
| tokenize  | combined          |
| pos       | combined_charlm   |
| lemma     | combined_nocharlm |
| depparse  | combined_charlm   |

2023-11-25 20:28:54 INFO: Using device: cuda
2023-11-25 20:28:54 INFO: Loading: tokenize
2023-11-25 20:28:54 INFO: Loading: pos
2023-11-25 20:28:55 INFO: Loading: lemma
2023-11-25 20:28:55 INFO: Loading: depparse
2023-11-25 20:28:55 INFO: Done loading processors!


In [133]:
import spacy

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

def evaluate_syntax_fluency(sentence):
    doc = nlp(sentence)

    if len(doc) == 0:
        return 0.0  # Empty sentence

    num_tokens = len(doc)
    if num_tokens <= 2:  # Penalize very short or fragmentary sentences
        return 0.5

    tree_depths = []
    unique_dependency_types = set()

    for token in doc:
        # Calculate depth of each token in the parse tree
        depth = 0
        current_token = token
        while current_token.head != current_token:
            depth += 1
            current_token = current_token.head
        tree_depths.append(depth)

        # Collect unique dependency types
        unique_dependency_types.add(token.dep_)

    # Metrics
    max_depth = max(tree_depths)
    depth_variety_score = len(unique_dependency_types) / num_tokens

    # Score calculation (adjusted heuristic)
    # Higher max depth might indicate complexity (lower fluency)
    # More variety in dependency types might indicate richer syntactic structure (higher fluency)
    fluency_score = (1 - (max_depth / (2 * num_tokens)) + depth_variety_score) / 2

    return fluency_score



In [134]:
# Test the function
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "While the fox jumps, the dog barks.",
    "Fox."
]

for sentence in sentences:
    fluency_score = evaluate_syntax_fluency(sentence)
    print(f"Sentence: {sentence}")
    print(f"Syntax Fluency Score: {fluency_score:.2f}\n")

Sentence: The quick brown fox jumps over the lazy dog.
Syntax Fluency Score: 0.77

Sentence: While the fox jumps, the dog barks.
Syntax Fluency Score: 0.81

Sentence: Fox.
Syntax Fluency Score: 0.50



In [135]:
def is_yes_no_question(question):
    question = question.lower().strip()
    if question.startswith(('is', 'are', 'do', 'does', 'did', 'was', 'were', 'will', 'can', 'could', 'should', 'have', 'has', 'had')):
        return 1
    else:
        return 0

def is_yes_no_answer(answer):
    answer = answer.lower().strip()
    if answer.startswith(('yes','no')):
        return 1
    else:
        return 0

In [141]:
# Test the function
sentences = [
    "Is this right?",
    "Which questions are you thinking of?",
    "Fox."
]

for sentence in sentences:
    is_yes_no = is_yes_no_question(sentence)
    print(f"Sentence: {sentence}")
    print(f"Is yes/no type question: {is_yes_no:.2f}\n")

Sentence: Is this right?
Is yes/no type question: 1.00

Sentence: Which questions are you thinking of?
Is yes/no type question: 0.00

Sentence: Fox.
Is yes/no type question: 0.00



In [142]:
import spacy
from nltk.tokenize import word_tokenize

# Load the spaCy model for linguistic features
nlp = spacy.load("en_core_web_sm")

def evaluate_conciseness(answer):
    # Tokenize the answer and analyze with spaCy
    doc = nlp(answer)
    word_count = len(word_tokenize(answer))

    # Calculate the ratio of content words (nouns, verbs, adjectives, adverbs) to total words
    content_words_count = sum(token.pos in [spacy.symbols.NOUN, spacy.symbols.VERB, spacy.symbols.ADJ, spacy.symbols.ADV] for token in doc)
    content_ratio = content_words_count / word_count if word_count else 0

    # Conciseness favors higher content ratio (more information with fewer words)
    conciseness_score = content_ratio

    # Adjusting score for extremely short answers
    # Extremely short answers (like 'yes' or 'no') are typically very concise
    if word_count <= 2:
        conciseness_score = max(conciseness_score, 0.9)

    # Adjusting score to be between 0 and 1
    conciseness_score = max(0, min(conciseness_score, 1))

    return conciseness_score

In [145]:
# Test the function
answers = [
    "Yes.",
    "The cat sat on the mat.",
    "This is a somewhat more elaborative answer providing detailed information, albeit not necessarily in a concise manner."
]

for answer in answers:
    score = evaluate_conciseness(answer)
    print(f"Answer: {answer}\nConciseness Score: {score:.2f}\n")

Answer: Yes.
Conciseness Score: 0.90

Answer: The cat sat on the mat.
Conciseness Score: 0.43

Answer: This is a somewhat more elaborative answer providing detailed information, albeit not necessarily in a concise manner.
Conciseness Score: 0.53



In [160]:
def syntactic_score(question,answer):
    coherency = 1 if is_yes_no_question(question) == is_yes_no_answer(answer) else 0
    fluency=evaluate_syntax_fluency(answer)
    conciseness=evaluate_conciseness(answer)
    syntactic_score=(coherency+fluency+conciseness)/3
    return syntactic_score

In [163]:
# Examples putting it all together
questions=['Are dogs pets?',
           'Is a dog a pet?',

           'Is it Saturday',
           'What day is it?',

           'Where are you going?',
           'Are you goind downtown?',

           'Did you forget?',
           'Have you forgotten?',

           'How long has it been?',
           'Has it been long?'
           ]

answers=['I dont know',
         'Yes it is',

         'It is not Saturday',
         'No',

         'Nowhere, stop asking',
         'YES!',
         
         'Maybe I did',
         'No. Stop bothering',

         'Its been very long since we last spoke',
         'No, not really']

In [164]:
# Test the function
for i in range(len(questions)):
    syntactic_score_=syntactic_score(questions[i],answers[i])
    print('Q:',questions[i],'|A:',answers[i],'|SYNTACTIC SCORE:',syntactic_score_ , '\n' ) 

Q: Are dogs pets? |A: I dont know |SYNTACTIC SCORE: 0.4236111111111111 

Q: Is a dog a pet? |A: Yes it is |SYNTACTIC SCORE: 0.638888888888889 

Q: Is it Saturday |A: It is not Saturday |SYNTACTIC SCORE: 0.3125 

Q: What day is it? |A: No |SYNTACTIC SCORE: 0.4666666666666666 

Q: Where are you going? |A: Nowhere, stop asking |SYNTACTIC SCORE: 0.5625 

Q: Are you goind downtown? |A: YES! |SYNTACTIC SCORE: 0.7999999999999999 

Q: Did you forget? |A: Maybe I did |SYNTACTIC SCORE: 0.5277777777777778 

Q: Have you forgotten? |A: No. Stop bothering |SYNTACTIC SCORE: 0.8958333333333334 

Q: How long has it been? |A: Its been very long since we last spoke |SYNTACTIC SCORE: 0.7708333333333334 

Q: Has it been long? |A: No, not really |SYNTACTIC SCORE: 0.7083333333333334 



In [165]:
# Question classifier

for i in range(len(questions)):

    coherency = 1 if is_yes_no_question(questions[i]) == is_yes_no_answer(answers[i]) else 0
    fluency=evaluate_syntax_fluency(answers[i])
    conciseness=evaluate_conciseness(answers[i])
    syntactic_score=(coherency+fluency+conciseness)/3

    print('Q:',questions[i],'|YES/NO?',is_yes_no_question(questions[i]), '||A:',answers[i],'|COHERENCY:',coherency, '|FLUENCY:', fluency, '|CONCISENESS:',conciseness,'||SYNTACTIC SCORE:',syntactic_score , '\n' ) 


Q: Are dogs pets? |YES/NO? 1 ||A: I dont know |COHERENCY: 0 |FLUENCY: 0.9375 |CONCISENESS: 0.3333333333333333 ||SYNTACTIC SCORE: 0.4236111111111111 

Q: Is a dog a pet? |YES/NO? 1 ||A: Yes it is |COHERENCY: 1 |FLUENCY: 0.9166666666666667 |CONCISENESS: 0 ||SYNTACTIC SCORE: 0.638888888888889 

Q: Is it Saturday |YES/NO? 1 ||A: It is not Saturday |COHERENCY: 0 |FLUENCY: 0.9375 |CONCISENESS: 0 ||SYNTACTIC SCORE: 0.3125 

Q: What day is it? |YES/NO? 0 ||A: No |COHERENCY: 0 |FLUENCY: 0.5 |CONCISENESS: 0.9 ||SYNTACTIC SCORE: 0.4666666666666666 

Q: Where are you going? |YES/NO? 0 ||A: Nowhere, stop asking |COHERENCY: 0 |FLUENCY: 0.9375 |CONCISENESS: 0.75 ||SYNTACTIC SCORE: 0.5625 

Q: Are you goind downtown? |YES/NO? 1 ||A: YES! |COHERENCY: 1 |FLUENCY: 0.5 |CONCISENESS: 0.9 ||SYNTACTIC SCORE: 0.7999999999999999 

Q: Did you forget? |YES/NO? 1 ||A: Maybe I did |COHERENCY: 0 |FLUENCY: 0.9166666666666667 |CONCISENESS: 0.6666666666666666 ||SYNTACTIC SCORE: 0.5277777777777778 

Q: Have you forgott