In [None]:
pip install textstat

Collecting textstat
  Downloading textstat-0.7.3-py3-none-any.whl (105 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyphen (from textstat)
  Downloading pyphen-0.15.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.15.0 textstat-0.7.3


In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import gensim
from gensim import corpora
from gensim.models.ldamodel import LdaModel
import spacy
from textstat.textstat import textstatistics


In [None]:
def preprocess_text(text):
    """Lowercase, sentence-split and tokenize ``text``, dropping noise tokens.

    Args:
        text: Raw input string.

    Returns:
        A list with one entry per sentence found by ``sent_tokenize``. Each
        entry is the list of that sentence's lowercased tokens with English
        stopwords and punctuation-only tokens removed. A sentence whose
        tokens are all filtered out is kept as an empty list, so the outer
        list still lines up with the sentences.
    """
    stop_words = set(stopwords.words('english'))
    filtered_words = []
    for sentence in sent_tokenize(text.lower()):
        filtered_words.append([
            word
            for word in word_tokenize(sentence)
            # Require at least one letter or digit: word_tokenize emits
            # ':' '?' '.' etc. as separate tokens, and they are not vocabulary.
            if word not in stop_words and any(ch.isalnum() for ch in word)
        ])
    return filtered_words


In [None]:
def identify_keywords(text, min_freq=2):
    """Return the tokens that occur repeatedly in ``text``.

    The raw text is passed through ``preprocess_text`` here, so callers must
    NOT preprocess it themselves. The per-sentence token lists are flattened
    and counted with ``nltk.FreqDist``.

    Args:
        text: Raw input string.
        min_freq: Minimum number of occurrences for a token to be reported.
            The default of 2 keeps the original "appears more than once" rule.

    Returns:
        A list of keyword strings, in the iteration order of the frequency
        distribution.
    """
    processed_text = preprocess_text(text)
    # Flatten the per-sentence lists; skip tokens made only of punctuation so
    # marks such as ':' or '?' are never reported as keywords.
    all_words = [
        word
        for sublist in processed_text
        for word in sublist
        if any(ch.isalnum() for ch in word)
    ]
    freq_dist = nltk.FreqDist(all_words)
    return [word for word, freq in freq_dist.items() if freq >= min_freq]


In [None]:
def topic_modeling(text, num_topics=5, passes=25, num_words=3, random_state=42):
    """Fit an LDA model over the sentences of ``text`` and describe its topics.

    Each token list returned by ``preprocess_text`` (one per sentence) is
    treated as a separate document.

    Args:
        text: Raw input string.
        num_topics: Number of latent topics to fit.
        passes: Number of training passes over the corpus.
        num_words: Number of top words to include in each topic description.
        random_state: Seed forwarded to ``LdaModel`` so repeated runs produce
            the same topics. Pass ``None`` for unseeded behaviour.

    Returns:
        The list of ``(topic_id, description)`` tuples produced by
        ``LdaModel.print_topics``, or an empty list when the text contains no
        usable terms.
    """
    processed_text = preprocess_text(text)
    dictionary = corpora.Dictionary(processed_text)
    if len(dictionary) == 0:
        # LDA cannot be trained on an empty vocabulary; report "no topics"
        # instead of letting the model constructor raise.
        return []
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in processed_text]
    lda_model = LdaModel(
        doc_term_matrix,
        num_topics=num_topics,
        id2word=dictionary,
        passes=passes,
        random_state=random_state,
    )
    # Ask for every fitted topic explicitly so a larger num_topics is not
    # cut off by print_topics' own default limit.
    return lda_model.print_topics(num_topics=num_topics, num_words=num_words)


In [None]:
def named_entity_recognition(text, model_name='en_core_web_sm'):
    """Extract named entities from ``text`` with a spaCy pipeline.

    Args:
        text: Raw input string.
        model_name: Name of the spaCy pipeline to load. Defaults to the
            model the notebook has always used.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entity in
        ``doc.ents``.
    """
    # Loading a pipeline is far more costly than running it, so keep each
    # loaded pipeline on the function object and reuse it on later calls.
    pipelines = named_entity_recognition.__dict__.setdefault('_pipelines', {})
    if model_name not in pipelines:
        pipelines[model_name] = spacy.load(model_name)
    doc = pipelines[model_name](text)
    return [(entity.text, entity.label_) for entity in doc.ents]


In [None]:
def analyze_complexity(text):
    """Estimate the Flesch-Kincaid Grade Level of ``text``.

    The score is computed as
    ``0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59``.

    Args:
        text: Raw input string.

    Returns:
        The grade-level estimate as a float, or ``0.0`` when the text has no
        words or sentences to measure.
    """
    stats = textstatistics()
    syllables = stats.syllable_count(text)
    sentences = stats.sentence_count(text)
    # word_tokenize emits punctuation as separate tokens; only tokens that
    # contain a letter or digit are counted as words for the formula.
    words = sum(
        1
        for token in word_tokenize(text)
        if any(ch.isalnum() for ch in token)
    )
    if not words or not sentences:
        # Nothing to measure; also avoids dividing by zero on empty input.
        return 0.0
    # The Flesch-Kincaid Grade Level formula
    return 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59


In [None]:
import nltk
# Fetch the 'punkt' tokenizer data (presumably what sent_tokenize/word_tokenize rely on — confirm for your NLTK version).
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Fetch the stopword corpus read by stopwords.words('english') in the helper functions.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
interview_text = """[Interviewer: Can you tell us about your experience with software development especially in the context of web applications?

Candidate: Absolutely I've been involved in software development for over five years now with a particular focus on web applications I've worked extensively with technologies like JavaScript React and Node.js. One of my significant projects was developing a real-time analytics dashboard for a retail client which involved complex data processing and visualization. We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?

Candidate: The project had its set of challenges especially around handling large volumes of data in real-time We utilized WebSocket for real-time data communication between the server and the client For data processing and management we implemented Redis as an in-memory database to reduce latency. The entire development process was agile which allowed us to iterate quickly based on user feedback and continuously improve the application's performance and usability.

Interviewer: Have you had experience with cloud technologies or containerization?

Candidate: Yes indeed For several projects I've leveraged cloud services mainly AWS to enhance scalability and reliability I have experience with EC2 for virtual servers S3 for storage and Lambda for serverless functions which was particularly useful for background tasks and automations. Regarding containerization I've used Docker to create lightweight portable and consistent environments for development testing and deployment facilitating a smooth CI/CD pipeline with Jenkins.

Interviewer: And how do you keep up with the rapidly changing technology landscape?

Candidate: I'm a firm believer in continuous learning I regularly attend tech meetups participate in online forums and take courses on platforms like Coursera and Udemy Recently I've been diving into machine learning and AI understanding their potential applications in web development especially in personalized user experiences and predictive analytics.

Interviewer: Thank you for sharing your experiences with us It's clear you have a strong foundation in software development and a proactive approach to learning and adapting to new technologies]"""
# Run each analysis helper on the sample transcript and print its result.
# Repeated tokens reported by identify_keywords.
keywords = identify_keywords(interview_text)
print("Keywords:", keywords)

# Topic descriptions from the LDA model fitted by topic_modeling.
topics = topic_modeling(interview_text)
print("Topics:", topics)

# (text, label) pairs for the entities found by spaCy.
entities = named_entity_recognition(interview_text)
print("Named Entities:", entities)

# Flesch-Kincaid grade-level estimate.
complexity_score = analyze_complexity(interview_text)
print("Complexity Score:", complexity_score)


Keywords: ['interviewer', ':', 'us', 'experience', 'software', 'development', 'especially', 'web', 'applications', '?', 'candidate', "'ve", 'involved', 'technologies', 'like', 'react', 'node.js', '.', 'projects', 'real-time', 'analytics', 'client', 'data', 'processing', 'used', 'user', 'scalability', 'project', 'approach', 'challenges', "'s", 'cloud', 'containerization', 'learning', 'experiences']
Topics: [(0, '0.035*"development" + 0.024*"experience" + 0.024*"us"'), (1, '0.026*"user" + 0.026*"learning" + 0.026*"."'), (2, '0.040*"\'ve" + 0.022*"technologies" + 0.022*"software"'), (3, '0.039*"data" + 0.030*"real-time" + 0.030*"."'), (4, '0.046*":" + 0.046*"interviewer" + 0.031*"?"')]
Named Entities: [('five years', 'DATE'), ('JavaScript React', 'ORG'), ('One', 'CARDINAL'), ('Node.js', 'ORG'), ('WebSocket', 'ORG'), ('AWS', 'ORG'), ('S3', 'CARDINAL'), ('Lambda', 'NORP'), ('Docker', 'PERSON'), ('CI', 'PERSON'), ('Jenkins', 'PERSON'), ('Coursera', 'PERSON'), ('Udemy', 'PERSON'), ('AI', 'ORG

In [None]:
# NOTE(review): spacy is already imported in an earlier cell, so on a fresh
# environment this install runs too late — consider moving it to the top.
pip install spacy




In [None]:
import spacy

# Load the SpaCy model
nlp = spacy.load("en_core_web_sm")

def dependency_parsing(text):
    """Print a dependency table for every sentence spaCy finds in ``text``.

    For each sentence the sentence text is printed, followed by one row per
    token (token text, dependency label, head text, head part of speech and
    spaCy's explanation of the label), then a ``---`` separator.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
    """
    def _format_row(token):
        # One aligned table row describing how the token attaches to its head.
        return f"{token.text:<12} {token.dep_:<10} {token.head.text:<12} {token.head.pos_:<6} {spacy.explain(token.dep_)}"

    for sentence in nlp(text).sents:
        print(f"Sentence: {sentence.text}\n")
        for row in map(_format_row, sentence):
            print(row)
        print("\n---\n")

# Example text
example_text = """Interviewer: Can you tell us about your experience with software development especially in the context of web applications?

Candidate: Absolutely I've been involved in software development for over five years now with a particular focus on web applications I've worked extensively with technologies like JavaScript React and Node.js. One of my significant projects was developing a real-time analytics dashboard for a retail client which involved complex data processing and visualization. We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?

Candidate: The project had its set of challenges especially around handling large volumes of data in real-time We utilized WebSocket for real-time data communication between the server and the client For data processing and management we implemented Redis as an in-memory database to reduce latency. The entire development process was agile which allowed us to iterate quickly based on user feedback and continuously improve the application's performance and usability.

Interviewer: Have you had experience with cloud technologies or containerization?

Candidate: Yes indeed For several projects I've leveraged cloud services mainly AWS to enhance scalability and reliability I have experience with EC2 for virtual servers S3 for storage and Lambda for serverless functions which was particularly useful for background tasks and automations. Regarding containerization I've used Docker to create lightweight portable and consistent environments for development testing and deployment facilitating a smooth CI/CD pipeline with Jenkins.

Interviewer: And how do you keep up with the rapidly changing technology landscape?

Candidate: I'm a firm believer in continuous learning I regularly attend tech meetups participate in online forums and take courses on platforms like Coursera and Udemy Recently I've been diving into machine learning and AI understanding their potential applications in web development especially in personalized user experiences and predictive analytics.

Interviewer: Thank you for sharing your experiences with us It's clear you have a strong foundation in software development and a proactive approach to learning and adapting to new technologies"""

# Print the parse table for every sentence of the sample transcript.
dependency_parsing(example_text)


Sentence: Interviewer: Can you tell us about your experience with software development especially in the context of web applications?



Interviewer  ROOT       Interviewer  NOUN   root
:            punct      Interviewer  NOUN   punctuation
Can          aux        tell         VERB   auxiliary
you          nsubj      tell         VERB   nominal subject
tell         acl        Interviewer  NOUN   clausal modifier of noun (adjectival clause)
us           dobj       tell         VERB   direct object
about        prep       tell         VERB   prepositional modifier
your         poss       experience   NOUN   possession modifier
experience   pobj       about        ADP    object of preposition
with         prep       experience   NOUN   prepositional modifier
software     compound   development  NOUN   compound
development  pobj       with         ADP    object of preposition
especially   advmod     in           ADP    adverbial modifier
in           prep       tell         VERB   preposi

In [None]:
import spacy

# Load the SpaCy model
nlp = spacy.load("en_core_web_sm")

def assess_technology_experience(text):
    """Print a per-sentence guess at the candidate's experience with technologies.

    A sentence contributes output only if it contains proper nouns used as a
    direct object or object of a preposition. Each such proper noun is
    reported as "intermediate to advanced experience" when the sentence's
    ROOT token has a development-related lemma, and as a plain mention
    otherwise.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
    """
    hands_on_lemmas = {"develop", "build", "create", "implement", "use", "utilize"}
    object_deps = {"dobj", "pobj"}

    for sentence in nlp(text).sents:
        # Proper nouns in object position are taken to be technology names.
        tech_skills = [
            token.text
            for token in sentence
            if token.dep_ in object_deps and token.pos_ == "PROPN"
        ]
        if not tech_skills:
            continue

        # A development verb at the sentence root suggests hands-on use.
        has_development_verb = any(
            token.dep_ == "ROOT" and token.lemma_ in hands_on_lemmas
            for token in sentence
        )

        for skill in tech_skills:
            if has_development_verb:
                print(f"The candidate has intermediate to advanced experience with {skill} based on the context: '{sentence.text}'")
            else:
                print(f"The candidate mentioned {skill}, indicating familiarity: '{sentence.text}'")

# Example usage
example_text = """Interviewer: Can you tell us about your experience with software development especially in the context of web applications?

Candidate: Absolutely I've been involved in software development for over five years now with a particular focus on web applications I've worked extensively with technologies like JavaScript React and Node.js. One of my significant projects was developing a real-time analytics dashboard for a retail client which involved complex data processing and visualization. We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?

Candidate: The project had its set of challenges especially around handling large volumes of data in real-time We utilized WebSocket for real-time data communication between the server and the client For data processing and management we implemented Redis as an in-memory database to reduce latency. The entire development process was agile which allowed us to iterate quickly based on user feedback and continuously improve the application's performance and usability.

Interviewer: Have you had experience with cloud technologies or containerization?

Candidate: Yes indeed For several projects I've leveraged cloud services mainly AWS to enhance scalability and reliability I have experience with EC2 for virtual servers S3 for storage and Lambda for serverless functions which was particularly useful for background tasks and automations. Regarding containerization I've used Docker to create lightweight portable and consistent environments for development testing and deployment facilitating a smooth CI/CD pipeline with Jenkins.

Interviewer: And how do you keep up with the rapidly changing technology landscape?

Candidate: I'm a firm believer in continuous learning I regularly attend tech meetups participate in online forums and take courses on platforms like Coursera and Udemy Recently I've been diving into machine learning and AI understanding their potential applications in web development especially in personalized user experiences and predictive analytics.

Interviewer: Thank you for sharing your experiences with us It's clear you have a strong foundation in software development and a proactive approach to learning and adapting to new technologies"""
# Print the experience assessment for each sentence of the sample transcript.
assess_technology_experience(example_text)




The candidate mentioned React, indicating familiarity: 'Absolutely I've been involved in software development for over five years now with a particular focus on web applications I've worked extensively with technologies like JavaScript React and Node.js.'
The candidate has intermediate to advanced experience with React based on the context: 'We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?

'
The candidate has intermediate to advanced experience with Interviewer based on the context: 'We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?

'
The cand

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF, TruncatedSVD
import numpy as np

# `preprocess_text` returns one list of lowercased, stopword-filtered tokens per
# sentence, so every sentence is treated as a separate document below.
processed_docs = preprocess_text(example_text)  # Preprocess your text first

# Joining the processed tokens back into document strings
docs = [" ".join(doc) for doc in processed_docs]

# Create a TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(docs)

# NMF Model
nmf_model = NMF(n_components=5, random_state=42)
nmf_topic_matrix = nmf_model.fit_transform(tfidf)

# LSA (also known as LSI) Model
# Seeded like the NMF model so the topics are the same on every run.
lsa_model = TruncatedSVD(n_components=5, n_iter=100, random_state=42)
lsa_topic_matrix = lsa_model.fit_transform(tfidf)

# Function to display topics
def display_topics(model, feature_names, no_top_words):
    """Print the highest-weighted features of every component of ``model``.

    Args:
        model: Fitted decomposition object exposing ``components_`` (one row
            of feature weights per topic).
        feature_names: Sequence mapping a feature index to its name.
        no_top_words: Number of features to list for each topic.
    """
    for rank, weights in enumerate(model.components_, start=1):
        print("Topic %d:" % rank)
        # Indices ordered from largest to smallest weight, cut to the top N.
        strongest_first = weights.argsort()[::-1][:no_top_words]
        print(" ".join(feature_names[i] for i in strongest_first))

# How many top-weighted terms to list per topic.
no_top_words = 5

# Both models were fitted on the same TF-IDF matrix, so they share one vocabulary.
feature_names = tfidf_vectorizer.get_feature_names_out()

# Display NMF Topics
print("NMF Model Topics:")
display_topics(nmf_model, feature_names, no_top_words)

# Display LSA Topics
print("\nLSA Model Topics:")
display_topics(lsa_model, feature_names, no_top_words)


NMF Model Topics:
Topic 1:
learning web applications software development
Topic 2:
data time real processing client
Topic 3:
interviewer approach challenges project like
Topic 4:
cloud experience containerization ve projects
Topic 5:
user development used containerization ve

LSA Model Topics:
Topic 1:
interviewer development ve software experience
Topic 2:
data time real client processing
Topic 3:
interviewer project challenges data approach
Topic 4:
cloud experience containerization scalability ve
Topic 5:
user used containerization interviewer development


In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string

def _content_tokens(text):
    """Return the lowercased, purely alphabetic, non-stopword tokens of ``text``."""
    stop_words = set(stopwords.words('english'))
    # Drop punctuation (and any token that is not purely alphabetic), then lowercase.
    lowered = (word.lower() for word in word_tokenize(text) if word.isalpha())
    return [word for word in lowered if word not in stop_words]


def calculate_ttr(text):
    """Return the Type-Token Ratio (distinct tokens / total tokens) of ``text``.

    Tokens are the lowercased alphabetic, non-stopword words of the text.

    Args:
        text: Raw input string.

    Returns:
        The ratio, or ``0`` when the text has no countable tokens.
    """
    # Delegate so the ratio is defined in exactly one place.
    return calculate_sophistication_metrics(text)['type_token_ratio']


def calculate_sophistication_metrics(text):
    """Compute simple lexical-diversity metrics for ``text``.

    Tokens are the lowercased alphabetic, non-stopword words of the text.

    Args:
        text: Raw input string.

    Returns:
        A dict with ``'total_tokens'`` (number of tokens), ``'total_types'``
        (number of distinct tokens) and ``'type_token_ratio'`` (types divided
        by tokens, or ``0`` when there are no tokens).
    """
    tokens = _content_tokens(text)

    # Number of Tokens
    total_tokens = len(tokens)
    # Number of Types
    total_types = len(set(tokens))
    # Calculate TTR
    ttr = total_types / total_tokens if total_tokens > 0 else 0

    return {
        'total_tokens': total_tokens,
        'total_types': total_types,
        'type_token_ratio': ttr
    }

# Example Usage
# Rebind interview_text to the sample transcript stored in example_text.
interview_text = example_text
# Compute the lexical-diversity metrics and print each one (ratio to 3 decimals).
sophistication_metrics = calculate_sophistication_metrics(interview_text)
print(f"Total Tokens: {sophistication_metrics['total_tokens']}")
print(f"Total Types: {sophistication_metrics['total_types']}")
print(f"Type-Token Ratio (TTR): {sophistication_metrics['type_token_ratio']:.3f}")


Total Tokens: 202
Total Types: 152
Type-Token Ratio (TTR): 0.752


In [None]:
from nltk.tokenize import sent_tokenize, word_tokenize
import spacy

# Load SpaCy for advanced NLP tasks (if not already loaded)
nlp = spacy.load("en_core_web_sm")

def analyze_thought_process(text):
    """Summarise sentence structure and use of logical connectors in ``text``.

    Args:
        text: Raw input string.

    Returns:
        A dict with ``'total_sentences'`` (number of sentences found by
        ``sent_tokenize``), ``'average_sentence_length'`` (mean number of
        tokens per sentence, ``0`` when there are no sentences) and
        ``'logical_connector_count'`` (case-insensitive occurrences of the
        connectors listed below, including multi-word ones).
    """
    logical_connectors = ['therefore', 'however', 'for example', 'because', 'firstly', 'secondly', 'furthermore', 'moreover']
    # Split every connector into its words so that multi-word entries such as
    # 'for example' are matched against consecutive tokens; compared with
    # single tokens they could never match.
    connector_ngrams = [tuple(connector.split()) for connector in logical_connectors]

    # Tokenize the text into sentences
    sentences = sent_tokenize(text)
    connector_count = 0
    sentence_lengths = []

    # Analyze each sentence
    for sentence in sentences:
        # Tokenize the sentence into words and record its length
        words = word_tokenize(sentence)
        sentence_lengths.append(len(words))

        # Count logical connectors as runs of consecutive lowercased tokens
        lowered = [word.lower() for word in words]
        for ngram in connector_ngrams:
            size = len(ngram)
            connector_count += sum(
                1
                for start in range(len(lowered) - size + 1)
                if tuple(lowered[start:start + size]) == ngram
            )

    # Calculate average sentence length
    avg_sentence_length = sum(sentence_lengths) / len(sentence_lengths) if sentence_lengths else 0

    return {
        'total_sentences': len(sentences),
        'average_sentence_length': avg_sentence_length,
        'logical_connector_count': connector_count,
    }

# Example Usage
# Rebind interview_text to the sample transcript stored in example_text.
interview_text = example_text
# Compute the sentence-level metrics and print each one (average to 2 decimals).
thought_process_metrics = analyze_thought_process(interview_text)
print(f"Total Sentences: {thought_process_metrics['total_sentences']}")
print(f"Average Sentence Length: {thought_process_metrics['average_sentence_length']:.2f}")
print(f"Logical Connector Count: {thought_process_metrics['logical_connector_count']}")




Total Sentences: 13
Average Sentence Length: 29.31
Logical Connector Count: 0


In [None]:
import spacy

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

def analyze_thought_process(text):
    """Print dependency-based indicators of complex reasoning found in ``text``.

    Every sentence is echoed, followed by one line for each token that is a
    subordinate clause head (``advcl``, ``csubj``, ``csubjpass``), a
    conditional marker, or an apposition (``appos``). A final line reports
    the total number of indicators. Nothing is returned.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
    """
    subordinate_deps = {'advcl', 'csubj', 'csubjpass'}
    # The dependency scheme of spaCy's English pipelines has no 'cond' label;
    # a conditional clause is introduced by a subordinating marker ('mark')
    # such as "if". 'cond' is still accepted for pipelines that emit it.
    conditional_markers = {'if', 'unless'}

    doc = nlp(text)
    complex_reasoning_indicators = 0

    for sentence in doc.sents:
        print(f"Analyzing Sentence: {sentence.text}")
        for token in sentence:
            if token.dep_ in subordinate_deps:
                # Subordinate clauses mark complex sentences
                print(f" - Complex reasoning found with '{token.head.text}' due to '{token.text}' ({token.dep_})")
            elif token.dep_ == 'cond' or (token.dep_ == 'mark' and token.lower_ in conditional_markers):
                # Conditional sentences (if-then logic)
                print(f" - Conditional logic found with '{token.head.text}' due to '{token.text}' ({token.dep_})")
            elif token.dep_ == 'appos':
                # Appositions add explanations or details
                print(f" - Detailed explanation found with '{token.head.text}' due to '{token.text}' ({token.dep_})")
            else:
                continue
            complex_reasoning_indicators += 1

    print(f"Total indicators of complex reasoning: {complex_reasoning_indicators}")

# Run the analysis on the sample transcript defined in an earlier cell

analyze_thought_process(example_text)


Analyzing Sentence: Interviewer: Can you tell us about your experience with software development especially in the context of web applications?


Analyzing Sentence: Candidate:
Analyzing Sentence: Absolutely I've been involved in software development for over five years now with a particular focus on web applications I've worked extensively with technologies like JavaScript React and Node.js.
Analyzing Sentence: One of my significant projects was developing a real-time analytics dashboard for a retail client which involved complex data processing and visualization.
Analyzing Sentence: We used React for the frontend to ensure a responsive and intuitive user interface and Node.js on the backend for its scalability and efficiency with real-time data.

Interviewer: That sounds like an impressive project How did you approach the challenges that came with it?


Analyzing Sentence: Candidate: The project had its set of challenges especially around handling large volumes of data in real-time W