In [2]:
import nltk
import pandas as pd
import plotly.express as plot
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords


stop_words = set(stopwords.words('english'))
try:
    # Read text from a .txt file
    with open('chat.txt', 'r') as file:
        text = file.read().lower()  # Convert to lower case

    tokens = [word for word in word_tokenize(text) if word.isalpha() and word not in stop_words]


    # Calculate word frequency
    word_freq = FreqDist(tokens)

    # Convert word frequency data to a DataFrame
    word_freq_df = pd.DataFrame(word_freq.items(), columns=['Word', 'Frequency']).sort_values(by='Frequency', ascending=False)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(word_freq_df)


    # Create an interactive bar chart using Plotly
    fig = plot.bar(word_freq_df, x='Word', y='Frequency', title='Word Frequency Analysis')

    # Update layout
    fig.update_layout(xaxis_title='Word', yaxis_title='Frequency')

    # Dropdown menu for word filtering
    dropdown_buttons = [
        {'label': 'All Words', 'method': 'relayout',
         'args': [{'xaxis.range': [0, len(word_freq_df)]}, {'title': 'All Words'}]},
        {'label': 'Top 10 Words', 'method': 'relayout',
         'args': [{'xaxis.range': [0, 9]}, {'title': 'Top 10 Words'}]},
        {'label': 'Top 20 Words', 'method': 'relayout',
         'args': [{'xaxis.range': [0, 19]}, {'title': 'Top 20 Words'}]},
    ]

    fig.update_layout(
        updatemenus=[
            {'buttons': dropdown_buttons,
             'direction': 'down',
             'showactive': True,
             }
        ])

    fig.show()

except FileNotFoundError:
    print("The file chat.txt was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

Unnamed: 0,Word,Frequency
3,automation,37
62,skills,27
74,job,24
120,education,20
121,training,17
47,may,16
41,jobs,14
40,workers,14
124,programs,14
58,new,11


In [3]:
%pip install textblob
import plotly.express as plot
from textblob import TextBlob
import pandas as pd

# Read texts from a file
with open('chat.txt', 'r') as file:
    texts = file.readlines()

# Perform sentiment analysis for each text and store the results
sentiments = []
for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment
    sentiments.append((text, sentiment.polarity, sentiment.subjectivity))

# Create a DataFrame from the sentiment results
df = pd.DataFrame(sentiments, columns=['Text', 'Polarity', 'Subjectivity'])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)
# Create an interactive scatter plot using Plotly Express with tooltips
fig = plot.scatter(
    df,
    x='Polarity',
    y='Subjectivity',
    title='Sentiment Analysis Scatter Plot',
    hover_name='Text',
    labels={'Text': 'Text'} 
)

fig.update_xaxes(title='Polarity')
fig.update_yaxes(title='Subjectivity')

# Show the plot in an interactive window
fig.show()


Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,Text,Polarity,Subjectivity
0,what are the implications to a economy due to ...,-0.125,0.375
1,\n,0.0,0.0
2,Automation can have significant implications f...,0.060455,0.589091
3,\n,0.0,0.0
4,1. **Increased Productivity**: Automation can ...,0.0,1.0
5,\n,0.0,0.0
6,2. **Labor Displacement**: One of the most sig...,0.140625,0.478125
7,\n,0.0,0.0
8,3. **Skill Shift**: While some jobs may be los...,-0.109091,0.636364
9,\n,0.0,0.0


In [4]:
import nltk
import networkx as nx
import plotly.graph_objs as go
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import ne_chunk

# Read text from a file
with open('chat.txt', 'r') as file:
    text = file.read()

# Tokenize the text into sentences and words
sentences = sent_tokenize(text)
words = [word_tokenize(sentence) for sentence in sentences]

# Perform Named Entity Recognition (NER) using NLTK's ne_chunk
ner_tagged_sentences = [ne_chunk(nltk.pos_tag(sentence)) for sentence in words]
print(ner_tagged_sentences)
# Initialize a directed graph
G = nx.DiGraph()

# Extract Named Entities and their labels, and add nodes and edges to the graph
for sentence in ner_tagged_sentences:
    for subtree in sentence:
        if isinstance(subtree, nltk.Tree):
            entity = " ".join([token for token, tag in subtree.leaves()])
            label = subtree.label()
            G.add_node(entity, label=label)
            if len(G.nodes()) > 1:
                G.add_edge(list(G.nodes())[-2], entity)

# Create an interactive network diagram using Plotly
pos = nx.spring_layout(G, seed=42)

edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5),
    hoverinfo='none',
    mode='lines'
)

for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        size=10,
    )
)

for node in G.nodes():
    x, y = pos[node]
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    node_trace['text'] += tuple([f'Entity: {node}<br>Label: {G.nodes[node]["label"]}'])

layout = go.Layout(
    title="Named Entity Relationship Network Diagram",
    xaxis=dict(title="Entity Type"),
    yaxis=dict(title="Entity Frequency"),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=0, l=0, r=0, t=40),  # Adjust the top margin for the title
)

fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

fig.show()


[Tree('S', [('what', 'WDT'), ('are', 'VBP'), ('the', 'DT'), ('implications', 'NNS'), ('to', 'TO'), ('a', 'DT'), ('economy', 'NN'), ('due', 'JJ'), ('to', 'TO'), ('automation', 'VB'), ('?', '.')]), Tree('S', [('Automation', 'NN'), ('can', 'MD'), ('have', 'VB'), ('significant', 'JJ'), ('implications', 'NNS'), ('for', 'IN'), ('an', 'DT'), ('economy', 'NN'), (',', ','), ('both', 'DT'), ('positive', 'JJ'), ('and', 'CC'), ('negative', 'JJ'), ('.', '.')]), Tree('S', [('These', 'DT'), ('implications', 'NNS'), ('can', 'MD'), ('vary', 'VB'), ('depending', 'VBG'), ('on', 'IN'), ('the', 'DT'), ('extent', 'NN'), ('and', 'CC'), ('speed', 'NN'), ('of', 'IN'), ('automation', 'NN'), (',', ','), ('the', 'DT'), ('specific', 'JJ'), ('industries', 'NNS'), ('affected', 'VBD'), (',', ','), ('and', 'CC'), ('the', 'DT'), ('policies', 'NNS'), ('in', 'IN'), ('place', 'NN'), ('to', 'TO'), ('manage', 'VB'), ('these', 'DT'), ('changes', 'NNS'), ('.', '.')]), Tree('S', [('Here', 'RB'), ('are', 'VBP'), ('some', 'DT'),

In [5]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag
import plotly.graph_objects as go
import pandas as pd
from collections import Counter, defaultdict

# Read text from a file
with open('chat.txt', 'r') as file:
    text = file.read()

# Tokenize the text into sentences and words
sentences = sent_tokenize(text)
words = word_tokenize(text)

# Perform POS tagging
pos_tags = pos_tag(words)

# Count the occurrences of each POS tag and collect examples
pos_tag_counts = Counter()
examples = defaultdict(set)
for word, tag in pos_tags:
    pos_tag_counts[tag] += 1
    examples[tag].add(word)
    if len(examples[tag]) > 5:
        examples[tag].remove(next(iter(examples[tag])))

# Create a DataFrame with clarifications for POS tags
tag_clarifications = {
    'NN': 'Noun', 'VB': 'Verb', 'JJ': 'Adjective', 'RB': 'Adverb',
    'PRP': 'Pronoun', 'IN': 'Preposition', 'CC': 'Conjunction',
    'DT': 'Determiner', 'UH': 'Interjection', '.': 'Punctuation'
}

df = pd.DataFrame.from_dict(pos_tag_counts, orient='index', columns=['Count'])
df.index.name = 'POS Tag'
df.reset_index(inplace=True)
df['Clarification'] = df['POS Tag'].map(tag_clarifications)
df['Examples'] = df['POS Tag'].apply(lambda tag: ', '.join(examples[tag]))

# Create a new column for labels that include POS tag and clarification
df['Label'] = df['POS Tag'] + ' (' + df['Clarification'] + ')'

# Create the Treemap Chart
fig = go.Figure(go.Treemap(
    labels=df['Label'],
    parents=[''] * len(df),
    values=df['Count'],
    textinfo="label+value+text",
    customdata=df['Examples'],
    hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Examples: %{customdata}<extra></extra>'
))

# Customize the layout and display the chart
fig.update_layout(title='Part-of-Speech Tags Treemap Chart')
fig.show()


In [6]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from nltk.tokenize.treebank import TreebankWordDetokenizer
import plotly.express as px
import pandas as pd

# Read the text from a file
with open('chat.txt', 'r') as file:
    text = file.read()

# Tokenize the text into sentences and words
sentences = sent_tokenize(text)
words = word_tokenize(text)

# Remove stopwords and punctuation
stop_words = set(stopwords.words('english'))
filtered_words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words]

# Calculate word frequencies
freq_dist = FreqDist(filtered_words)

# Determine the most important sentences based on word frequency
num_sentences = 3 

sentence_scores = {}
for sentence in sentences:
    for word in word_tokenize(sentence.lower()):
        if word in freq_dist:
            if sentence not in sentence_scores:
                sentence_scores[sentence] = freq_dist[word]
            else:
                sentence_scores[sentence] += freq_dist[word]

summary_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:num_sentences]

# Generate the summary
summary = TreebankWordDetokenizer().detokenize(summary_sentences)

# Create a DataFrame for the line chart
df = pd.DataFrame({'Sentence Index': range(len(sentences)), 'Sentence Importance Score': [sentence_scores[sentence] for sentence in sentences]})

# Create an interactive line chart
fig = px.line(df, x='Sentence Index', y='Sentence Importance Score', title='Sentence Importance Over Time')
fig.update_xaxes(title='Sentence Index')
fig.update_yaxes(title='Sentence Importance Score')

# Show the interactive line chart
fig.show()
