# 1.1 Read input

In [47]:
# Read the chat log into a list of raw lines (trailing newlines are kept).
# The encoding is pinned so the result does not depend on the platform's
# locale default; assumes chatlog.txt is UTF-8 (plain ASCII is a subset).
with open("chatlog.txt", "r", encoding="utf-8") as file:
    lines = list(file)

In [48]:
# Display the raw lines exactly as read from the file
lines

['User: Hi, can you tell me about Python? and\n',
 'why do we need it?\n',
 'AI: Sure! Python is a popular programming language known for\n',
 'its readability.\n',
 'User: What can I use it for?\n',
 'AI: You can use Python for web development, data analysis,\n',
 'AI, and more.']

# 2.1 Chat Log Parsing

> In the given input format, a single message from one speaker can span multiple lines, so continuation lines must be merged into the message they belong to.

In [49]:
# Merge multi-line messages for each speaker.
#
# A line that begins with a speaker tag ("User:" or "AI:") starts a new
# message; every other non-blank line continues the message being built.
# The result is `messages`, a list of (speaker, text) tuples in file order.

SPEAKER_TAGS = ("User", "AI")

messages = []
current_speaker = None
current_message = ""

for line in lines:
    line = line.strip()  # Remove leading/trailing whitespace

    # Match on the bare "Name:" tag. Requiring a trailing space would miss
    # "AI:text" as well as a tag-only line, because strip() has already
    # removed any whitespace that followed the colon.
    matched_speaker = next(
        (tag for tag in SPEAKER_TAGS if line.startswith(tag + ":")), None
    )

    if matched_speaker is not None:
        # A new tag closes the message that was being accumulated, if any
        if current_speaker is not None:
            messages.append((current_speaker, current_message.strip()))

        current_speaker = matched_speaker
        # Slice off exactly "Name:"; strip() then drops the optional space
        current_message = line[len(matched_speaker) + 1:].strip()

    elif line:
        # Continuation line. Blank lines are skipped so they do not leave
        # double spaces inside the merged text. Text seen before the first
        # speaker tag is discarded when that tag resets current_message.
        current_message += " " + line

# Don't forget the last message
if current_speaker is not None:
    messages.append((current_speaker, current_message.strip()))

# Example: print all messages
for speaker, message in messages:
    print(f"{speaker}: {message}")

User: Hi, can you tell me about Python? and why do we need it?
AI: Sure! Python is a popular programming language known for its readability.
User: What can I use it for?
AI: You can use Python for web development, data analysis, AI, and more.


In [58]:
# Display the parsed list of (speaker, message) tuples
messages

[('User', 'Hi, can you tell me about Python? and why do we need it?'),
 ('AI',
  'Sure! Python is a popular programming language known for its readability.'),
 ('User', 'What can I use it for?'),
 ('AI',
  'You can use Python for web development, data analysis, AI, and more.')]

In [None]:
# Indexing is zero-based, so index 1 selects the second (speaker, message) tuple
messages[1] # Second message

User: Hi, can you tell me about Python? and why do we need it?
AI: Sure! Python is a popular programming language known for its readability.
User: What can I use it for?
AI: You can use Python for web development, data analysis, AI, and more.


# 2.2 Message Statistics

In [63]:
# Report how many merged messages the parser produced
total_messages = len(messages)
print("TOtal number of messages", total_messages)

TOtal number of messages 4


In [64]:
# Split the parsed messages by speaker; each entry is a (speaker, text) tuple
user_messages = [entry for entry in messages if entry[0] == "User"]
ai_messages = [entry for entry in messages if entry[0] == "AI"]

# Report the size of each group
user_total = len(user_messages)
ai_total = len(ai_messages)
print("USER Message Count: ", user_total)
print("AI Replyy Count: ", ai_total)

USER Message Count:  2
AI Replyy Count:  2


# 2.3 Keyword Analysis

In [85]:
## Extract the top 5 most frequently used words

# Common stop words are excluded so that filler words do not dominate the
# ranking. This is a small custom set tailored to this notebook.
stop_words = {
    "the", "is", "a", "an", "and", "or", "to", "for", "of", "in", "on", "with",
    "can", "i", "you", "it", "its", "me", "about", "what", "we", "do", "hi", "why"
}


# Count the remaining words across all messages (from both speakers)
from collections import Counter
word_counter = Counter()

for speaker, message in messages:
    # Split the message into whitespace-separated tokens
    words = message.split()

    for word in words:
        # Normalise: drop surrounding sentence punctuation, then lowercase
        word = word.strip(".,!?").lower()
        # Skip tokens that were punctuation only (they normalise to "");
        # otherwise the empty string would be counted as a keyword.
        if word and word not in stop_words:
            word_counter[word] += 1

# Get the most frequent words; most_common() orders equal counts by first
# occurrence, so ties keep the order in which the words appeared.
most_frequent_words = [kw for kw, count in word_counter.most_common(5)]
print("Most frequent words:", most_frequent_words)

Most frequent words: ['python', 'use', 'tell', 'need', 'sure']


# 2.4 Generate Summary

In [86]:
# An exchange is counted as one User message paired with one AI message,
# so the number of exchanges is the smaller of the two per-speaker totals.
user_turns, ai_turns = len(user_messages), len(ai_messages)
exchanges_count = min(user_turns, ai_turns)
print("Total exchanges (User-AI Pairs Count): ", exchanges_count)

Total exchanges (User-AI Pairs Count):  2


In [87]:
# The main topic is the single most frequent keyword, or None if there are none
if most_frequent_words:
    main_topic = most_frequent_words[0]
else:
    main_topic = None
print("Main topic of the conversation: ", main_topic)

Main topic of the conversation:  python


In [88]:
# Assemble the summary lines first, then emit them one per line
summary_lines = [
    "Summary:",
    f"- The conversation had {exchanges_count} exchanges.",
    f"- The user asked mainly about {main_topic} and its uses.",
    f"- Most common keywords: {', '.join(most_frequent_words)}",
]
print(*summary_lines, sep="\n")

Summary:
- The conversation had 2 exchanges.
- The user asked mainly about python and its uses.
- Most common keywords: python, use, tell, need, sure
