In [1]:
!pip install tensorflow -q


[notice] A new release of pip is available: 23.0.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Generate a CSV file with punctuations.

import os
import re
import pandas as pd

def process_folder(folder_path, label):
    """Reads all text files in a folder, cleans lines, and returns labeled data."""
    cleaned_data = []
    
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)

        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.read()
        
        # Remove "SampleX: " in any case (Sample, sample, SAMPLE, etc.)
        cleaned_text = re.sub(r"(?i)\bsample\d+:\s*", "", lines).strip()
        
        # Split into lines and store non-empty ones with labels
        for line in cleaned_text.split("\n"):
            if line.strip():
                cleaned_data.append((line.strip(), label))
    
    return cleaned_data

# Define folder paths
addressing_folder = r"D:\main project\Addressing"
non_addressing_folder = r"D:\main project\Non Addressing"

# Process both folders
addressing_data = process_folder(addressing_folder, 0)  # Label 0 for Addressing
non_addressing_data = process_folder(non_addressing_folder, 1)  # Label 1 for Non-Addressing

# Combine and create DataFrame
data = addressing_data + non_addressing_data
df = pd.DataFrame(data, columns=["Text", "Label"])

# Shuffle the DataFrame for variety
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Display the first few rows
print(df.head())

# Save to CSV (optional)
df.to_csv("cleaned_mixed_data.csv", index=False, encoding="utf-8")
df

                                                Text  Label
0  I was reading about the history of the Industr...      1
1  I was discussing the role of artificial intell...      1
2  I was reading about the use of AI in agricultu...      0
3  I was listening to a lecture on the psychology...      1
4  I read an article about the evolution of photo...      1


Unnamed: 0,Text,Label
0,I was reading about the history of the Industr...,1
1,I was discussing the role of artificial intell...,1
2,I was reading about the use of AI in agricultu...,0
3,I was listening to a lecture on the psychology...,1
4,I read an article about the evolution of photo...,1
...,...,...
33036,I’ve been learning about the human brain and h...,1
33037,I was reading about the latest advancements in...,0
33038,I was reading about the advancements in medica...,0
33039,I was reading about the latest advancements in...,0


In [None]:
# Genereate a new file with no punctuation

import os
import re
import string
import pandas as pd

def process_folder(folder_path, label):
    """Reads all text files in a folder, cleans lines, removes punctuation, and returns labeled data."""
    cleaned_data = []
    
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)

        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.read()
        
        # Remove "SampleX: " in any case (Sample, sample, SAMPLE, etc.)
        cleaned_text = re.sub(r"(?i)\bsample\d+:\s*", "", lines).strip()
        
        # Split into lines, remove punctuation, and store non-empty ones with labels
        for line in cleaned_text.split("\n"):
            if line.strip():
                # Remove all punctuation
                line_no_punct = line.translate(str.maketrans('', '', string.punctuation))
                cleaned_data.append((line_no_punct.strip(), label))
    
    return cleaned_data

# Define folder paths
addressing_folder = r"D:\main project\Addressing"
non_addressing_folder = r"D:\main project\Non Addressing"

# Process both folders
addressing_data = process_folder(addressing_folder, 0)  # Label 0 for Addressing
non_addressing_data = process_folder(non_addressing_folder, 1)  # Label 1 for Non-Addressing

# Combine and create DataFrame
data = addressing_data + non_addressing_data
df = pd.DataFrame(data, columns=["Text", "Label"])

# Shuffle the DataFrame for variety
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Display the first few rows
print(df.head())

# Save to CSV (optional)
df.to_csv("cleaned_mixed_data_no_punct.csv", index=False, encoding="utf-8")
df

                                                Text  Label
0  I was reading about the history of the Industr...      1
1  I was discussing the role of artificial intell...      1
2  I was reading about the use of AI in agricultu...      0
3  I was listening to a lecture on the psychology...      1
4  I read an article about the evolution of photo...      1


Unnamed: 0,Text,Label
0,I was reading about the history of the Industr...,1
1,I was discussing the role of artificial intell...,1
2,I was reading about the use of AI in agricultu...,0
3,I was listening to a lecture on the psychology...,1
4,I read an article about the evolution of photo...,1
...,...,...
33036,I’ve been learning about the human brain and h...,1
33037,I was reading about the latest advancements in...,0
33038,I was reading about the advancements in medica...,0
33039,I was reading about the latest advancements in...,0


In [None]:
import tensorflow as tf
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences # type:ignore

# Load the model and tokenizer
model = tf.keras.models.load_model('robot_addressing_classifier.h5')

# Load the tokenizer
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

def classify_text(text, max_sequence_length=100):
    """
    Classify a single text input to determine if it's addressing a robot.
    
    Args:
        text: Text string to classify
        max_sequence_length: Maximum length for padding (should match training)
        
    Returns:
        Dictionary with prediction results
    """
    # Convert to sequence
    sequences = tokenizer.texts_to_sequences([text])
    
    # Pad sequence
    padded_sequence = pad_sequences(
        sequences,
        maxlen=max_sequence_length,
        padding='post'
    )
    
    # Make prediction
    prediction_prob = model.predict(padded_sequence)[0][0]
    predicted_class = 1 if prediction_prob > 0.5 else 0
    
    # Return result
    is_addressing_robot = (predicted_class == 0)
    
    return {
        'text': text,
        'is_addressing_robot': is_addressing_robot,
        'confidence': float(max(prediction_prob, 1 - prediction_prob))
    }

# Example usage
if __name__ == "__main__":
    # Test with different examples
    test_examples = [
        "Hey robot, what's the weather today?",
        "I need to finish my homework soon.",
        "Robot, can you help me with this?",
        "The meeting starts at 2 PM."
    ]
    
    for text in test_examples:
        result = classify_text(text)
        status = "IS" if result['is_addressing_robot'] else "is NOT"
        print(f"Text: \"{text}\"")
        print(f"Result: This {status} addressing the robot")
        print(f"Confidence: {result['confidence']:.2f}")
        print()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 632ms/step
Text: "Hey robot, what's the weather today?"
Result: This IS addressing the robot
Confidence: 1.00

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Text: "I need to finish my homework soon."
Result: This is NOT addressing the robot
Confidence: 1.00

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Text: "Robot, can you help me with this?"
Result: This IS addressing the robot
Confidence: 1.00

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Text: "The meeting starts at 2 PM."
Result: This is NOT addressing the robot
Confidence: 1.00



In [3]:
classify_text("The Eiffel Tower is one of the most iconic landmarks in the world, located in Paris, France. It was designed by Gustave Eiffel and completed in 1889 as the entrance arch for the 1889 Exposition Universelle (World’s Fair).What do you think.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


{'text': 'The Eiffel Tower is one of the most iconic landmarks in the world, located in Paris, France. It was designed by Gustave Eiffel and completed in 1889 as the entrance arch for the 1889 Exposition Universelle (World’s Fair).What do you think.',
 'is_addressing_robot': True,
 'confidence': 0.9999315142631531}

In [10]:
classify_text("I am an intelligent boy. I am a good student. I am a good citizen. I am a good person. I am a good human being. what is your opinions")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


{'text': 'I am an intelligent boy. I am a good student. I am a good citizen. I am a good person. I am a good human being. what is your opinions',
 'is_addressing_robot': False,
 'confidence': 0.9011706709861755}