In [25]:
import tkinter as tk
from tkinter import scrolledtext
from tkinter import messagebox
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB  
from sklearn.svm import SVC  
from sklearn.metrics import accuracy_score


# Load dataset

In [26]:
def load_dataset(file_path, encoding='utf-8'):
    """Read ``text||summary`` pairs from a plain-text file.

    Every non-blank line is split on ``||``. Lines that produce exactly two
    parts contribute one sample; any other line is reported with ``print``
    and skipped. Both halves are stripped of surrounding whitespace so the
    stored text matches what a stripped lookup key would be.

    NOTE: a header row such as ``Text||Summary`` is not treated specially;
    it is returned as an ordinary pair.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Given explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        A tuple ``(X, y)`` of two equally long lists of ``str``: the texts
        and their corresponding summaries, in file order.

    Raises:
        ValueError: If the file contains no valid ``text||summary`` line.
    """
    X, y = [], []

    # Stream the file instead of materialising every line up front.
    with open(file_path, 'r', encoding=encoding) as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:  # Skip empty lines
                continue

            parts = line.split('||')
            if len(parts) != 2:
                # Print the problematic line for debugging
                print(f"Ignoring line: {line}")
                continue

            X.append(parts[0].strip())
            y.append(parts[1].strip())

    if not X:
        raise ValueError("No valid data found in the file.")

    return X, y


# Train and evaluate the models

In [27]:
def train_test_models(X, y):
    """Train four text classifiers on TF-IDF features and score each one.

    The data is split 90/10 with a fixed seed, the texts are vectorized with
    a ``TfidfVectorizer`` fitted on the training portion only, and each
    classifier is fitted on the training vectors and scored on the held-out
    vectors.

    NOTE: the fitted vectorizer is local to this function and is not
    returned, so the returned models cannot be applied to new raw text
    without re-creating an equivalent vectorizer.

    Args:
        X: Sequence of input texts.
        y: Sequence of labels (here: summary strings), same length as ``X``.

    Returns:
        An 8-tuple: the accuracies (in percent) of logistic regression,
        random forest, naive Bayes and SVM, followed by the four fitted
        models in that same order.

    Raises:
        ValueError: If ``X`` is empty.
    """
    if len(X) == 0:
        raise ValueError("No data available for training.")

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    # Vectorize text data (fit on the training texts only)
    vectorizer = TfidfVectorizer()
    X_train_vect = vectorizer.fit_transform(X_train)
    X_test_vect = vectorizer.transform(X_test)

    # Order here defines the order of the returned tuple.
    models = (
        LogisticRegression(),
        # Seeded so the reported accuracy is reproducible between runs,
        # like the seeded split above.
        RandomForestClassifier(random_state=42),
        MultinomialNB(),
        SVC(),
    )

    accuracies = []
    for model in models:
        model.fit(X_train_vect, y_train)
        predictions = model.predict(X_test_vect)
        accuracies.append(accuracy_score(y_test, predictions) * 100)

    return (*accuracies, *models)


In [28]:
def summarize_text(input_text, best_model, data_path='Data_Summary.txt'):
    """Look up the stored summary for ``input_text`` in the data file.

    The data file is parsed as ``text||summary`` lines (lines that do not
    split into exactly two parts are ignored) and the summary whose text
    matches ``input_text`` exactly, after stripping surrounding whitespace
    on both sides, is returned. If a text occurs more than once, the last
    occurrence wins.

    Args:
        input_text: Text to look up. Leading/trailing whitespace is ignored.
        best_model: Accepted for interface compatibility; the lookup does
            not use it.
        data_path: Path of the ``text||summary`` file to search. Defaults to
            the same file the notebook loads its dataset from.

    Returns:
        The matching summary, or ``"No summary found"`` when there is no
        match or the data file is missing (an error dialog is shown in the
        latter case).
    """
    # Strip the query so it compares equal to the stripped keys below.
    query = input_text.strip()

    try:
        with open(data_path, 'r', encoding='utf-8') as file:
            data_dict = {}
            for line in file:
                parts = line.split('||')
                if len(parts) == 2:
                    data_dict[parts[0].strip()] = parts[1].strip()
    except FileNotFoundError:
        messagebox.showerror("Error", "Data file not found.")
        # Always hand the caller a string instead of an implicit None.
        return "No summary found"

    return data_dict.get(query, "No summary found")

# Load the dataset, train the models, and select the best one

In [29]:
# Load the dataset, train/evaluate every model and keep the most accurate one.
# On failure the accuracies fall back to 0 and best_model to None so that the
# GUI cells, which read these names, still find them defined.
try:
    X, y = load_dataset('Data_Summary.txt')

    # Print loaded data
    print("Loaded X samples:")
    print(X[:5])  # Print the first 5 samples
    print("Loaded y samples:")
    print(y[:5])  # Print the corresponding summaries for the first 5 samples

    # Train and test machine learning models
    (
        logistic_reg_accuracy,
        random_forest_accuracy,
        naive_bayes_accuracy,
        svm_accuracy,
        logistic_reg_model,
        random_forest_model,
        naive_bayes_model,
        svm_model,
    ) = train_test_models(X, y)
    print("Logistic Regression Accuracy:", logistic_reg_accuracy)
    print("Random Forest Accuracy:", random_forest_accuracy)
    print("Naive Bayes Accuracy:", naive_bayes_accuracy)
    print("SVM Accuracy:", svm_accuracy)

    # Select the best model based on accuracy
    scored_models = [
        (logistic_reg_accuracy, logistic_reg_model),
        (random_forest_accuracy, random_forest_model),
        (naive_bayes_accuracy, naive_bayes_model),
        (svm_accuracy, svm_model),
    ]
    # max() returns the first maximal item it meets; scanning in reverse
    # therefore resolves ties in favour of the model listed last.
    best_model = max(reversed(scored_models), key=lambda scored: scored[0])[1]

except ValueError as e:
    print(e)
    logistic_reg_accuracy, random_forest_accuracy, naive_bayes_accuracy, svm_accuracy = 0, 0, 0, 0
    # Without this the Summarize callback would hit a NameError on best_model.
    best_model = None


Ignoring line: On cloud nine.||Extremely happy.2340. All that glitters is not gold.||All that glitters is not gold.
Loaded X samples:
['Text', 'The sun rises in the east and sets in the west.', 'Cats are furry animals that love to sleep.', 'Coding is a skill that can be learned with practice.', 'The world is full of wonders waiting to be explored.']
Loaded y samples:
['Summary', 'Sun rises and sets.', 'Cats love to sleep.', 'Coding can be learned with practice.', 'World is full of wonders.']
Logistic Regression Accuracy: 91.61490683229813
Random Forest Accuracy: 54.6583850931677
Naive Bayes Accuracy: 80.74534161490683
SVM Accuracy: 74.22360248447205


# Create the main window

In [30]:
# Build the main window: every widget is created first (in display order),
# then all of them are packed top-to-bottom in that same order.
root = tk.Tk()
root.title("Text Summarizer")


def _on_summarize_click():
    """Button callback: summarize the current contents of the input box."""
    summarize_text_gui(input_text_box.get("1.0", 'end-1c'), best_model)


# Input area
input_label = tk.Label(root, text="Enter text:")
input_text_box = scrolledtext.ScrolledText(root, width=40, height=10)

# Button to summarize
summarize_button = tk.Button(root, text="Summarize", command=_on_summarize_click)

# Summary area
summary_label = tk.Label(root, text="Summary:")
summary_text_box = scrolledtext.ScrolledText(root, width=40, height=5)

# Model evaluation results
logistic_reg_accuracy_label = tk.Label(
    root, text=f"Logistic Regression Accuracy: {logistic_reg_accuracy:.2f}%"
)
random_forest_accuracy_label = tk.Label(
    root, text=f"Random Forest Accuracy: {random_forest_accuracy:.2f}%"
)
naive_bayes_accuracy_label = tk.Label(
    root, text=f"Naive Bayes Accuracy: {naive_bayes_accuracy:.2f}%"
)
svm_accuracy_label = tk.Label(root, text=f"SVM Accuracy: {svm_accuracy:.2f}%")

# Pack order determines the vertical layout of the window.
for _widget in (
    input_label,
    input_text_box,
    summarize_button,
    summary_label,
    summary_text_box,
    logistic_reg_accuracy_label,
    random_forest_accuracy_label,
    naive_bayes_accuracy_label,
    svm_accuracy_label,
):
    _widget.pack()


In [31]:
def summarize_text_gui(input_text, best_model):
    """Summarize ``input_text`` and show the result in the summary text box.

    Args:
        input_text: Text taken from the input box.
        best_model: Forwarded unchanged to ``summarize_text``.

    The summary box is cleared on every call. If the data file cannot be
    found, an error dialog is shown and the box is left untouched.
    """
    try:
        summary = summarize_text(input_text, best_model)
    except FileNotFoundError:
        messagebox.showerror("Error", "Data file not found.")
        return

    summary_text_box.delete('1.0', tk.END)
    # summarize_text may report a problem itself and return None; leave the
    # box empty in that case rather than passing None to the Text widget.
    if summary is not None:
        summary_text_box.insert(tk.END, summary)


# Enter the Tk event loop for the main window; button callbacks run from here.
root.mainloop()
