In [1]:
import sys
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QPushButton, QLabel, QVBoxLayout, 
    QHBoxLayout, QWidget, QLineEdit, QFileDialog, QTextEdit, QMessageBox
)
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QFont
from farasa.segmenter import FarasaSegmenter
import spacy_stanza
import nltk
from nltk import pos_tag, word_tokenize, RegexpParser
from farasa.pos import FarasaPOSTagger

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


#farasa POS tagger
pos_tagger = FarasaPOSTagger()

#initialize the farasa segmenter, to perform segmentation
segmenter = FarasaSegmenter()

# load arabic NLP pipeline from stanza
nlp = spacy_stanza.load_pipeline("ar")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
2024-12-01 18:25:30 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …

2024-12-01 18:25:55 INFO: Loading these models for language: ar (Arabic):
| Processor | Package       |
-----------------------------
| tokenize  | padt          |
| mwt       | padt          |
| pos       | padt_charlm   |
| lemma     | padt_nocharlm |
| depparse  | padt_charlm   |
| ner       | aqmar_charlm  |

2024-12-01 18:25:55 INFO: Using device: cpu
2024-12-01 18:25:55 INFO: Loading: tokenize
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-12-01 18:25:59 INFO: Loading: mwt
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-12-01 18:25:59 INFO: Loading: pos
  checkpoint = torch.load(filename, lambda storage, loc: storage)
  data = torch.load(self.filename, lambda storage, loc: storage)
  state = torch.load(filename, lambda storage, loc: storage)
2024-12-01 18:26:00 INFO: Loading: lemma
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-12-01 18:26:00 INFO: Loading: depparse
  checkpoint = torch.load(filename, lambda s

In [2]:

# app Class
class App(QMainWindow):
    def __init__(self):
        super().__init__()
        self.init_ui()

    def init_ui(self):
        # main window 
        self.setWindowTitle("lexico-semantic processing application")
        self.setGeometry(100, 100, 900, 600)
        self.setStyleSheet("background-color: #FFE4E1;")  

        
        central_widget = QWidget()
        self.setCentralWidget(central_widget)

        # main Layout
        main_layout = QVBoxLayout()

        # input text
        input_layout = QHBoxLayout()
        self.text_input = QLineEdit()
        self.text_input.setPlaceholderText(" input something ...")
        self.text_input.setStyleSheet("""
            QLineEdit {
                background-color: #FFFFFF;
                border: 2px solid #FFB6C1;
                border-radius: 10px;
                padding: 5px;
                font-size: 16px;
            }
        """)
        input_layout.addWidget(QLabel("input text:", self))
        input_layout.addWidget(self.text_input)
        main_layout.addLayout(input_layout)

        # upload file btn
        self.upload_button = QPushButton("upload file")
        self.upload_button.setStyleSheet("""
            QPushButton {
                background-color: #FFB6C1;
                color: white;
                border-radius: 15px;
                padding: 10px 20px;
                font-size: 14px;
            }
            QPushButton:hover {
                background-color: #FFA07A;
            }
        """)
        self.upload_button.clicked.connect(self.upload_file)
        main_layout.addWidget(self.upload_button, alignment=Qt.AlignCenter)

        #  area to ^print results 
        self.result_display = QTextEdit()
        self.result_display.setReadOnly(True)
        self.result_display.setPlaceholderText("results...")
        self.result_display.setStyleSheet("""
            QTextEdit {
                background-color: #FFF5F5;
                border: 2px solid #FFB6C1;
                border-radius: 10px;
                font-size: 14px;
            }
        """)
        main_layout.addWidget(self.result_display)

        # boxes with buttons >>>>>>>>>> each box wil cover a level of analysis
        small_boxes_layout = QHBoxLayout()

        # box 1: ** Morphological Analysis **
        box1_layout = QVBoxLayout()
        box1_label = QLabel("Morphological Analysis")
        box1_label.setFont(QFont("Arial", 12, QFont.Bold))
        box1_label.setStyleSheet("color: #FF69B4;")
        box1_label.setAlignment(Qt.AlignCenter)
        box1_layout.addWidget(box1_label)

        button1_1 = QPushButton("Segmentation")
        button1_1.clicked.connect(self.segmentatin)
        button1_2 = QPushButton("Morphemes Dictionary")
        button1_2.clicked.connect(self.morphemes)
        button1_3 = QPushButton("POS tag")
        button1_3.clicked.connect(self.pos_tags)
        button1_4 = QPushButton("syntax dependencies")
        button1_4.clicked.connect(self.pos_tags)

        # styling w adding buttons
        for button in [button1_1, button1_2, button1_3, button1_4]:
            button.setStyleSheet("""
                QPushButton {
                    background-color: #FFB6C1;
                    color: white;
                    border-radius: 15px;
                    padding: 5px 10px;
                    font-size: 12px;
                }
                QPushButton:hover {
                    background-color: #FFA07A;
                }
            """)
            box1_layout.addWidget(button)

        small_boxes_layout.addLayout(box1_layout)

        

       
        

        # add layouts to the main layout
        main_layout.addLayout(small_boxes_layout)
        central_widget.setLayout(main_layout)

        #initialize a variables to hold file content
        self.text = ""

    def upload_file(self):
        """Open a file dialog to select and read a file."""
        file_path, _ = QFileDialog.getOpenFileName(self, "Open File", "", "Text Files (*.txt);;All Files (*)")
        if file_path:
            with open(file_path, 'r', encoding='utf-8') as file:
                    self.text = file.read()
                    self.result_display.setText("file uploaded successfully!\n\n" + self.text)
           

#I . morphological analysis
     # 1 segmentation 
    def segmentatin(self):
        if self.text:
                segmented = segmenter.segment(self.text)
                self.result_display.setText(segmented)

        else:
            self.result_display.setText("no text to segment")

    #2  morphemes dictionary
    def morphemes(self):
        if self.text:
            try:
                nlp = spacy_stanza.load_pipeline("ar")
                
                doc = nlp(self.text)

                #  morphological information
                morph_data = [f"Token: {token.text}, Morph: {token.morph}" for token in doc]
                # Display the results in the QTextEdit
                self.result_display.setText("\n".join(morph_data))
            except Exception as e:
                self.result_display.setText("error in collecting morphological information ")
        else:
             self.result_display.setText("no text to analyze!")


    def pos_tags(self):
        if self.text:  
            try:
                pos_tagger = FarasaPOSTagger(interactive=True)  # initialize farasa POS tagger
                #tokenization
                tokens = word_tokenize(self.text)
                # tag each token
                tags = []
                for token in tokens:
                    tag = pos_tagger.tag(token)  # This returns a tuple like ('word', 'POS_tag')
                    tags.tag(token)
                    # display
                    self.result_display.setText("\n".join(tags))
            except Exception as e:
                  self.result_display.setText("error in tagging")
        else:
            self.result_display.setText("no text to tag")





        
    def clear_result(self):
        self.result_display.clear()
 

    def count_words(self):
        if self.text:
            word_count = len(self.uploaded_content.split())
            self.result_display.setText(" nu,ber of word:", word_count)
        else:
            self.result_display.setText("no content to count 🙁")


# tun the app
if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = App()
    window.show()
    sys.exit(app.exec_())


2024-12-01 18:26:34 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …

2024-12-01 18:26:36 INFO: Loading these models for language: ar (Arabic):
| Processor | Package       |
-----------------------------
| tokenize  | padt          |
| mwt       | padt          |
| pos       | padt_charlm   |
| lemma     | padt_nocharlm |
| depparse  | padt_charlm   |
| ner       | aqmar_charlm  |

2024-12-01 18:26:36 INFO: Using device: cpu
2024-12-01 18:26:36 INFO: Loading: tokenize
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-12-01 18:26:36 INFO: Loading: mwt
2024-12-01 18:26:36 INFO: Loading: pos
2024-12-01 18:26:37 INFO: Loading: lemma
2024-12-01 18:26:37 INFO: Loading: depparse
2024-12-01 18:26:38 INFO: Loading: ner
2024-12-01 18:26:39 INFO: Done loading processors!
  doc = self._ensure_doc(text)
Words: ['القطط', 'هي', 'حيوانات', 'أليفة', 'ل', 'طيفة', 'و', 'محبوبة', 'تعيش', 'مع', 'الإنسان', 'منذ', 'آلاف', 'السنين', '.', 'تتميز', 'القطط', 'ب', 'رشاقة', 'ها', 'و', 'ذكاء', 'ها', '،', 'و', 'لدي', 'ها', 'قدرة', 'رائعة', 'على', 'التكيف', 'مع', '

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …

2024-12-01 18:27:06 INFO: Loading these models for language: ar (Arabic):
| Processor | Package       |
-----------------------------
| tokenize  | padt          |
| mwt       | padt          |
| pos       | padt_charlm   |
| lemma     | padt_nocharlm |
| depparse  | padt_charlm   |
| ner       | aqmar_charlm  |

2024-12-01 18:27:06 INFO: Using device: cpu
2024-12-01 18:27:06 INFO: Loading: tokenize
2024-12-01 18:27:06 INFO: Loading: mwt
2024-12-01 18:27:06 INFO: Loading: pos
2024-12-01 18:27:06 INFO: Loading: lemma
2024-12-01 18:27:06 INFO: Loading: depparse
2024-12-01 18:27:07 INFO: Loading: ner
2024-12-01 18:27:08 INFO: Done loading processors!


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
