In [185]:
print("Hello, World!")

Hello, World!


# DDD in git hub
1. https://github.com/ddd-by-examples/library
2. https://github.com/ketan-gote/ddd-example
3. https://github.com/ddd-by-examples/factory
4. https://github.com/m-khooryani/OverCloudAirways-DDD-sample

In [186]:
# ตัดคำ
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.manifold import TSNE
%matplotlib inline

In [187]:
import nltk

# ดาวน์โหลด resource ที่จำเป็น
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to /Users/toy/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/toy/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# **loading** **data**

In [188]:
# อ่านไฟล์ README สำหรับ requirement
with open("./business-requirements/1-ddd-by-examples/README.md", "r", encoding='utf-8') as file:
	requirement = file.read()

print(requirement)

[![CircleCI](https://circleci.com/gh/ddd-by-examples/library.svg?style=svg)](https://circleci.com/gh/ddd-by-examples/library)
[![Code Coverage](https://codecov.io/gh/ddd-by-examples/library/branch/master/graph/badge.svg)](https://codecov.io/gh/ddd-by-examples/library)

# Table of contents

- [Table of contents](#table-of-contents)
  - [About](#about)
  - [Domain description](#domain-description)
  - [General assumptions](#general-assumptions)
    - [Process discovery](#process-discovery)
    - [Project structure and architecture](#project-structure-and-architecture)
    - [Aggregates](#aggregates)
    - [Events](#events)
    - [Events in Repositories](#events-in-repositories)
    - [ArchUnit](#archunit)
    - [Functional thinking](#functional-thinking)
      - [Immutable objects](#immutable-objects)
      - [Pure functions](#pure-functions)
      - [Type system](#type-system)
      - [Monads](#monads)
      - [Pattern Matching](#pattern-matching)
    - [(No) ORM](#no-orm)
    - [Archit

In [189]:
# ฟังก์ชั่น clean_text สำหรับการทำ text preprocessing
def clean_text(text):
    # 1. แปลงเป็นตัวพิมพ์เล็ก (lowercase)
    text = text.lower()
    
	# 2. ลบสัญลักษณ์พิเศษและตัวเลขที่ไม่จำเป็น
    text = re.sub(r'[^\w\s]', '', text)  # ลบเครื่องหมายพิเศษ เช่น !, @, #, $
    text = re.sub(r'\d+', '', text)      # ลบตัวเลข
    
    return text

# ทำ text preprocessing กับ requirement
cleaned_requirement = clean_text(requirement)
print(cleaned_requirement)
    

circlecihttpscirclecicomghdddbyexampleslibrarysvgstylesvghttpscirclecicomghdddbyexampleslibrary
code coveragehttpscodecovioghdddbyexampleslibrarybranchmastergraphbadgesvghttpscodecovioghdddbyexampleslibrary

 table of contents

 table of contentstableofcontents
   aboutabout
   domain descriptiondomaindescription
   general assumptionsgeneralassumptions
     process discoveryprocessdiscovery
     project structure and architectureprojectstructureandarchitecture
     aggregatesaggregates
     eventsevents
     events in repositorieseventsinrepositories
     archunitarchunit
     functional thinkingfunctionalthinking
       immutable objectsimmutableobjects
       pure functionspurefunctions
       type systemtypesystem
       monadsmonads
       pattern matchingpatternmatching
     no ormnoorm
     architecturecode gaparchitecturecodegap
     modelcode gapmodelcodegap
       placing on holdplacingonhold
       springspring
     teststests
   how to contributehowtocontribute
   how to bu

In [190]:
# Tokenization - แยกคำออกจากประโยค
tokens = word_tokenize(cleaned_requirement)

print(len(tokens))

3954


# เวอร์ชันที่ใช้ TfidfVectorizer

In [191]:
# ใช้ TfidfVectorizer เพื่อสกัดคำสำคัญ
vectorizer = TfidfVectorizer(stop_words='english', max_features=50, max_df=2, smooth_idf=True)
tfidf_matrix = vectorizer.fit_transform([' '.join(tokens)])
feature_names = vectorizer.get_feature_names_out()
tfidf_scores = tfidf_matrix.toarray().flatten()

In [192]:
# สร้าง DataFrame สำหรับคำสำคัญและคะแนน
keywords_df = pd.DataFrame({'keyword': feature_names, 'score': tfidf_scores})
keywords_df = keywords_df.sort_values(by='score', ascending=False)

In [206]:
# แสดงคำสำคัญ
print("TF-IDF Keywords:\n", keywords_df)

TF-IDF Keywords:
                    keyword     score
4                     book  0.327544
39                  patron  0.317911
31                    java  0.308277
43                  return  0.240842
42                  public  0.240842
18                  domain  0.231208
20                  events  0.202307
26                    hold  0.183040
2             architecture  0.173406
19                   event  0.163772
13                   class  0.154139
11                business  0.154139
36                   model  0.144505
0               aggregates  0.144505
7         bookplacedonhold  0.134871
34                    like  0.134871
41                 project  0.125238
21                 example  0.125238
1              application  0.125238
14                    code  0.115604
23               following  0.105970
35                    look  0.105970
48                     use  0.105970
30          infrastructure  0.096337
40                 private  0.096337
10                  