In [60]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# อ่านข้อมูล
df = pd.read_csv('./TABLE3.csv')

# กรองข้อมูลที่มี Agency_Name ซ้ำน้อย
agency_counts = df['Agency_Name'].value_counts()
top_agencies = agency_counts[agency_counts >= 2].index
df_filtered = df[df['Agency_Name'].isin(top_agencies)]

# รวม feature ต่างๆ
df_filtered['combined_text'] = df_filtered.apply(
    lambda row: f"{row['Title']} {row['Index_Terms']} {row['Subject_Field']} {row['Abstract']}", 
    axis=1
)

# Encode Agency_Name
le = LabelEncoder()
df_filtered['Agency_Name_Encoded'] = le.fit_transform(df_filtered['Agency_Name'])

# แบ่งข้อมูล
X_train, X_test, y_train, y_test = train_test_split(
    df_filtered['combined_text'], 
    df_filtered['Agency_Name_Encoded'], 
    test_size=0.2, 
    random_state=42
)

# เตรียม Vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train model
model = LinearSVC(random_state=42)
model.fit(X_train_tfidf, y_train)

# ทำนาย
y_pred = model.predict(X_test_tfidf)

# ประเมินผล
# print("Classification Report:")
# print(classification_report(
#     y_test, 
#     y_pred, 
#     target_names=le.classes_,
#     zero_division=1
# ))

print(f"\nAccuracy: {accuracy_score(y_test, y_pred):.2%}")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['combined_text'] = df_filtered.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Agency_Name_Encoded'] = le.fit_transform(df_filtered['Agency_Name'])



Accuracy: 31.82%


In [63]:
# ฟังก์ชันทำนาย
def predict_agency(text):
    text_tfidf = vectorizer.transform([text])
    prediction = model.predict(text_tfidf)
    return le.inverse_transform(prediction)[0]

# ตัวอย่างการทำนาย
sample_texts = [
    # จากเอกสารวิจัยด้านวิศวกรรม
    "Prediction of surface roughness in ball-end milling process",
    "Experimental study of cutting forces in machining",
    "Analysis of dynamic cutting force in milling operations",
    
    # หัวข้อด้านการแพทย์และทันตกรรม
    "Patient-based outcomes following surgical implant placements",
    "Evaluation of dental implant success rates",
    "Clinical study of postoperative oral health quality",
    
    # หัวข้อด้านธุรกิจและเศรษฐศาสตร์
    "Exploring the Effect of Religious Piety on Corporate Governance",
    "Analysis of anti-takeover provisions in corporations",
    "Impact of religious identification on business management",
    
    # หัวข้อเฉพาะทาง
    "Mathematical modeling of industrial processes",
    "Advanced computational methods in engineering",
    "Innovative approaches to scientific research",
    
    # หัวข้อที่เกี่ยวข้องกับเทคโนโลยี
    "Development of predictive algorithms in machine learning",
    "Artificial intelligence applications in industrial processes",
    "Advanced data analysis techniques",
]

# ทำนายทีละหลายๆ ข้อ
for text in sample_texts:
    print(f"Text: {text}")
    print(f"Predicted Agency: {predict_agency(text)}\n")

Text: Prediction of surface roughness in ball-end milling process
Predicted Agency: Chulalongkorn University

Text: Experimental study of cutting forces in machining
Predicted Agency: Chulalongkorn University Thailand National Organization

Text: Analysis of dynamic cutting force in milling operations
Predicted Agency: Chulalongkorn University Thailand National Organization

Text: Patient-based outcomes following surgical implant placements
Predicted Agency: Chulalongkorn University

Text: Evaluation of dental implant success rates
Predicted Agency: Chulalongkorn University

Text: Clinical study of postoperative oral health quality
Predicted Agency: Chulalongkorn University

Text: Exploring the Effect of Religious Piety on Corporate Governance
Predicted Agency: Chulalongkorn University Thailand National Organization

Text: Analysis of anti-takeover provisions in corporations
Predicted Agency: Australian University Chulalongkorn University University

Text: Impact of religious identific

In [64]:
print(classification_report(
    y_test, 
    y_pred, 
))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         1
           4       1.00      1.00      1.00         1
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         4
          12       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          17       1.00      0.75      0.86         4
          19       0.00      0.00      0.00         1
          20       1.00      0.50      0.67         2
          21       0.00      0.00      0.00         1
          23       0.00      0.00      0.00         0
          26       0.00      0.00      0.00         1
          27       0.00      0.00      0.00         1
          29       0.00      0.00      0.00         1
          32       1.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
