# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
 
# Load the training set and the three evaluation sets.
train, test1, test2, test3 = (
    pd.read_csv(csv_path)
    for csv_path in ('train-2.csv', 'test-1.csv', 'test-2.csv', 'test-3.csv')
)

# Split every frame into its text column (forced to str) and its label column.
X_train, y_train = train['Sentence'].astype(str), train['Label']
X_test1, y_test1 = test1['Sentence'].astype(str), test1['Label']
X_test2, y_test2 = test2['Sentence'].astype(str), test2['Label']
X_test3, y_test3 = test3['Sentence'].astype(str), test3['Label']

# The TF-IDF vectorizer is fitted on the training texts ONLY; the test sets
# are merely transformed with the already-fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test1_vec, X_test2_vec, X_test3_vec = (
    vectorizer.transform(texts) for texts in (X_test1, X_test2, X_test3)
)
 
def metrics(y_true, y_pred):
    """Return a one-line summary of classification scores.

    Precision, recall and F1 are macro-averaged and computed with
    ``zero_division=0``; accuracy is computed as-is.  Every score is
    rendered with three decimals.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A string of the form
        ``"Precision=…, Recall=…, F1=…, Accuracy=…"``.
    """
    macro_options = {"average": "macro", "zero_division": 0}
    precision = precision_score(y_true, y_pred, **macro_options)
    recall = recall_score(y_true, y_pred, **macro_options)
    f1 = f1_score(y_true, y_pred, **macro_options)
    accuracy = accuracy_score(y_true, y_pred)
    return (
        f"Precision={precision:.3f}, "
        f"Recall={recall:.3f}, "
        f"F1={f1:.3f}, "
        f"Accuracy={accuracy:.3f}"
    )
 
def _result_rows(row_id, algorithm, model):
    """Fit ``model`` on the training matrix and build its two result rows.

    Each test set is predicted and scored once; the resulting score strings
    are shared by both rows instead of being recomputed.

    Args:
        row_id: Row-number prefix, e.g. ``"1.a"``.
        algorithm: Human-readable algorithm name shown in the table.
        model: Unfitted estimator exposing ``fit`` and ``predict``.

    Returns:
        A list with two rows:
        ``[<row_id>.i,  method, algorithm, "Train2", train, test1, test2, test3]``
        and
        ``[<row_id>.ii, method, algorithm, "Test", test1, test2, test3]``.
    """
    model.fit(X_train_vec, y_train)
    train_scores = metrics(y_train, model.predict(X_train_vec))
    test_scores = [
        metrics(y_true, model.predict(features))
        for features, y_true in (
            (X_test1_vec, y_test1),
            (X_test2_vec, y_test2),
            (X_test3_vec, y_test3),
        )
    ]
    return [
        [f"{row_id}.i", "Machine learning", algorithm, "Train2",
         train_scores, *test_scores],
        [f"{row_id}.ii", "Machine learning", algorithm, "Test",
         *test_scores],
    ]


results = []

# Logistic regression: one row including the train score, one with the test
# scores only (both rows come from the same model fitted on the train set).
logreg = LogisticRegression(max_iter=1000, C=1, random_state=42)
results.extend(_result_rows("1.a", "Logistic regression", logreg))

# Decision tree: same two rows as above.
tree = DecisionTreeClassifier(random_state=42)
results.extend(_result_rows("1.b", "Decision tree classifier", tree))
 
# GENERATE THE MARKDOWN TABLE
#
# Rows in `results` carry four descriptive cells followed by their score
# cells: "Train2" rows hold four scores (train, test 1, test 2, test 3) while
# "Test" rows hold only the three test scores.  Writing row[4]..row[6] under
# "Test 1".."Test 3" therefore mislabels the train score as Test 1 and drops
# the Test 3 score for "Train2" rows.  The table gets a dedicated "Train"
# column instead, and rows without a train score show a dash there, so every
# score ends up under the heading it belongs to.
_META_CELLS = 4
_SCORE_COLUMNS = ("Train", "Test 1", "Test 2", "Test 3")
_columns = ("#", "method", "algorithm", "skup") + _SCORE_COLUMNS

header = (
    "| " + " | ".join(_columns) + " |\n"
    + "|" + "|".join("---" for _ in _columns) + "|\n"
)

lines = []
for row in results:
    meta, scores = row[:_META_CELLS], row[_META_CELLS:]
    # Left-pad short rows so the test scores stay aligned with Test 1..3.
    padding = ["-"] * (len(_SCORE_COLUMNS) - len(scores))
    cells = [*meta, *padding, *scores]
    line = "| " + " | ".join(str(cell) for cell in cells) + " |"
    lines.append(line)

with open("results.md", "w", encoding="utf-8") as f:
    f.write(header)
    f.writelines(table_line + "\n" for table_line in lines)