In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from data_cleaning_import import clean_create_vectors


In [2]:
# Change to cleaned data
df = pd.read_csv("data.csv")
df = clean_create_vectors(df)

X_text = df["journal"]
#y = df["emotion_vectors"]

y = df.drop(columns=["journal", "emotion_vectors", "activity_vectors"])

# TF-IDF
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X = vectorizer.fit_transform(X_text)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(y_train)
print(type(y_train))
#logistic regression 
base_model = LogisticRegression(max_iter=1000)
multi_model = MultiOutputClassifier(base_model)

multi_model.fit(X_train, y_train)
y_pred = multi_model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=y.columns))


#metrics
f1_micro = f1_score(y_test, y_pred, average='micro')
f1_macro = f1_score(y_test, y_pred, average='macro')

precision_micro = precision_score(y_test, y_pred, average='micro')
recall_micro = recall_score(y_test, y_pred, average='micro')

exact_match = accuracy_score(y_test, y_pred)

print("F1 Score (Micro):", f1_micro)
print("F1 Score (Macro):", f1_macro)
print("Precision (Micro):", precision_micro)
print("Recall (Micro):", recall_micro)
print("Exact Match Accuracy:", exact_match)

      afraid  angry  anxious  ashamed  awkward  bored   calm  confused  \
998    False  False    False    False    False   True  False     False   
254    False  False    False    False    False  False  False     False   
1074   False  False    False    False    False  False  False     False   
643    False  False    False    False    False  False   True     False   
1452   False  False    False    False    False  False  False     False   
...      ...    ...      ...      ...      ...    ...    ...       ...   
1130   False  False    False    False    False  False   True     False   
1294   False  False    False    False    False  False   True     False   
860    False  False    False    False    False  False  False     False   
1459   False  False    False    False    False  False  False     False   
1126   False  False    False    False    False  False  False     False   

      disgusted  excited  ...  family   food  friends    god  health   love  \
998       False    False  ...   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
