# 1. First Model

## determine if its motivational/not
#### w/ logistic regression model


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

df = pd.read_csv('datasets/binaryDataFirstModel.csv')

X = df['quote']
y = df['label']

#vectorize
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2), stop_words='english')
X_vec = vectorizer.fit_transform(X)



#splitting
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

#logreg model
model = LogisticRegression()
model.fit(X_train, y_train)

#eval
y_pred = model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


#save to pickle (optional)
import pickle
with open('binary_quote_model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)



#test
def predict_quote(quote):
    quote_clean = quote.lower()
    vec = vectorizer.transform([quote_clean])
    pred = model.predict(vec)[0]
    return "Motivational (1)" if pred == 1 else "Not Motivational (0)"

print(predict_quote("believe in yourself and all that you are"))
print(predict_quote("this movie was a complete waste of time"))


Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.96      0.94       203
           1       0.96      0.92      0.94       197

    accuracy                           0.94       400
   macro avg       0.94      0.94      0.94       400
weighted avg       0.94      0.94      0.94       400

Confusion Matrix:
 [[195   8]
 [ 16 181]]
Motivational (1)
Not Motivational (0)


# 2. Second Model
## Nentuin categorynya
#### w/ TF-IDF + logistic regression model

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

df = pd.read_csv('datasets/cleanedDataSecondModel_merged.csv') # ini yg diganti" sama merged dan dataset sebelumnya

X = df['quote']
y = df['category_encoded']

#TFIDF 
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2), stop_words='english')
X_vec = vectorizer.fit_transform(X)

#splitting
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

#logreg model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

#eval
y_pred = model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


#save to pickle (optional)
import pickle
with open('models/category_model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('models/category_vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)



#decode category
with open('models/label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)


#test
def predict_category(quote):
    quote_clean = quote.lower()
    vec = vectorizer.transform([quote_clean])
    pred = model.predict(vec)[0]
    return label_encoder.inverse_transform([pred])[0]

print(predict_category("Success is not final, failure is not fatal"))
print(predict_category("Let yourself rest and find peace"))


Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         2
           2       1.00      1.00      1.00         1
           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         3
           5       0.00      0.00      0.00         1
           6       0.08      0.83      0.15         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         1
           9       1.00      0.50      0.67         4
          12       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         1
          17       0.00      0.00      0.00         1
          18       0.00      0.00      0.00         1
          20       0.50      0.25      0.33         4
   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [2]:
import pickle

with open('models/binary_quote_model.pkl', 'rb') as f:
    binary_model = pickle.load(f)
with open('models/tfidf_vectorizer.pkl', 'rb') as f:
    binary_vectorizer = pickle.load(f)
with open('models/category_model.pkl', 'rb') as f:
    category_model = pickle.load(f)
with open('models/category_vectorizer.pkl', 'rb') as f:
    category_vectorizer = pickle.load(f)
with open('models/label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

#tes gabungan
def classify_quote(quote):
    quote_clean = quote.lower().strip()
    
    #model 1
    vec_bin = binary_vectorizer.transform([quote_clean])
    binary_pred = binary_model.predict(vec_bin)[0]

    if binary_pred == 0:
        return {
            "label": "Not Motivational",
            "category": None
        }
    
    #model 2
    vec_cat = category_vectorizer.transform([quote_clean])
    category_pred = category_model.predict(vec_cat)[0]
    category_label = label_encoder.inverse_transform([category_pred])[0]

    return {
        "label": "Motivational",
        "category": category_label
    }

#conoth
print(classify_quote("believe in yourself and all that you are"))
print(classify_quote("this movie was a total failure"))
print(classify_quote("Let yourself rest and find peace"))


{'label': 'Motivational', 'category': 'believe in yourself'}
{'label': 'Not Motivational', 'category': None}
{'label': 'Motivational', 'category': 'peace'}


In [4]:
#pakai input user
def run_interactive_classifier():
    print("=== Quote Classifier ===")
    print("Type in your qoute:")
    print("Type 'exit' to quit.\n")
    
    while True:
        user_input = input("Enter a quote: ").strip()
        if user_input.lower() == 'exit':
            print("Goodbye")
            break
        
        print(f"\"{user_input}\"")
        result = classify_quote(user_input)
        print("\nResult:")
        print(">>Quote Type:", result["label"])
        if result["category"]:
            print(">>Category  :", result["category"])
        print("\n")
run_interactive_classifier()


=== Quote Classifier ===
Type in your qoute:
Type 'exit' to quit.

"i believe in you and everything that you do"

Result:
>>Quote Type: Motivational
>>Category  : believe in yourself


Goodbye


mungkin next phase bisa ngasih opsi apakah quote yg user input mau dimasukin ke dataset juga (biar makin akurat?) baik yg positive maupun yg negative

coba neural network/bert