In [None]:
import firebase_admin
import pandas as pd
from firebase_admin import credentials, initialize_app, db, firestore
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Load the service-account key used to authenticate against Firebase.
cred = credentials.Certificate('mindfit-9952f-firebase-adminsdk-1nq0u-5e10a7a8fb.json')

# initialize_app() raises ValueError when the default app already exists, so
# re-running this cell in a live kernel used to fail. Only initialise when no
# default app has been registered yet (get_app() raises ValueError in that case).
try:
    firebase_admin.get_app()
except ValueError:
    initialize_app(cred)

In [None]:
# Name of the Firestore collection that is read below.
collection_name = 'journals'

# `data` is the Firestore client handle (not a dataset); `collection_ref`
# points at the collection named above.
data = firestore.client()
collection_ref = data.collection(collection_name)

In [4]:
# Fetch the collection's documents; the next cell iterates over `docs` and
# converts each entry with to_dict().
docs = collection_ref.get()

In [5]:
# Convert every fetched document into a plain dict, one record per document.
firestore_data = [doc.to_dict() for doc in docs]

# One row per document; columns come from the keys of the document dicts.
data_df = pd.DataFrame(firestore_data)
print(data_df)

      title                             content   userId  \
0  Be Happy  Today' gonna be a great day for me  Sanchit   

                         timestamp  
0 2024-02-20 03:59:50.903000+00:00  


In [6]:
# Labelled training set: the preview below shows a `text` column and an
# integer `label` column.
train_dat = pd.read_csv('training.csv')

# Preview the first five rows.
print(train_dat.head(5))

                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
train_data, test_data, train_labels, test_labels = train_test_split(
    train_dat['text'],
    train_dat['label'],
    test_size=0.2,
    random_state=42,
)

In [8]:
# TF-IDF features over at most 5000 terms, with English stop words removed.
tfidf_vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)

# Fit the vocabulary on the training split only, then reuse it unchanged for
# the held-out split.
train_features = tfidf_vectorizer.fit_transform(train_data)
test_features = tfidf_vectorizer.transform(test_data)

In [9]:
# Multinomial Naive Bayes with default settings, trained on the TF-IDF
# features of the training split.
model = MultinomialNB()
model.fit(X=train_features, y=train_labels)

In [10]:
# Score the classifier on the held-out split.
predictions = model.predict(test_features)

# Overall accuracy, printed with two decimals.
accuracy = accuracy_score(y_true=test_labels, y_pred=predictions)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision / recall / F1 and support.
print(classification_report(y_true=test_labels, y_pred=predictions))

Accuracy: 0.73
              precision    recall  f1-score   support

           0       0.73      0.94      0.82       946
           1       0.66      0.97      0.79      1021
           2       1.00      0.14      0.24       296
           3       0.92      0.54      0.68       427
           4       0.92      0.42      0.58       397
           5       1.00      0.02      0.03       113

    accuracy                           0.73      3200
   macro avg       0.87      0.51      0.52      3200
weighted avg       0.79      0.73      0.68      3200



In [11]:
# Journal text pulled from Firestore.
d = data_df['content']
print(d)

# Reuse the vectorizer fitted on the training data so the feature space
# matches what the model was trained on.
vectorized_data = tfidf_vectorizer.transform(raw_documents=d)

# One integer class label per journal entry.
prediction_for_data = model.predict(X=vectorized_data)
print(prediction_for_data)

0    Today' gonna be a great day for me
Name: content, dtype: object
[1]


In [12]:
# Human-readable names for the integer class labels the classifier emits.
# The evaluation report lists six classes (0-5), so each of them needs an
# entry; without key 5 a journal predicted as that class raised KeyError.
# NOTE(review): 'surprise' for label 5 assumes training.csv follows the
# dair-ai/emotion label order (sadness, joy, love, anger, fear, surprise) --
# confirm against the data source. Label 1 is called 'joy' in that scheme;
# the name 'happiness' is kept unchanged here.
moods = {
    0 : 'sadness',
    1 : 'happiness',
    2 : 'love',
    3 : 'anger',
    4 : 'fear',
    5 : 'surprise'
}

# Mood of the first journal entry.
print(moods[prediction_for_data[0]])

joy


In [13]:
import joblib

# Persist the fitted vectorizer and the trained classifier so they can be
# loaded later without retraining.
# NOTE(review): the file name reads 'tfdif', likely a transposition of
# 'tfidf'. It is left unchanged because loaders may rely on this exact name.
joblib.dump(tfidf_vectorizer, 'tfdif_vectorizer.pkl')
joblib.dump(model, 'text_classifier_model.pkl')

['text_classifier_model.pkl']