In [50]:
import pandas as pd
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

import joblib


[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/spencer/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/spencer/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [51]:
df = pd.read_csv('data/social_media/sentiment_analysis.csv')
df.head()


Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram


In [52]:
sent_df = df[['text', 'sentiment']]
sent_df.head()

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative


In [53]:
sent_df['text'] = sent_df['text'].str.lower()
sent_df['text'] = sent_df['text'].astype(str)
sent_df['tokens'] = sent_df['text'].apply(nltk.word_tokenize)
print(sent_df.info())

# Remove stopwords
stopwords = nltk.corpus.stopwords.words('english')
sent_df['tokens'] = sent_df['tokens'].apply(lambda x: [word for word in x if word not in stopwords])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 499 entries, 0 to 498
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       499 non-null    object
 1   sentiment  499 non-null    object
 2   tokens     499 non-null    object
dtypes: object(3)
memory usage: 11.8+ KB
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sent_df['text'] = sent_df['text'].str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sent_df['text'] = sent_df['text'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sent_df['tokens'] = sent_df['text'].apply(nltk.word_tokenize)
A value is trying to be set on a copy of a slice fr

In [54]:
X = sent_df['text']
y = sent_df['sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [55]:
vectorizer = TfidfVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)


In [64]:
model_svc = SVC()
model_svc.fit(X_train_vectors, y_train)
# Save the model using joblib
joblib.dump(model_svc, 'svc_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']

In [57]:
y_predict = model_svc.predict(X_test_vectors)

In [58]:
print("Classification Report:")
print(classification_report(y_test, y_predict))

print("Accuracy Score:", accuracy_score(y_test, y_predict))

Classification Report:
              precision    recall  f1-score   support

    negative       1.00      0.46      0.63        28
     neutral       0.66      0.95      0.78        44
    positive       0.83      0.68      0.75        28

    accuracy                           0.74       100
   macro avg       0.83      0.70      0.72       100
weighted avg       0.80      0.74      0.73       100

Accuracy Score: 0.74


In [59]:
# Load the saved model
loaded_model = joblib.load('svc_model.pkl')

# You can now use the loaded model to make predictions
predictions = loaded_model.predict(X_test)

['good. thank you.', 'the red wire.', 'no. you are wrong. install the blue wire']


In [60]:
text_vectors = vectorizer.transform(new_text)

In [61]:
model_svc.predict(text_vectors)

array(['positive', 'neutral', 'neutral'], dtype=object)