### Analizando los Sentimientos

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.metrics import accuracy_score, classification_report

In [10]:
# Load the dataset.
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a backslash-separated literal only resolves on Windows.
ruta = "../data/raw/playstore_reviews.csv"
df = pd.read_csv(ruta)

In [11]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,package_name,review,polarity
0,com.facebook.katana,privacy at least put some option appear offli...,0
1,com.facebook.katana,"messenger issues ever since the last update, ...",0
2,com.facebook.katana,profile any time my wife or anybody has more ...,0
3,com.facebook.katana,the new features suck for those of us who don...,0
4,com.facebook.katana,forced reload on uploading pic on replying co...,0


In [14]:
# List the column labels of the loaded frame.
df.columns

Index(['package_name', 'review', 'polarity'], dtype='object')

In [13]:
# Normalize the review text: cast to str, trim surrounding whitespace, lowercase.
df["review"] = (
    df["review"]
    .astype(str)
    .str.strip()
    .str.lower()
)

In [8]:
X = df["review"]
y = df["polarity"]

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=18
)

In [15]:
#  Vectorización del texto
vec_model = CountVectorizer(stop_words = "english")
X_train = vec_model.fit_transform(X_train).toarray()
X_test = vec_model.transform(X_test).toarray()

In [21]:
# Train the model: multinomial Naive Bayes with alpha=0.5 on the training counts.
# fit() returns the estimator itself, so the chained form binds the fitted model.
model = MultinomialNB(alpha=0.5).fit(X_train, y_train)
model

0,1,2
,alpha,0.5
,force_alpha,True
,fit_prior,True
,class_prior,


In [22]:
# Predict test-set labels with the fitted model; they are scored in the next cells.
y_pred = model.predict(X_test)

In [23]:
# Fraction of test reviews whose predicted polarity matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8100558659217877

In [24]:
# Per-class precision, recall, F1 and support for the current predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       114
           1       0.76      0.69      0.73        65

    accuracy                           0.81       179
   macro avg       0.80      0.78      0.79       179
weighted avg       0.81      0.81      0.81       179



### Probamos hiperparámetros

In [25]:
# Base estimator to tune.
nb = MultinomialNB()

# Candidate values for the smoothing parameter alpha.
param_grid = dict(alpha=[0.01, 0.1, 0.5, 1.0, 2.0, 5.0])

# Cross-validated search (5 folds), selecting by accuracy.
grid_search = GridSearchCV(
    estimator=nb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Best model found by the search.
best_nb = grid_search.best_estimator_

print("Mejor alpha encontrado:", grid_search.best_params_['alpha'])

Mejor alpha encontrado: 0.01


In [26]:
# Score the grid-search winner on the test set.
y_pred = best_nb.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.7988826815642458
              precision    recall  f1-score   support

           0       0.80      0.90      0.85       114
           1       0.78      0.62      0.69        65

    accuracy                           0.80       179
   macro avg       0.79      0.76      0.77       179
weighted avg       0.80      0.80      0.79       179



# Implementamos el modelo con las otras variantes de Naive Bayes (GaussianNB)

In [30]:
# Fit a Gaussian Naive Bayes classifier on the same training matrix.
# fit() returns the estimator itself, so the chained form binds the fitted model.
model_gaussian = GaussianNB().fit(X_train, y_train)
model_gaussian

0,1,2
,priors,
,var_smoothing,1e-09


In [31]:
# Predict test-set labels with the Gaussian model (it was trained in the previous cell).
y_pred = model_gaussian.predict(X_test)

In [32]:
# Test-set accuracy of the Gaussian model's predictions.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7877094972067039

In [33]:
# Per-class precision, recall, F1 and support for the Gaussian model.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.81      0.87      0.84       114
           1       0.74      0.65      0.69        65

    accuracy                           0.79       179
   macro avg       0.77      0.76      0.76       179
weighted avg       0.78      0.79      0.78       179



# Implementamos el modelo con Bernoulli

In [34]:
# Fit a Bernoulli Naive Bayes classifier (default parameters) on the same training matrix.
# fit() returns the estimator itself, so the chained form binds the fitted model.
model_Bernoulli = BernoulliNB().fit(X_train, y_train)
model_Bernoulli

0,1,2
,alpha,1.0
,force_alpha,True
,binarize,0.0
,fit_prior,True
,class_prior,


In [35]:
# Predict test-set labels with the Bernoulli model (it was trained in the previous cell).
y_pred = model_Bernoulli.predict(X_test)

In [36]:
# Test-set accuracy of the Bernoulli model's predictions.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7150837988826816

In [37]:
# Per-class precision, recall, F1 and support for the Bernoulli model.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.71      0.95      0.81       114
           1       0.77      0.31      0.44        65

    accuracy                           0.72       179
   macro avg       0.74      0.63      0.62       179
weighted avg       0.73      0.72      0.67       179



# Conclusión:
> - El mejor modelo es MultinomialNB, pues es el que ha dado el mejor resultado (accuracy de 0.81 frente a 0.79 de GaussianNB y 0.72 de BernoulliNB); no ha dado un resultado perfecto, pero es el que más se acerca.