In [2]:
import pickle
import pandas as pd

from nltk.corpus import stopwords

# Spanish stop-word list from NLTK; remove_stopwords() filters tokens against it.
# NOTE(review): this call needs the NLTK 'stopwords' corpus to be available
# locally (typically installed with nltk.download('stopwords')) — confirm setup.
spanish_stopwords = stopwords.words('spanish')

from nltk.stem.snowball import SnowballStemmer



In [3]:
# Load the cleaned tweet table; the first CSV column becomes the row index.
clean_data_path = 'Data/clean_data.csv'
df = pd.read_csv(clean_data_path, index_col=0)

In [4]:
# Print two sample tweets as plain text, then leave a third one as the
# cell's displayed value.
for row_label in (256, 244):
    print(df.loc[row_label, 'tweet'])
df.loc[195, 'tweet']

El sentido de la vida ya no es 42 -- Light Mental edición especial @TheBridge_Tech  https://t.co/z7hOVln8Bp
Gracias @HeavyMental_es por la tarde de ayer y a todos los asistentes ¡Éxito absoluto! 🎙👏


'🕣 TIC, TAC: MAÑANA os vemos a todos a las 7.30pm en nuestro evento en directo en MADRID en @TheBridge_Tech   🚨Para los que no vayáis, TRANQUILOS, seguramente el martes lo emitamos para todos vosotros :D'

In [5]:
# Restore the trained pipeline from disk.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
# NOTE(review): the recorded output of this cell links to scikit-learn's
# model-persistence limitations page, which suggests the pickle was written
# by a different scikit-learn version — confirm, and re-export if so.
with open('model/finished_model.model', mode="rb") as model_file:
    pipeline_importada = pickle.load(model_file)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [6]:
import re

# Characters stripped from tweets before modelling: . ; : ! ? ¿ @ , " ( ) [ ]
# plus any run of digits.
# Written as a raw string with a character class so the pattern contains no
# invalid string escapes (such as "\;" or "\¿"), which newer Python versions
# warn about. It matches exactly the same text as the previous alternation.
# NOTE(review): '¡' is not in this set although '¿' is, so it survives
# cleaning. Left unchanged so the cleaned text stays consistent with whatever
# the pickled model was trained on — confirm against the training code.
signos = re.compile(r'[.;:!?¿@,"()\[\]]|\d+')

def signs_tweets(tweet):
    """Lower-case a tweet and delete punctuation and digits.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The lower-cased text with every match of ``signos`` removed.
        Whitespace is left untouched, so removed tokens may leave
        consecutive spaces behind.
    """
    return signos.sub('', tweet.lower())

def remove_links(df):
    """Replace every token that contains 'http' with the placeholder '{link}'.

    Despite the parameter name, ``df`` is a single text string. It is split
    on whitespace, each token is checked for the substring 'http', and the
    tokens are re-joined with single spaces (so runs of whitespace collapse).
    """
    tokens = []
    for token in df.split():
        tokens.append('{link}' if 'http' in token else token)
    return " ".join(tokens)

def remove_stopwords(df):
    """Drop every token found in ``spanish_stopwords``.

    Despite the parameter name, ``df`` is a single text string. It is split
    on whitespace and the remaining tokens are re-joined with single spaces.
    The comparison is exact, so the text is expected to be lower-cased
    already if the stop-word list is lower-case.
    """
    kept = filter(lambda token: token not in spanish_stopwords, df.split())
    return " ".join(kept)

def spanish_stemmer(x):
    """Stem each whitespace-separated token of ``x`` with the Spanish Snowball stemmer.

    A new ``SnowballStemmer('spanish')`` is created on every call; the
    stemmed tokens are re-joined with single spaces.
    """
    stem = SnowballStemmer('spanish').stem
    return " ".join(map(stem, x.split()))



In [7]:
def clean_tweet(tweet):
    """Run one raw tweet through the cleaning steps used in this notebook.

    The steps are applied in this order:
    1. signs_tweets     - lower-case and strip punctuation / digits
    2. remove_links     - replace link tokens with '{link}'
    3. remove_stopwords - drop Spanish stop words
    4. spanish_stemmer  - stem the remaining tokens

    Returns the cleaned text as a string.
    """
    for step in (signs_tweets, remove_links, remove_stopwords, spanish_stemmer):
        tweet = step(tweet)
    return tweet


def build_test_frame(tweets):
    """Build a frame with the raw tweets in 'content' and the cleaned text in 'content_clean'.

    ``tweets`` is a pandas Series of raw tweet strings.
    """
    frame = pd.DataFrame(tweets, columns=['content'])
    frame['content_clean'] = frame['content'].apply(clean_tweet)
    return frame


# Three sample tweets, each cleaned with the same pipeline so that the
# steps are defined once instead of being copy-pasted per frame.
text = pd.Series('El sentido de la vida ya no es 42 -- Light Mental edición especial @TheBridge_Tech  https://t.co/z7hOVln8Bp')
test_clean = build_test_frame(text)

text2 = pd.Series('Gracias @HeavyMental_es por la tarde de ayer y a todos los asistentes ¡Éxito absoluto! 🎙👏')
test_clean2 = build_test_frame(text2)

text3 = pd.Series('🕣 TIC, TAC: MAÑANA os vemos a todos a las 7.30pm en nuestro evento en directo en MADRID en @TheBridge_Tech   🚨Para los que no vayáis, TRANQUILOS, seguramente el martes lo emitamos para todos vosotros :D')
test_clean3 = build_test_frame(text3)

In [10]:
# Predict the polarity label for each cleaned tweet in the frame.
predictions = pipeline_importada.predict(test_clean['content_clean'])
# Assign positionally. Wrapping the result in pd.Series would align on index
# labels and silently yield NaN if the frame's index were not 0..n-1.
test_clean['Polarity'] = predictions
test_clean

Unnamed: 0,content,content_clean,Polarity
0,El sentido de la vida ya no es 42 -- Light Men...,vid -- light mental edicion especial thebridge...,0


In [18]:
# Predict the polarity label for each cleaned tweet in the frame.
predictions = pipeline_importada.predict(test_clean2['content_clean'])
# Assign positionally. Wrapping the result in pd.Series would align on index
# labels and silently yield NaN if the frame's index were not 0..n-1.
test_clean2['Polarity'] = predictions
test_clean2

Unnamed: 0,content,content_clean,Polarity
0,Gracias @HeavyMental_es por la tarde de ayer y...,graci heavymental_ tard ayer asistent ¡exit ab...,0


In [19]:
# Predict the polarity label for each cleaned tweet in the frame.
predictions = pipeline_importada.predict(test_clean3['content_clean'])
# Assign positionally. Wrapping the result in pd.Series would align on index
# labels and silently yield NaN if the frame's index were not 0..n-1.
test_clean3['Polarity'] = predictions
test_clean3

Unnamed: 0,content,content_clean,Polarity
0,"🕣 TIC, TAC: MAÑANA os vemos a todos a las 7.30...",🕣 tic tac mañan vem pm event direct madr thebr...,0


In [20]:
# Class probabilities for each cleaned tweet.
# Assumes pipeline_importada is a scikit-learn estimator whose predict_proba
# returns an array of shape (n_tweets, n_classes) — TODO confirm.
predictions = pipeline_importada.predict_proba(test_clean['content_clean'])
# Take whole columns instead of the single value predictions[0][k], so every
# row of the frame receives its own probability (not only row 0).
# NOTE(review): column k belongs to pipeline_importada.classes_[k]; the
# 'Pos'/'Neg' names assume class 0 is the positive class — confirm.
test_clean['Polarity_Pos'] = predictions[:, 0]
test_clean['Polarity_Neg'] = predictions[:, 1]
test_clean

Unnamed: 0,content,content_clean,Polarity,Polarity_Pos,Polarity_Neg
0,El sentido de la vida ya no es 42 -- Light Men...,vid -- light mental edicion especial thebridge...,0,0.764106,0.235894


In [21]:
# Class probabilities for each cleaned tweet.
# Assumes pipeline_importada is a scikit-learn estimator whose predict_proba
# returns an array of shape (n_tweets, n_classes) — TODO confirm.
predictions = pipeline_importada.predict_proba(test_clean2['content_clean'])
# Take whole columns instead of the single value predictions[0][k], so every
# row of the frame receives its own probability (not only row 0).
# NOTE(review): column k belongs to pipeline_importada.classes_[k]; the
# 'Pos'/'Neg' names assume class 0 is the positive class — confirm.
test_clean2['Polarity_Pos'] = predictions[:, 0]
test_clean2['Polarity_Neg'] = predictions[:, 1]
test_clean2

Unnamed: 0,content,content_clean,Polarity,Polarity_Pos,Polarity_Neg
0,Gracias @HeavyMental_es por la tarde de ayer y...,graci heavymental_ tard ayer asistent ¡exit ab...,0,0.981161,0.018839


In [22]:
# Class probabilities for each cleaned tweet.
# Assumes pipeline_importada is a scikit-learn estimator whose predict_proba
# returns an array of shape (n_tweets, n_classes) — TODO confirm.
predictions = pipeline_importada.predict_proba(test_clean3['content_clean'])
# Take whole columns instead of the single value predictions[0][k], so every
# row of the frame receives its own probability (not only row 0).
# NOTE(review): column k belongs to pipeline_importada.classes_[k]; the
# 'Pos'/'Neg' names assume class 0 is the positive class — confirm.
test_clean3['Polarity_Pos'] = predictions[:, 0]
test_clean3['Polarity_Neg'] = predictions[:, 1]
test_clean3

Unnamed: 0,content,content_clean,Polarity,Polarity_Pos,Polarity_Neg
0,"🕣 TIC, TAC: MAÑANA os vemos a todos a las 7.30...",🕣 tic tac mañan vem pm event direct madr thebr...,0,0.666356,0.333644
