In [1]:
import re
import json
import pandas as pd

from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import iplot

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from sklearn.metrics import silhouette_score

In [2]:
path = 'dataset_requerimientos.csv'
data = pd.read_csv(path)
data.head()

Unnamed: 0,PROJECT,REQUIREMENT,FINAL_LABEL
0,16,Poder crear un usuario y acceder a través de é...,NF
1,16,Poder crear un perfil personal e individual a ...,F
2,16,Acceder a la aplicación y a sus funcionalidade...,NF
3,16,Todos los datos introducidos podrán ser leídos...,NF
4,16,"Poder leer, eliminar, editar o incluir cualqui...",F


### Clasificación - datos train y test

In [3]:
data.FINAL_LABEL.value_counts()

F     300
NF     89
Name: FINAL_LABEL, dtype: int64

In [4]:
data.FINAL_LABEL.value_counts(normalize=True) 

F     0.771208
NF    0.228792
Name: FINAL_LABEL, dtype: float64

In [5]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=50)

print(f"El cojunto de datos de entrenamiento tiene {train.shape[0]} registros")
print(f"El cojunto de datos de prueba tiene {test.shape[0]} registros")

El cojunto de datos de entrenamiento tiene 311 registros
El cojunto de datos de prueba tiene 78 registros


### Preprocesamiento del texto

In [6]:
stopwords_sp = stopwords.words('spanish')

def pre_procesado(texto):
    texto = texto.lower()
    texto = re.sub(r"[\W\d_]+", " ", texto)
    texto = " ".join([palabra for palabra in texto.split() if palabra not in stopwords_sp])
    return texto

tfidf_vect = TfidfVectorizer(preprocessor=pre_procesado)
tfidf_vect


TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
                dtype=<class 'numpy.float64'>, encoding='utf-8',
                input='content', lowercase=True, max_df=1.0, max_features=None,
                min_df=1, ngram_range=(1, 1), norm='l2',
                preprocessor=<function pre_procesado at 0x000001324B47B8B8>,
                smooth_idf=True, stop_words=None, strip_accents=None,
                sublinear_tf=False, token_pattern='(?u)\\b\\w\\w+\\b',
                tokenizer=None, use_idf=True, vocabulary=None)

In [7]:
X_train = tfidf_vect.fit_transform(train.REQUIREMENT.values)
y_train = train.FINAL_LABEL.values

X_test = tfidf_vect.transform(test.REQUIREMENT.values)
y_test = test.FINAL_LABEL.values

#para verificar, lo transoformamos a matriz
X_train.toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
#o Hacemos la prueba con nuestra y para verifciar el etiquetado
y_train

array(['NF', 'F', 'F', 'NF', 'F', 'F', 'NF', 'NF', 'NF', 'F', 'F', 'NF',
       'F', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'F',
       'F', 'F', 'F', 'F', 'F', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'NF',
       'NF', 'F', 'F', 'F', 'NF', 'F', 'NF', 'NF', 'F', 'F', 'F', 'F',
       'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'F', 'F', 'NF', 'F', 'F', 'F',
       'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'F', 'NF', 'F', 'F', 'F', 'F',
       'F', 'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'NF', 'F', 'F', 'F', 'F',
       'F', 'NF', 'F', 'NF', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
       'NF', 'F', 'F', 'F', 'NF', 'F', 'NF', 'F', 'F', 'F', 'NF', 'F',
       'F', 'F', 'F', 'NF', 'NF', 'F', 'NF', 'F', 'F', 'NF', 'F', 'NF',
       'F', 'F', 'F', 'NF', 'F', 'F', 'F', 'NF', 'F', 'F', 'F', 'NF', 'F',
       'F', 'F', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
       'F', 'F', 'F', 'NF', 'NF', 'F', 'NF', 'F', 'NF', 'F', 'F', 'F',
       'NF', 'F', 'NF', 'F', 'F', 'NF', 'F', 'NF'

### Entrenamiento del modelo

In [9]:
data

Unnamed: 0,PROJECT,REQUIREMENT,FINAL_LABEL
0,16,Poder crear un usuario y acceder a través de é...,NF
1,16,Poder crear un perfil personal e individual a ...,F
2,16,Acceder a la aplicación y a sus funcionalidade...,NF
3,16,Todos los datos introducidos podrán ser leídos...,NF
4,16,"Poder leer, eliminar, editar o incluir cualqui...",F
...,...,...,...
384,30,Gestión de los errores: El asistente debe esta...,F
385,30,Respuestas coherentes e idénticas ante entrada...,F
386,30,Velocidad en las respuestas: El principal serv...,NF
387,30,Facilidad de uso: Las preguntas se deben poder...,NF


In [10]:
#Inicializamos los clasificadores
logreg = LogisticRegression(class_weight='balanced')

In [11]:
#Entrenamos el modelo
modelo1= logreg.fit(X_train, y_train)
modelo1

LogisticRegression(C=1.0, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

### Métricas de evaluación 

In [13]:
# Hacemos las predicciones para los datos de prueba (test)
y_pred_lr = logreg.predict(X_test)
y_pred_lr

array(['F', 'F', 'F', 'F', 'F', 'NF', 'NF', 'F', 'F', 'F', 'NF', 'NF',
       'F', 'F', 'NF', 'F', 'F', 'F', 'F', 'NF', 'F', 'F', 'F', 'F', 'F',
       'F', 'F', 'F', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'F', 'NF',
       'F', 'F', 'F', 'NF', 'F', 'NF', 'F', 'F', 'F', 'NF', 'F', 'F', 'F',
       'F', 'F', 'NF', 'F', 'NF', 'NF', 'F', 'F', 'F', 'NF', 'F', 'F',
       'F', 'NF', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'NF', 'F', 'F', 'NF',
       'NF', 'F'], dtype=object)

In [14]:
print("Regresión Logística")
print(classification_report(y_test, y_pred_lr))
print()

Regresión Logística
              precision    recall  f1-score   support

           F       0.95      0.90      0.92        61
          NF       0.70      0.82      0.76        17

    accuracy                           0.88        78
   macro avg       0.82      0.86      0.84        78
weighted avg       0.89      0.88      0.89        78




### CARGAMOS Y PREPROCESAMOS NUESTROS DATOS

In [62]:
import pandas as pd
data=pd.read_csv('dataGPT.csv')
data2=data
data2.sample (10)

Unnamed: 0,tweet,date,user_location,coordinates,link
364,Los músicos pueden utilizar ChatGPT para mejor...,2023-02-24 23:03:35,Venezuela.,,https://twitter.com/i/web/status/1629255751582...
676,ChatGPT podrá controlar robots en el futuro. A...,2023-02-24 13:42:37,"SLP, México",,https://twitter.com/i/web/status/1629114579002...
409,Los ciberdelincuentes están distribuyendo un #...,2023-02-24 21:31:03,Mexico,,https://twitter.com/i/web/status/1629232461212...
26,Si amar fuese un delito me declaro culpable de...,2023-02-25 19:26:16,Macondo,,https://twitter.com/i/web/status/1629563449440...
997,Microsoft incluye publicidad en las respuestas...,2023-02-23 20:57:01,Ciudad de México,,https://twitter.com/i/web/status/1628861509559...
966,China regulariza uso de ChatGPT a las empresas...,2023-02-23 21:53:25,Venezuela,,https://twitter.com/i/web/status/1628875704409...
367,Seguimos entrenado a ChatGPT para usar Virtual...,2023-02-24 22:50:20,Ciudad De México,,https://twitter.com/i/web/status/1629252414070...
728,#InteligenciaArtificial: China considera a la ...,2023-02-24 11:37:47,Mexico,,https://twitter.com/i/web/status/1629083163203...
512,Parce yo tengo un segundo cerebro en @NotionHQ...,2023-02-24 18:31:38,"Bogotá, D.C., Colombia",,https://twitter.com/i/web/status/1629187311740...
748,🌀 Cómo utilizar ChatGPT para escritura creativ...,2023-02-24 10:27:00,Galicia,,https://twitter.com/i/web/status/1629065349982...


In [63]:
import re 
pattern1 = '?P<pic>pic.twitter.com/[^\s]+'
pattern2 = '?P<url>https?://[^\s]+'

def text_clean(row):
    text = row['tweet']
    
    links = [tuple(j for j in i if j)[-1] for i in re.findall(f"({pattern1})|({pattern2})",text)]
    for link in links:
        text = text.replace(link,"")
             
    hashtags = [interaction for interaction in text.split() if interaction.startswith("#")]
    for hashtag in hashtags:
        text = text.replace(hashtag,"")
        
    mentions = [interaction for interaction in text.split() if interaction.startswith("@")]
    for mention in mentions:
        text = text.replace(mention,"")
        
    return text, links, hashtags, mentions

In [65]:
data2[['clean', 'links', 'hashtags', 'mentions']] = data2.apply(text_clean, axis=1, result_type='expand')

data2.head(10)


Unnamed: 0,tweet,date,user_location,coordinates,link,clean,links,hashtags,mentions
0,El inesperado milagro de #ChatGPT: crece más r...,2023-02-25 20:58:58,"Starbucks, clouds y Chimbote",,https://twitter.com/i/web/status/1629586778708...,El inesperado milagro de crece más rápido que...,[https://t.co/MfrsUH8IQf],"[#ChatGPT:, #TikTok]",[]
1,La batalla de los bots transformará la forma c...,2023-02-25 20:56:49,"Miami, FL",,https://twitter.com/i/web/status/1629586236481...,La batalla de los bots transformará la forma c...,[https://t.co/auZ4NpqDdH],"[#buscadores., #chatgpt, #google, #inteligenci...",[]
2,"LLaMA, el nuevo modelo de lenguaje de Intelige...",2023-02-25 20:52:03,Santa Cruz de la Sierra,,https://twitter.com/i/web/status/1629585036872...,"LLaMA, el nuevo modelo de lenguaje de Intelige...","[https://t.co/12nxKa6lcR, https://t.co/ti9j2lb...",[],[]
3,Qué increíble e interesante es el #ChatGPT 🤯,2023-02-25 20:51:09,"San José, Costa Rica",,https://twitter.com/i/web/status/1629584810338...,Qué increíble e interesante es el 🤯,[],[#ChatGPT],[]
4,💻 Ambas herramientas ofrecen respuestas a preg...,2023-02-25 20:47:35,"Mazatlán, Sinaloa",,https://twitter.com/i/web/status/1629583914040...,💻 Ambas herramientas ofrecen respuestas a preg...,[https://t.co/4kiwV3eE9l],[#ChatGPT],[@StackOverflow]
5,@elkinmartinez6 @5eniorDeveloper Nada de lo qu...,2023-02-25 20:39:00,Hermosillo sonora,,https://twitter.com/i/web/status/1629581751524...,Nada de lo que sea programación enfocada en ...,[],[],"[@elkinmartinez6, @5eniorDeveloper]"
6,"Google Bard: qué es, cómo funciona y qué puede...",2023-02-25 20:35:01,guayaquil,,https://twitter.com/i/web/status/1629580749476...,"Google Bard: qué es, cómo funciona y qué puede...",[https://t.co/YbgVPQrxUP],[],[@xatakamexico]
7,Las empresas se encuentran en continuo cambio ...,2023-02-25 20:31:49,Colombia,,https://twitter.com/i/web/status/1629579945864...,Las empresas se encuentran en continuo cambio ...,[],[],[]
8,#OJO Nuevo malware roba credenciales de redes ...,2023-02-25 20:30:00,Ciudad de México,,https://twitter.com/i/web/status/1629579485824...,Nuevo malware roba credenciales de redes soci...,"[https://t.co/F1bhwIJ1L0, https://t.co/IKW9a2i...",[#OJO],[]
9,¿Has probado #ChatGPT ?: otras cinco 'apps' de...,2023-02-25 20:24:34,"Starbucks, clouds y Chimbote",,https://twitter.com/i/web/status/1629578118775...,¿Has probado ?: otras cinco 'apps' de intelig...,[https://t.co/nnufzyHP37],[#ChatGPT],[]


In [69]:
import regex
import emoji
 
def get_emojis(text):
    emoji_list = []
    data = regex.findall(r'\X', text)
    for word in data:
        if any(char in emoji.UNICODE_EMOJI for char in word):
            emoji_list.append(word)
 
    return emoji_list
 
data2['emojis'] = data2['clean'].apply(lambda text: get_emojis(text))
data2.head(10);

In [70]:
def deEmojify(text):
    regrex_pattern = re.compile(pattern = "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags = re.UNICODE)
    return regrex_pattern.sub(r'',text)

In [71]:
def pre_procesado(texto):
    texto = texto.lower()
    texto = re.sub(r"[\W\d_]+", " ", texto)
    texto = " ".join([palabra for palabra in texto.split() if palabra not in stopwords_sp])
    return texto

In [87]:
data2['preproces'] = data2['clean'].apply(lambda texto: pre_procesado(texto))
data2.head(10)



Unnamed: 0,tweet,date,user_location,coordinates,link,clean,links,hashtags,mentions,emojis,sinemojis,preproces
0,El inesperado milagro de #ChatGPT: crece más r...,2023-02-25 20:58:58,"Starbucks, clouds y Chimbote",,https://twitter.com/i/web/status/1629586778708...,El inesperado milagro de crece más rápido que...,[https://t.co/MfrsUH8IQf],"[#ChatGPT:, #TikTok]",[],[],El inesperado milagro de crece más rápido que...,inesperado milagro crece rápido cuenta cientos...
1,La batalla de los bots transformará la forma c...,2023-02-25 20:56:49,"Miami, FL",,https://twitter.com/i/web/status/1629586236481...,La batalla de los bots transformará la forma c...,[https://t.co/auZ4NpqDdH],"[#buscadores., #chatgpt, #google, #inteligenci...",[],[],La batalla de los bots transformará la forma c...,batalla bots transformará forma personas encue...
2,"LLaMA, el nuevo modelo de lenguaje de Intelige...",2023-02-25 20:52:03,Santa Cruz de la Sierra,,https://twitter.com/i/web/status/1629585036872...,"LLaMA, el nuevo modelo de lenguaje de Intelige...","[https://t.co/12nxKa6lcR, https://t.co/ti9j2lb...",[],[],[],"LLaMA, el nuevo modelo de lenguaje de Intelige...",llama nuevo modelo lenguaje inteligencia artif...
3,Qué increíble e interesante es el #ChatGPT 🤯,2023-02-25 20:51:09,"San José, Costa Rica",,https://twitter.com/i/web/status/1629584810338...,Qué increíble e interesante es el 🤯,[],[#ChatGPT],[],[🤯],Qué increíble e interesante es el 🤯,increíble interesante
4,💻 Ambas herramientas ofrecen respuestas a preg...,2023-02-25 20:47:35,"Mazatlán, Sinaloa",,https://twitter.com/i/web/status/1629583914040...,💻 Ambas herramientas ofrecen respuestas a preg...,[https://t.co/4kiwV3eE9l],[#ChatGPT],[@StackOverflow],"[💻, 🤔, 👇]",Ambas herramientas ofrecen respuestas a pregu...,ambas herramientas ofrecen respuestas pregunta...
5,@elkinmartinez6 @5eniorDeveloper Nada de lo qu...,2023-02-25 20:39:00,Hermosillo sonora,,https://twitter.com/i/web/status/1629581751524...,Nada de lo que sea programación enfocada en ...,[],[],"[@elkinmartinez6, @5eniorDeveloper]",[],Nada de lo que sea programación enfocada en ...,programación enfocada demostrar conceptos mate...
6,"Google Bard: qué es, cómo funciona y qué puede...",2023-02-25 20:35:01,guayaquil,,https://twitter.com/i/web/status/1629580749476...,"Google Bard: qué es, cómo funciona y qué puede...",[https://t.co/YbgVPQrxUP],[],[@xatakamexico],[],"Google Bard: qué es, cómo funciona y qué puede...",google bard cómo funciona puedes hacer intelig...
7,Las empresas se encuentran en continuo cambio ...,2023-02-25 20:31:49,Colombia,,https://twitter.com/i/web/status/1629579945864...,Las empresas se encuentran en continuo cambio ...,[],[],[],[],Las empresas se encuentran en continuo cambio ...,empresas encuentran continuo cambio debido tra...
8,#OJO Nuevo malware roba credenciales de redes ...,2023-02-25 20:30:00,Ciudad de México,,https://twitter.com/i/web/status/1629579485824...,Nuevo malware roba credenciales de redes soci...,"[https://t.co/F1bhwIJ1L0, https://t.co/IKW9a2i...",[#OJO],[],[],Nuevo malware roba credenciales de redes soci...,nuevo malware roba credenciales redes sociales...
9,¿Has probado #ChatGPT ?: otras cinco 'apps' de...,2023-02-25 20:24:34,"Starbucks, clouds y Chimbote",,https://twitter.com/i/web/status/1629578118775...,¿Has probado ?: otras cinco 'apps' de intelig...,[https://t.co/nnufzyHP37],[#ChatGPT],[],[],¿Has probado ?: otras cinco 'apps' de intelig...,probado cinco apps inteligencia artificial sor...


In [88]:
data2.to_excel("clean.xls", index=False)

In [90]:
data3= pd.read_excel("clean.xls")
data3

Unnamed: 0,IDTweet,tweet,date,user_location,coordinates,link,clean,links,hashtags,mentions,emojis,sinemojis,preproces
0,A1,El inesperado milagro de #ChatGPT: crece más r...,2023-02-25 20:58:58,"Starbucks, clouds y Chimbote",,https://twitter.com/i/web/status/1629586778708...,El inesperado milagro de crece más rápido que...,['https://t.co/MfrsUH8IQf'],"['#ChatGPT:', '#TikTok']",[],[],El inesperado milagro de crece más rápido que...,inesperado milagro crece rápido cuenta cientos...
1,A2,La batalla de los bots transformará la forma c...,2023-02-25 20:56:49,"Miami, FL",,https://twitter.com/i/web/status/1629586236481...,La batalla de los bots transformará la forma c...,['https://t.co/auZ4NpqDdH'],"['#buscadores.', '#chatgpt', '#google', '#inte...",[],[],La batalla de los bots transformará la forma c...,batalla bots transformará forma personas encue...
2,A3,"LLaMA, el nuevo modelo de lenguaje de Intelige...",2023-02-25 20:52:03,Santa Cruz de la Sierra,,https://twitter.com/i/web/status/1629585036872...,"LLaMA, el nuevo modelo de lenguaje de Intelige...","['https://t.co/12nxKa6lcR', 'https://t.co/ti9j...",[],[],[],"LLaMA, el nuevo modelo de lenguaje de Intelige...",llama nuevo modelo lenguaje inteligencia artif...
3,A4,Qué increíble e interesante es el #ChatGPT 🤯,2023-02-25 20:51:09,"San José, Costa Rica",,https://twitter.com/i/web/status/1629584810338...,Qué increíble e interesante es el 🤯,[],['#ChatGPT'],[],['🤯'],Qué increíble e interesante es el 🤯,increíble interesante
4,A5,💻 Ambas herramientas ofrecen respuestas a preg...,2023-02-25 20:47:35,"Mazatlán, Sinaloa",,https://twitter.com/i/web/status/1629583914040...,💻 Ambas herramientas ofrecen respuestas a preg...,['https://t.co/4kiwV3eE9l'],['#ChatGPT'],['@StackOverflow'],"['💻', '🤔', '👇']",Ambas herramientas ofrecen respuestas a pregu...,ambas herramientas ofrecen respuestas pregunta...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,A996,Baidu lanza Ernie Bot para competir con ChatGP...,2023-02-23 21:00:06,Venezuela,,https://twitter.com/i/web/status/1628862286558...,Baidu lanza Ernie Bot para competir con ChatGPT,['https://t.co/ic8XrEOeR0'],[],[],[],Baidu lanza Ernie Bot para competir con ChatGPT,baidu lanza ernie bot competir chatgpt
996,A997,¿Te imaginaste que un juez realice una audienc...,2023-02-23 20:57:36,"Lima, Peru",,https://twitter.com/i/web/status/1628861658285...,¿Te imaginaste que un juez realice una audienc...,['https://t.co/oNfUuwEhG8'],[],[],['🤖'],¿Te imaginaste que un juez realice una audienc...,imaginaste juez realice audiencia metaverso us...
997,A998,Microsoft incluye publicidad en las respuestas...,2023-02-23 20:57:01,Ciudad de México,,https://twitter.com/i/web/status/1628861509559...,Microsoft incluye publicidad en las respuestas...,['https://t.co/ZbrhqHKALK'],['#Tecnología'],['@Mkt4eCommerceMX'],[],Microsoft incluye publicidad en las respuestas...,microsoft incluye publicidad respuestas creada...
998,A999,Lo que ustedes no saben es que los tuits de la...,2023-02-23 20:53:56,"Cali, Colombia",,https://twitter.com/i/web/status/1628860734229...,Lo que ustedes no saben es que los tuits de la...,[],[],[],['🙏🏻'],Lo que ustedes no saben es que los tuits de la...,ustedes saben tuits maluca carolina sanín escr...


In [92]:
tfidf_vect2 = TfidfVectorizer()
tfidf_tweet = tfidf_vect2.fit_transform(data3['preproces'].values)

tfidf_tweet_matriz = pd.DataFrame(tfidf_tweet.toarray(), columns=tfidf_vect2.get_feature_names())
tfidf_tweet_matriz.index = data3['IDTweet'].values
tfidf_tweet_matriz.T.round(3)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,A991,A992,A993,A994,A995,A996,A997,A998,A999,A1000
aaaaaaa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abanderado,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abasto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abierta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abierto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
únicamente,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
útil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
útiles,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
𝐂𝐢𝐞𝐧𝐜𝐢𝐚,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [94]:
log.predict(tfidf_tweet)
modelo1

ValueError: X has 3902 features per sample; expecting 1267

In [100]:
nuevos = data3['preproces'].tolist()
nuevos_trans = tfidf_vect2.transform(nuevos)

In [103]:
modelo1.predict(nuevos_trans)

ValueError: X has 3902 features per sample; expecting 1267