In [20]:
import pandas as pd
import re                                   # регулярные выражениями
import numpy as np
from nltk.stem.porter import PorterStemmer  # стеммер
import nltk                               # работа с естественным языком           
from nltk.corpus import stopwords         # список стоп-слов
import string                             # работа со строковыми значениями
from sklearn.feature_extraction.text import TfidfVectorizer          # Преобразование тектов в TFIDF-матрицу
from sklearn.metrics.pairwise import cosine_similarity               # Метрика измерения косинусного расстояния между векторами
import gensim                                        # Работа с моделями W2V
import gensim.downloader
from gensim.models import word2vec,KeyedVectors

In [21]:
# Загрузим данные
df = pd.read_csv('sample-data.csv')

# Сводная информация о загруженном датасете
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           500 non-null    int64 
 1   description  500 non-null    object
dtypes: int64(1), object(1)
memory usage: 7.9+ KB


In [22]:
#Стеммер
stemmer = PorterStemmer()

In [23]:
import nltk
nltk.download('stopwords')
# Справочник английских стоп-слов 
stopwords_english = stopwords.words('english')

# Список стоп-слов
print('Базовый список стоп-слов:', stopwords_english)

Базовый список стоп-слов: ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\a_l_b\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [24]:
# Работа по преобразованию данных в нужный вид.
class dataset():
    def __init__(self, df):
        self.df = df
        self.spec_chars_ = string.punctuation
        self.stopwords_english = stopwords.words('english')
     #Функция удаления символов из текста
    def remove_chars_from_text(self, text_, chars_):
            return "".join([ch for ch in text_ if ch not in chars_])
    # Функция удаления HTML-ссылок
    def remove_urls(self, text_):
        url_remove = re.compile(r'https?://\S+|www\.\S+')
        return url_remove.sub(r' ', str(text_))
    # Функция удаления URL-ссылок
    def remove_html(self, text_):
        html=re.compile(r'<.*?>')
        return html.sub(r' ',str(text_))
    # Функция удаления стоп-слов
    def clear_stop_words(self, text_):
        return ' '.join([word for word in str(text_).split() if word not in stopwords_english])
 
    def forward(self, df):
        # Удаляем URL-ссылки
        df["description"] = df["description"].apply(lambda x: self.remove_urls(str(x)))
        # Удаляем HTML-ссылки
        df["description"] = df["description"].apply(lambda x: self.remove_html(str(x)))
        # Переводим символы к нижнему регистру
        df["description"] = df["description"].apply(lambda x: x.lower())
        # Удаляем пунктуацию и спец-символы
        df["description"] = df["description"].apply(lambda x: self.remove_chars_from_text(str(x), self.spec_chars_))
        # Удаляем цифры
        df["description"] = df["description"].apply(lambda x: self.remove_chars_from_text(str(x), string.digits))
        # Убираем лишние пробелы
        df["description"] = df["description"].apply(lambda x: ' '.join(x.split()))
        # Убираем стоп-слова
        df["description"] = df["description"].apply(lambda x: self.clear_stop_words(x))
        # Stemmer
        df["description"] = df["description"].apply(lambda x: " ".join([stemmer.stem(word) for word in x.split()]))
        return df

In [25]:
d = dataset(df)

In [26]:
data = pd.DataFrame(d.forward(df))

In [27]:
data

Unnamed: 0,id,description
0,1,activ classic boxer there reason boxer cult fa...
1,2,activ sport boxer brief skin glori requir enou...
2,3,activ sport brief superbreath nofli brief mini...
3,4,alpin guid pant skin climb ice switch rock tra...
4,5,alpin wind jkt high ridg steep ice anyth alpin...
...,...,...
495,496,cap bottom cut loos madden crowd search undon ...
496,497,cap crew crew take edg fickl weather clear con...
497,498,alltim shell need use morn time umbrella allti...
498,499,allwear cargo short allwear cargo short bask g...


# Вариант с использование стандартных библиотрек

In [28]:
# Инициализируем токенайзер для преобразования каждого текста в вектор. Максимальный размер словаря 5000 слов
tfidf_vectorizer = TfidfVectorizer(binary=True, max_features=5000)

# Преобразуем очищенные тексты в матрицы TFIDF
tfidf_embedings = tfidf_vectorizer.fit_transform(data['description'])

# Проверяем размерность полученного массива
tfidf_embedings.shape

(500, 3938)

In [29]:
# Вычисляем попарно косинусное расстояние между векторами в матрице
cos_matrix = cosine_similarity(tfidf_embedings)

# Находим индексы значений косинусного расстояния больше 0.8
pair_indexes = np.transpose(np.nonzero(cos_matrix > 0.8))

# Перебираем пары индексов
for x in pair_indexes:
    
    # Если индексы не равны друг другу, что означает, что сравнивались описания разных товаров
    if x[0] != x[1]:
        
        # Выводим пары описаний товаров
        print(df['description'][x])

3      alpin guid pant skin climb ice switch rock tra...
158    alpin guid pant skin climb ice switch rock tra...
Name: description, dtype: object
4      alpin wind jkt high ridg steep ice anyth alpin...
307    alpin wind jkt high ridg steep ice anyth alpin...
Name: description, dtype: object
7      print band betina btm fullest coverag bottom b...
219    solid betina btm fullest coverag bottom betina...
Name: description, dtype: object
26    compound cargo pant long ultim doeveryth pant ...
27    compound cargo pant reg ultim doeveryth pant b...
Name: description, dtype: object
26     compound cargo pant long ultim doeveryth pant ...
451    compound cargo pant short ultim doeveryth pant...
Name: description, dtype: object
27    compound cargo pant reg ultim doeveryth pant b...
26    compound cargo pant long ultim doeveryth pant ...
Name: description, dtype: object
27     compound cargo pant reg ultim doeveryth pant b...
451    compound cargo pant short ultim doeveryth pant...
Name: de

# 2-й вариант. Написал свой tf-idf

In [31]:
# Добавим новое поле description_split, которое будет списком слов
data["description_split"] = data["description"].apply(lambda x: x.split())

In [39]:
# Создаем словарь
vocabulary = []
for i, row  in data.iterrows():
    for j in data["description_split"][i]:
        if j not in vocabulary:
            vocabulary.append(j)
word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}

vocabulary_size = len(vocabulary)

In [40]:
# Кол-во документов
len_doc= len(data)

In [41]:
# Размер словаря
vocabulary_size

3947

In [56]:
class tf_idf_model():
    def __init__(self, data, len_doc = len_doc, vocabulary_size = vocabulary_size):
        self.data = data 
        self.tf_matrix = np.zeros((len_doc, vocabulary_size)) 
        self.idf = {}
        self.tf_idf_matrix = np.zeros((len_doc, vocabulary_size)) 
        
    def tf_bild(self, data):
        for i, row  in self.data.iterrows():         
            for w1 in data["description_split"][i]:                                                           
                if w1 in vocabulary:
                    self.tf_matrix[ i, word2idx[w1] ] +=  (1 / len(data["description_split"][i]))
        return self.tf_matrix
    def idf_bild(self, data):
        for word in vocabulary:
            k = 0    
            for i, row  in data.iterrows():  
                if word in data["description_split"][i]:
                    k += 1
            self.idf[word] =  np.log10(len_doc / k)
        return self.idf

    def forward(self, data):
        tf_matrix = self.tf_bild(data)
        idf = self.idf_bild(data)
        
        for word in vocabulary:
            for i in range(len_doc):
                self.tf_idf_matrix[i][word2idx[word]] = tf_matrix[i][word2idx[word]] * idf[word]
        return self.tf_idf_matrix

In [57]:
my_tf_idf = tf_idf_model(data)

In [58]:
tf_idf_matrix = pd.DataFrame(my_tf_idf.forward(data))

In [59]:
tf_idf_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3937,3938,3939,3940,3941,3942,3943,3944,3945,3946
0,0.012735,0.025728,0.088314,0.018618,0.031023,0.027563,0.018219,0.006249,0.010420,0.031023,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.022610,0.000000,0.058801,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.013678,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.011192,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.008300,0.000000,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.008724,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
496,0.008588,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.004214,0.007028,0.000000,...,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
497,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.008629,0.000000,0.000000,...,0.02142,0.02142,0.02142,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
498,0.011541,0.011658,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.00000,0.028114,0.028114,0.028114,0.028114,0.028114,0.028114,0.000000


In [60]:
from sklearn.metrics.pairwise import cosine_similarity               # Метрика измерения косинусного расстояния между векторами
cosine_similarity_tf_idf_matrix = cosine_similarity(tf_idf_matrix)

In [61]:
# Находим индексы значений косинусного расстояния больше 0.8
pair_indexes = np.transpose(np.nonzero(cosine_similarity_tf_idf_matrix > 0.8))

# Перебираем пары индексов
for x in pair_indexes:
    
    # Если индексы не равны друг другу, что означает, что сравнивались описания разных товаров
    if x[0] != x[1]:
        
        # Выводим пары описаний товаров
        print(df['description'][x])

3      alpin guid pant skin climb ice switch rock tra...
158    alpin guid pant skin climb ice switch rock tra...
Name: description, dtype: object
4      alpin wind jkt high ridg steep ice anyth alpin...
307    alpin wind jkt high ridg steep ice anyth alpin...
Name: description, dtype: object
7      print band betina btm fullest coverag bottom b...
219    solid betina btm fullest coverag bottom betina...
Name: description, dtype: object
26    compound cargo pant long ultim doeveryth pant ...
27    compound cargo pant reg ultim doeveryth pant b...
Name: description, dtype: object
26     compound cargo pant long ultim doeveryth pant ...
451    compound cargo pant short ultim doeveryth pant...
Name: description, dtype: object
27    compound cargo pant reg ultim doeveryth pant b...
26    compound cargo pant long ultim doeveryth pant ...
Name: description, dtype: object
27     compound cargo pant reg ultim doeveryth pant b...
451    compound cargo pant short ultim doeveryth pant...
Name: de

Name: description, dtype: object
419    riverwalk sticki high water slick bedrock cobb...
112    riverwalk felt high water slick bedrock cobbl ...
Name: description, dtype: object
419    riverwalk sticki high water slick bedrock cobb...
113    riverwalk stud high water slick bedrock cobbl ...
Name: description, dtype: object
431    girl live simpli deer tshirt softwear ringspun...
57     logo tshirt softwear ringspun organ cotton scr...
Name: description, dtype: object
431    girl live simpli deer tshirt softwear ringspun...
62     fli fish tshirt softwear ringspun organ cotton...
Name: description, dtype: object
431    girl live simpli deer tshirt softwear ringspun...
64     live simpli guitar tshirt softwear ringspun or...
Name: description, dtype: object
431    girl live simpli deer tshirt softwear ringspun...
432    girl live simpli seal tshirt softwear ringspun...
Name: description, dtype: object
432    girl live simpli seal tshirt softwear ringspun...
57     logo tshirt softwear 

# WORD2VEC

In [None]:
# Загружаем предобученную модель
word_vectors = gensim.downloader.load("word2vec-google-news-300")

In [30]:
# Создаем пустой DataFrame для эмбедингов текстов
docs_vectors = pd.DataFrame()

# Идем по текстам
for doc in data['description']:
    
    # Создаем пустой DataFrame для хранения векторов слов текста
    temp = pd.DataFrame()
    
    # Идем по словам в тексте
    for word in doc.split(' '):
        
        try:
            # Векторизуем слово с использованием предобученной модели W2V
            word_vec = word_vectors[word]

            # Добавляем вектор слова в DF
            temp = temp.append(pd.Series(word_vec), ignore_index = True)
            
        except:
            pass
        
    # Вычисляем вектор текста усредняя массив векторов слов
    doc_vector = temp.mean()
    
    # Добавляем вектор текста в DF эмбедингов текстов
    docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)

# Размерность DF эмбедингов текстов    
docs_vectors.shape

  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = docs_vectors.append(doc_vector, ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  temp = temp.append(pd.Series(word_vec), ignore_index = True)
  docs_vectors = doc

(500, 300)

In [35]:
# Примеры векторизованных текстов
docs_vectors.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
246,-0.010637,0.036027,-0.022741,0.091949,-0.103591,0.002241,0.046943,-0.115362,0.042649,0.043243,...,-0.09471,-0.018391,-0.059907,-0.08154,0.016225,-0.050821,0.029154,0.01933,0.015523,0.043067
134,-0.064471,0.072878,0.005432,-0.015769,-0.090759,-0.022298,0.058849,-0.08665,0.061625,0.082275,...,-0.057861,0.041439,-0.068804,0.009345,-0.03552,-0.066657,0.048324,-0.030881,-0.011342,0.001026
256,-0.002893,0.029724,-0.024451,0.068431,-0.067511,-0.008963,0.065377,-0.083518,0.077112,0.072567,...,-0.026885,0.027326,-0.066785,0.017181,-0.032571,-0.037747,0.021783,-0.05683,0.019538,0.01774
363,-0.01824,0.052597,-0.001162,0.038519,-0.051881,-0.015857,0.014083,-0.095194,0.094443,0.094764,...,-0.031378,0.011135,-0.09097,0.047584,-0.034989,-0.071136,0.062725,0.005143,0.04234,0.013738
243,-0.06132,0.048766,-0.029659,0.056129,-0.049346,-0.065844,0.032107,-0.113993,0.011851,0.109122,...,-0.023781,0.034406,-0.04984,0.029876,-0.035379,-0.062105,0.064212,-0.018301,-0.003701,-0.028966


In [36]:
# Вычисляем попарно косинусное расстояние между векторами в матрице, округляем значения до 5 знака после запятой
cos_matrix_w2v = np.around(cosine_similarity(docs_vectors), 5)
cos_matrix_w2v

array([[1.     , 0.89715, 0.82771, ..., 0.8083 , 0.83906, 0.84815],
       [0.89715, 1.     , 0.92409, ..., 0.85416, 0.87068, 0.86517],
       [0.82771, 0.92409, 1.     , ..., 0.82518, 0.81348, 0.81482],
       ...,
       [0.8083 , 0.85416, 0.82518, ..., 1.     , 0.86359, 0.81589],
       [0.83906, 0.87068, 0.81348, ..., 0.86359, 1.     , 0.94493],
       [0.84815, 0.86517, 0.81482, ..., 0.81589, 0.94493, 1.     ]],
      dtype=float32)

In [55]:
# Находим пары векторов, косинусное расстояние между которыми больше 0.95
pair_indexes_w2v = np.transpose(np.nonzero(cos_matrix_w2v > 0.95))

# Перебираем пары индексов
for x in pair_indexes_w2v:
    
    # Если индексы не равны друг другу, что означает, что сравнивались описания разных товаров
    if x[0] != x[1]:
        
        # Выводим пары похожих описаний товаров
        print(df['description'][x])

3      alpin guid pant skin climb ice switch rock tra...
158    alpin guid pant skin climb ice switch rock tra...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
212    rock guid pant rock guid made soft stretchwove...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
330    travers pant stride lightli juan tabo picnic a...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
342    rock guid pant first foremost lightweight soft...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
352    simpl guid pant stapl rock ice mix climb sprin...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
378    lw guid pant backcountri version immens popula...
Name: description, dtype: object
3      alpin guid pant skin climb ice switch rock tra...
383    lw guid pant backcountri version immens popula...
Name

Name: description, dtype: object
33     deliveri short local know best spot deliveri s...
462    custodian pant short graveyard shift advantag ...
Name: description, dtype: object
33     deliveri short local know best spot deliveri s...
499    allwear short time simplifi allwear short prov...
Name: description, dtype: object
34     sweater vest imposs confus sweater vest found ...
178    sweater there much see feet chasm view bivi ve...
Name: description, dtype: object
34     sweater vest imposs confus sweater vest found ...
203    micro puff vest mani alpin shiverfest leav mum...
Name: description, dtype: object
34     sweater vest imposs confus sweater vest found ...
281    sweater vest imposs confus sweater vest found ...
Name: description, dtype: object
34     sweater vest imposs confus sweater vest found ...
415    micro puff vest mani alpin shiverfest leav mum...
Name: description, dtype: object
34     sweater vest imposs confus sweater vest found ...
472    sweater there much se

Name: description, dtype: object
62     fli fish tshirt softwear ringspun organ cotton...
432    girl live simpli seal tshirt softwear ringspun...
Name: description, dtype: object
63    gpiw classic tshirt softwear ringspun organ co...
57    logo tshirt softwear ringspun organ cotton scr...
Name: description, dtype: object
63    gpiw classic tshirt softwear ringspun organ co...
62    fli fish tshirt softwear ringspun organ cotton...
Name: description, dtype: object
63    gpiw classic tshirt softwear ringspun organ co...
64    live simpli guitar tshirt softwear ringspun or...
Name: description, dtype: object
63     gpiw classic tshirt softwear ringspun organ co...
431    girl live simpli deer tshirt softwear ringspun...
Name: description, dtype: object
63     gpiw classic tshirt softwear ringspun organ co...
432    girl live simpli seal tshirt softwear ringspun...
Name: description, dtype: object
64    live simpli guitar tshirt softwear ringspun or...
57    logo tshirt softwear ringspun

Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
200    merino tank finespun cool enough lower tempera...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
201    merino crew move peopl cultur new place sticki...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
202    merino dress stuff flow dress backpack redey l...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
259    merino crew lightest merino layer silkysoft ye...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
264    merino tshirt soft worth linger lightest merin...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
265    merino bottom thank dri warm forward progress ...
Name: description, dtype: object
88     merino polo feel bohemian look least get foot ...
268    merino tshirt versati

Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
138    know tshirt inspir thoreau immort word yvon ch...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
166    birdwalk tshirt temporarili ground artist chri...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
168    bouquet tshirt garland peacock deer mandala gr...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
169    butterfli logo tshirt like monarch butterfli l...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
177    ditch carrid tshirt fun get enjoy ride ditch c...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
184    hill tshirt head hill wear simplydesign hill t...
Name: description, dtype: object
115    rockpil tshirt beauti boulder stack togeth roc...
236    wheat tshirt feed urb

Name: description, dtype: object
153    watermast wader short blackberri barbwir basal...
454    guidewat wader king built guidewat wader tough...
Name: description, dtype: object
153    watermast wader short blackberri barbwir basal...
467    guidewat wader reg built guidewat wader toughe...
Name: description, dtype: object
154    activ hipster perfect life might nomad perfect...
299    activ brief whether your beat heat bali skin f...
Name: description, dtype: object
158    alpin guid pant skin climb ice switch rock tra...
3      alpin guid pant skin climb ice switch rock tra...
Name: description, dtype: object
158    alpin guid pant skin climb ice switch rock tra...
212    rock guid pant rock guid made soft stretchwove...
Name: description, dtype: object
158    alpin guid pant skin climb ice switch rock tra...
330    travers pant stride lightli juan tabo picnic a...
Name: description, dtype: object
158    alpin guid pant skin climb ice switch rock tra...
342    rock guid pant first 

Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
25     citi sea tshirt illustr renaiss seainspir comm...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
35     dragoon tshirt dragoon drop weapon favor big w...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
40     fish frenzi tshirt depend freeflow current sal...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
55     iceberg tshirt effect global warm run deep dep...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
72     live simpli guitar tshirt live life unplug tee...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
94     mountain island tshirt aspir meld mountain wor...
Name: description, dtype: object
184    hill tshirt head hill wear simplydesign hill t...
101    planer tshirt planer 

Name: description, dtype: object
209    nine trail vest simplic action vest protect co...
363    nine trail vest simplic action vest protect co...
Name: description, dtype: object
210    r jkt modern mountain stapl adapt r jacket ser...
380    r jkt modern mountain stapl adapt r jacket ser...
Name: description, dtype: object
211    rain shadow jkt stay insid isnt way stay dri e...
66     torrentshel jkt barometr pressur get low torre...
Name: description, dtype: object
211    rain shadow jkt stay insid isnt way stay dri e...
84     jkt made thrive thin technic climb full alpin ...
Name: description, dtype: object
211    rain shadow jkt stay insid isnt way stay dri e...
106    rain shadow pant light enough afterthought dow...
Name: description, dtype: object
211    rain shadow jkt stay insid isnt way stay dri e...
196    jkt made thrive thin technic climb full alpin ...
Name: description, dtype: object
211    rain shadow jkt stay insid isnt way stay dri e...
273    torrentshel jkt tropi

Name: description, dtype: object
238    yosemit essenc tshirt artist phylli shafer gor...
488    fitz roy studi tshirt unveil fitzroy massif co...
Name: description, dtype: object
238    yosemit essenc tshirt artist phylli shafer gor...
490    fli fish tshirt life color flyingfish tshirt f...
Name: description, dtype: object
238    yosemit essenc tshirt artist phylli shafer gor...
492    logo tshirt patagonia timeless logo tshirt pay...
Name: description, dtype: object
241    print bibiana top athlet bibiana top racerback...
221    solid bibiana top athlet bibiana top racerback...
Name: description, dtype: object
242    print boucau halter top boucau soft tie neck b...
251    solid boucau halter top boucau soft tie neck b...
Name: description, dtype: object
243    print kupala top fix triangl bikini top kupala...
252    solid kupala top fix triangl bikini top kupala...
Name: description, dtype: object
244    runshad tshirt backcountri anyplac move comfor...
116    runshad top trailhead

Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
35     dragoon tshirt dragoon drop weapon favor big w...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
40     fish frenzi tshirt depend freeflow current sal...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
72     live simpli guitar tshirt live life unplug tee...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
97     organ logo tshirt origin print first season de...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
101    planer tshirt planer tshirt give nod surf work...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
115    rockpil tshirt beauti boulder stack togeth roc...
Name: description, dtype: object
275    trampoli tshirt take commut higher ground tram...
123    squid tshirt artist t

Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
238    yosemit essenc tshirt artist phylli shafer gor...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
275    trampoli tshirt take commut higher ground tram...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
287    north coast tshirt north coast tshirt speak da...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
288    owlin around tshirt owlin around tshirt wise c...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
321    tarpon tshirt channel determin silver king wea...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
332    trout head tshirt vibrant spirit trout captur ...
Name: description, dtype: object
314    kite town tshirt artist chri del moro transfor...
333    live simpli deer tshi

Name: description, dtype: object
337    ultra hw mountain sock everi weather window sh...
80     lw hike crew sock like suit feet staycool brea...
Name: description, dtype: object
337    ultra hw mountain sock everi weather window sh...
456    mw hike crew sock mount mendel ice nine long f...
Name: description, dtype: object
338    ultra lw endur ped sock sock fieri day even sh...
78     lw endur ankl sock fastforward pursuit like ri...
Name: description, dtype: object
338    ultra lw endur ped sock sock fieri day even sh...
79     lw endur quarter sock scrape tape inflam ankl ...
Name: description, dtype: object
339    cap graphic tshirt eleg union grace design tec...
19     cap graphic tshirt tee made lightestweight cap...
Name: description, dtype: object
339    cap graphic tshirt eleg union grace design tec...
20     cap tshirt minim style technic evolv punish ex...
Name: description, dtype: object
339    cap graphic tshirt eleg union grace design tec...
171    cap tshirt good breez

Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
166    birdwalk tshirt temporarili ground artist chri...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
169    butterfli logo tshirt like monarch butterfli l...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
177    ditch carrid tshirt fun get enjoy ride ditch c...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
184    hill tshirt head hill wear simplydesign hill t...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
238    yosemit essenc tshirt artist phylli shafer gor...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
275    trampoli tshirt take commut higher ground tram...
Name: description, dtype: object
371    peregrin tshirt born wander onceendang peregri...
287    north coast tshirt no

Name: description, dtype: object
396    wind path tshirt visual energi wind path tshir...
492    logo tshirt patagonia timeless logo tshirt pay...
Name: description, dtype: object
397    marlwalk stalk coral flat tough feet even toug...
112    riverwalk felt high water slick bedrock cobbl ...
Name: description, dtype: object
397    marlwalk stalk coral flat tough feet even toug...
113    riverwalk stud high water slick bedrock cobbl ...
Name: description, dtype: object
397    marlwalk stalk coral flat tough feet even toug...
419    riverwalk sticki high water slick bedrock cobb...
Name: description, dtype: object
399    retro grade short advantag number cam retro gr...
111    retro grade pant use sonni trotter wish list g...
Name: description, dtype: object
401    river short river life mighti fine even occasi...
11     baggi short even baggi popular short anyth not...
Name: description, dtype: object
401    river short river life mighti fine even occasi...
187    intercontinent pant l

Name: description, dtype: object
439    cap bottom unwav foundat coldweath cloth syste...
24     cap bottom sufferfest like grand januari pair ...
Name: description, dtype: object
439    cap bottom unwav foundat coldweath cloth syste...
175    cap bottom world consist granit frozen place c...
Name: description, dtype: object
439    cap bottom unwav foundat coldweath cloth syste...
440    cap crew capilen versatil fastestwick perform ...
Name: description, dtype: object
439    cap bottom unwav foundat coldweath cloth syste...
441    cap bottom ice climber alpinist know pitfal sw...
Name: description, dtype: object
439    cap bottom unwav foundat coldweath cloth syste...
442    cap crew fundament funhog lifestyl crew noncha...
Name: description, dtype: object
440    cap crew capilen versatil fastestwick perform ...
23     cap zip neck capilen provid excel insul nextto...
Name: description, dtype: object
440    cap crew capilen versatil fastestwick perform ...
439    cap bottom unwav foun

Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
372    permit silhouett tshirt permit scytheshap dors...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
396    wind path tshirt visual energi wind path tshir...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
447    cerro torr one tshirt intermin spirit cerro to...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
469    babi live simpli deer tshirt live simpli deer ...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
470    babi live simpli seal tshirt swim fish haul si...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
475    babi tag your tshirt fair mani leg tag plu eye...
Name: description, dtype: object
466    babi circu tshirt anim savanna work togeth inf...
490    fli fish tshirt life 

Name: description, dtype: object
489    flyfish athabasca poster patrik ondru wet line...
331    travers auguil dentrev janin patitucci navig s...
Name: description, dtype: object
489    flyfish athabasca poster patrik ondru wet line...
370    stick n stone morocco poster patagonia rock am...
Name: description, dtype: object
489    flyfish athabasca poster patrik ondru wet line...
395    wild steelhead alaska poster catch releas stee...
Name: description, dtype: object
490    fli fish tshirt life color flyingfish tshirt f...
25     citi sea tshirt illustr renaiss seainspir comm...
Name: description, dtype: object
490    fli fish tshirt life color flyingfish tshirt f...
35     dragoon tshirt dragoon drop weapon favor big w...
Name: description, dtype: object
490    fli fish tshirt life color flyingfish tshirt f...
40     fish frenzi tshirt depend freeflow current sal...
Name: description, dtype: object
490    fli fish tshirt life color flyingfish tshirt f...
55     iceberg tshirt effect