In [1]:
import pandas as pd
import numpy as np
import csv
import MeCab
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import re
import neologdn
import demoji
import emoji

model_dir = '/Users/iomacbookair2/Documents/lab/DEIM2023/entity_vector/entity_vector.model.bin'
# model_dir = '/Users/labimac/Documents/lab/DEIM2023/entity_vector/entity_vector.model.bin'
model_word2vec = KeyedVectors.load_word2vec_format(model_dir, binary=True)

In [2]:
def calc_similarity_word2vec(sentence1, sentence2, model):
  mecab = MeCab.Tagger(
      '-d /opt/homebrew/lib/mecab/dic/mecab-ipadic-neologd')
  pre_sentence1 = sentence1.split(" ")
  sentence1_words = []
  for s1 in pre_sentence1:
    sentence1_words += [line.split("\t")[0]
                        for line in mecab.parse(s1).split("\n")[:-2]]
  pre_sentence2 = sentence2.split(" ")
  sentence2_words = []
  for s2 in pre_sentence2:
    sentence2_words += [line.split("\t")[0]
                        for line in mecab.parse(s2).split("\n")[:-2]]
  if not sentence1_words and not sentence2_words:
    similarity = 1
  elif not sentence1_words or not sentence2_words:
    similarity = 0
  elif (len(sentence1_words) == 1 and len(sentence2_words) != 1) or (len(sentence2_words) == 1 and len(sentence1_words) != 1):
    similarity = 0
  elif all(re.match(r'.*[a-zA-Z].*', word) for word in sentence1_words) or all(re.match(r'.*[a-zA-Z].*', word) for word in sentence2_words):
    similarity = 0
  else:
    sentence1_embedding = np.mean([model[word]
                                  for word in sentence1_words if word in model], axis=0)
    sentence2_embedding = np.mean([model[word]
                                  for word in sentence2_words if word in model], axis=0)
    if np.isnan(sentence1_embedding).any() or np.isnan(sentence2_embedding).any():
        similarity = 0
    else:
      similarity = cosine_similarity(
          [sentence1_embedding], [sentence2_embedding])[0][0]
  return similarity


def calc_average_similarity_word2vec(sentences, key):
    similarities = []
    for i in range(len(sentences)):
        for j in range(i + 1, len(sentences)):
            similarity = calc_similarity_word2vec(
                sentences[i], sentences[j], model_word2vec)
            similarities.append(similarity)
    q = [0, 0.25, 0.5, 0.75, 1]
    outputs = {}
    for i in range(len(q)):
        outputs[f"quantile_{i}"] = np.quantile(similarities, q[i])
    outputs["q1"] = outputs["quantile_1"]
    outputs["q2"] = outputs["quantile_2"]
    outputs["q3"] = outputs["quantile_3"]
    outputs["standard_deviation"] = np.std(similarities)
    outputs["mean"] = np.mean(similarities)
    if key == "q2":
        return outputs["quantile_2"]
    elif key == "q1":
        return outputs["quantile_1"]
    elif key == "q3":
        return outputs["quantile_3"]
    elif key == "standard_deviation":
        return outputs["standard_deviation"]
    elif key == "mean":
        return outputs["mean"]


In [3]:
path = "/Users/iomacbookair2/Documents/lab/DEIM2023/tweet_csv/nichiten/230115_nichiten.csv"
# path = "/Users/labimac/Documents/lab/DEIM2023/tweet_csv/221214_ann_wed.csv"
df = pd.read_csv((path))
df.sort_values(by = 'created_at', ascending = True, inplace = True)
df = df.reset_index(drop=True)
df['created_at'] = pd.to_datetime(df['created_at'])
df = df.drop("author_id", axis=1)
df = df.drop("username", axis=1)
df = df.drop("tweet_id", axis=1)
df = df.drop("like_count", axis=1)
df = df.drop("retweet_count", axis=1)

df.to_csv("sorted.csv", index=False)
df

Unnamed: 0,created_at,text
0,2023-01-15 10:00:00,#nichiten „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åôüìª
1,2023-01-15 10:00:01,ÂÆâ‰Ωè„Åï„Çì„ÄÅ‰∏≠Êæ§„Åï„Çì„ÄÅ„Çπ„Çø„ÉÉ„Éï„ÅÆ„Åø„Å™„Åï„Çì„ÄÅ„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇ „Åù„Åó„Å¶„ÄÅÂÆâ‰Ωè„Åï„Çì„Åä„Åã„Åà„Çä„Å™„Åï„ÅÑ...
2,2023-01-15 10:00:01,#nichiten
3,2023-01-15 10:00:01,#nichiten #ÂÆâ‰ΩèÁ¥≥‰∏ÄÈÉé „Åï„ÅÅÔºÅ ‚ÄùnichitenÂÆâ‰Ωè‚Äù„ÅÆ2023Âπ¥ Âßã„Åæ„Çä„Åß...
4,2023-01-15 10:00:01,„ÄêÂ±ÄÈï∑ÂæÖÈÅáÂ§ßÂæ©Ê¥ªÁ•≠„Äë„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇ #nichiten
...,...,...
3934,2023-01-15 11:59:10,„Ç®„É≥„Éá„Ç£„É≥„Ç∞„ÅØ „Äåshow me„Äç „Ç≥„É≠„Éä„Å´ÁΩπ„Å£„ÅüÂÆâ‰Ωè„Åï„Çì„Åå „Ç≥„É≠„Éä„Çí„Å™„ÅÑ„Åå„Åó„Çç„Å´„Åô„Çã ÊîøÂ∫ú...
3935,2023-01-15 11:59:13,ÂÆâ‰Ωè„Åï„Çìü•∏‰∏≠Êæ§„Åï„Çìüë∏ È´òÊ©ãÊ¥ãÂ≠ê„Åï„Çìüë∏ ‰ªäÊó•„ÇÇÊ•Ω„Åó„ÅÑ2ÊôÇÈñì„Åß„Åó„ÅüÔºÅ ÂÆâ‰Ωè„Åï„Çìü•∏„ÄÅ „ÅäË∫´‰Ωì„Çí„ÅäÂ§ß...
3936,2023-01-15 11:59:33,‰∏≠Â∞æÊòéÊÖ∂„Åï„Çì„ÄÅÂ•Ω„Åç„Å™‰ø≥ÂÑ™„Åï„ÇìüòÜ#nichiten
3937,2023-01-15 11:59:44,ÈªíÊú®ËèØ„Å°„ÇÉ„Çì„ÅÆ„ÄéÂá™„ÅÆ„ÅäÊöá„Äè„ÇÇ„ÄéÈáçÁâàÂá∫Êù•ÔºÅ„Äè„ÇÇËâØ„Åã„Å£„Åü„Å™„ÅÅ„ÄÇ #nichiten


In [4]:
def preprocess(text):
    text = emoji.replace_emoji(text, replace=' ')
    text = neologdn.normalize(text)
    text = re.sub(r'#\S+', '', text) # „Éè„ÉÉ„Ç∑„É•„Çø„Ç∞„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(r'ÔºÉ\S+', '', text) # „Éè„ÉÉ„Ç∑„É•„Çø„Ç∞„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(r'http?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', text) # URL„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(r'https?://\S+', ' ', text) # URL„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(r'[!-/:-@[-`{-~]', r' ', text) # Ë®òÂè∑„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(
        u'[‚ñ†-‚ôØ„Äê„Äë„Äå„Äç„Äé„Äè„Éª„ÖÇÔæüÀä·óú„ÄÅ„ÄÇ‚àÄ„Äá‚ï∞Àãœâ‚Ä¶‚ï≠¬¥ÔΩÄ‚Ä¢Àò–¥‚ÜëËâ∏‚ïØ‚Üí¬∞–¥ÃÄ·¥óÀÉÀÇ‚ÅΩ‚ÅæœÜl‚îîÔºº‚ÄªÂΩ°ñ•¶‚ÜêÍÇπ]', ' ', text) # Ë®òÂè∑„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = re.sub(r'(\d)([,.])(\d+)', r'\1\3', text) # Â∞èÊï∞ÁÇπ„Å®„Ç´„É≥„Éû„ÇíÊ∂à„Åô
    text = re.sub(r'\d+', '0', text) # Êï∞Â≠ó„Çí0„Å´ÁΩÆ„ÅçÊèõ„Åà
    text = text.lower() # Ëã±Â≠ó„ÇíÂ∞èÊñáÂ≠ó„Å´
    text = re.sub(r"[\u3000\t\r\n]", " ", text) # Á©∫ÁôΩÊñáÂ≠ó„Çí„Çπ„Éö„Éº„Çπ„Å´ÁΩÆ„ÅçÊèõ„Åà
    return text


df['text'] = df['text'].apply(preprocess)
df
df.to_csv('preprocessed.csv', index=False)


In [5]:
groups = df.groupby(pd.Grouper(key='created_at', freq='min'))
df_texts_by_minute = pd.DataFrame({
    "texts_by_minute": groups.apply(lambda x: x["text"].tolist())
})
df_texts_by_minute = df_texts_by_minute.reset_index()
df_texts_by_minute

Unnamed: 0,created_at,texts_by_minute
0,2023-01-15 10:00:00,"[, ÂÆâ‰Ωè„Åï„Çì ‰∏≠Êæ§„Åï„Çì „Çπ„Çø„ÉÉ„Éï„ÅÆ„Åø„Å™„Åï„Çì „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô „Åù„Åó„Å¶ ÂÆâ‰Ωè„Åï„Çì„Åä„Åã„Åà„Çä„Å™..."
1,2023-01-15 10:01:00,"[ÂÆâ‰ΩèÁ¥≥‰∏ÄÈÉé„ÅÆÊó•ÊõúÂ§©ÂõΩ„ÅØ„Åò„Åæ„Åü , „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô , „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô Êò®Êó•„ÅÆn„Ç≠..."
2,2023-01-15 10:02:00,"[ÂÆâ‰Ωè„Åï„Çì „Åæ„Å†Â£∞„ÅåÂøÉÈÖç„Åó„Å¶„Å†„Å≠ , „Åä„ÅØ„Çà„ÅÜÂæ°Â∫ß„ÅÑ„Åæ„ÅôÂÆâ‰Ωè„Åï„Çì„ÅäÂ∏∞„Çä„Å™„Åï„ÅÑ , „Ç™„Éè„É®„Ç¶„Ç¥..."
3,2023-01-15 10:03:00,"[„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô ÂÆâ‰Ωè„Ç¢„Éä„ÅØ „ÅäÂ∏∞„Çä„Å™„Åï„ÅÑ „ÇÑ„Å£„Å±„Çä Â£∞„Åå„Åæ„Å†Êú¨Ë™øÂ≠ê„Åò„ÇÉ„Å™„ÅÑ„Å≠ , ..."
4,2023-01-15 10:04:00,"[„Åæ„Å†„ÇÑ„Çì„ÅÆ„Åã„ÅÑ Á¨ë , „Åæ„Å†ÈºªÂ£∞„Å†„Å≠ Âñâ„Çí„ÅäÂ§ß‰∫ã„Å´ , ÁôΩ„ÅÑÂ∑®Â°î„ÅØÂîêÊ≤¢ÂØøÊòé„ÅÆÂπ≥ÊàêÁîü„Åæ„Çå„Åß..."
...,...,...
115,2023-01-15 11:55:00,"[„ÅÜ„Åª„Åª „Å™„Å§„Åã„Åó„ÅÑÊõ≤ , „Åù„ÅÜ„Åã ÂÜ∑ÂáçÂíåÈ¢®ÈáéËèú„Åß„Ç´„É¨„Éº‰Ωú„Çå„Å∞„ÅÑ„ÅÑ„Çì„Å† , ÊúÄÂæå„ÅØÊù±ÈáéËã±Ê≤ª..."
116,2023-01-15 11:56:00,"[ÂÆüÊ≥Å„Åä„Å§„Åß„Åó„Åü , show me Êáê„Åã„Åó„ÅÑ, „ÅÇ„Å™„Åü„Çâ„Åó„Åè„ÇÇ„Å™„ÅÑ„ÅØ, „ÅÇ„Çä„Åå„Å®„ÅÜ„Åî„Åñ..."
117,2023-01-15 11:57:00,"[„ÅÇ„Å£„Å®„ÅÑ„ÅÜÈñì„Å´ÁµÇ„Çè„Å£„ÅüÊÑü Ê•Ω„Åó„Åã„Å£„Åü , Ê•Ω„Åó„Åã„Å£„Åü„Åß„Åô Á¥†Êô¥„Çâ„Åó„ÅÑÊîæÈÄÅ„Å´ÊãçÊâã ÂÆâ‰Ωè„Åï„Çì..."
118,2023-01-15 11:58:00,"[ÂÆâ‰Ωè„Åï„Çì„ÅäÂ§ß‰∫ã„Å´Âñâ„ÅØÁÑ°ÁêÜ„Åô„Çã„Å®Èï∑Âºï„Åè„Åã„Çâ„ÅîÁÑ°ÁêÜ„Å™„Åï„Çâ„Åö, „Ç∑„Éä„É¢„É≥„ÅØÊúüÈôêÂàá„Çå„Å¶„Å¶„ÇÇÂïèÈ°å„Å™„Åè..."


In [6]:
df_texts_by_minute["q1"] = df_texts_by_minute["texts_by_minute"].apply(
    lambda x: 0 if len(x) <= 1 else calc_average_similarity_word2vec(x, "q1"))


  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis

In [7]:
df_texts_by_minute["q2"] = df_texts_by_minute["texts_by_minute"].apply(
    lambda x: 0 if len(x) <= 1 else calc_average_similarity_word2vec(x, "q2"))


  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis

In [8]:
df_texts_by_minute["q3"] = df_texts_by_minute["texts_by_minute"].apply(
    lambda x: 0 if len(x) <= 1 else calc_average_similarity_word2vec(x, "q3"))


  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis

In [9]:
df_texts_by_minute["stdev"] = df_texts_by_minute["texts_by_minute"].apply(
    lambda x: 0 if len(x) <= 1 else calc_average_similarity_word2vec(x, "standard_deviation"))


  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis=axis, dtype=dtype,
  return _methods._mean(a, axis

In [10]:
df_texts_by_minute

Unnamed: 0,created_at,texts_by_minute,q1,q2,q3,stdev
0,2023-01-15 10:00:00,"[, ÂÆâ‰Ωè„Åï„Çì ‰∏≠Êæ§„Åï„Çì „Çπ„Çø„ÉÉ„Éï„ÅÆ„Åø„Å™„Åï„Çì „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô „Åù„Åó„Å¶ ÂÆâ‰Ωè„Åï„Çì„Åä„Åã„Åà„Çä„Å™...",0.000000,0.247872,0.733923,0.366441
1,2023-01-15 10:01:00,"[ÂÆâ‰ΩèÁ¥≥‰∏ÄÈÉé„ÅÆÊó•ÊõúÂ§©ÂõΩ„ÅØ„Åò„Åæ„Åü , „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô , „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô Êò®Êó•„ÅÆn„Ç≠...",0.165027,0.453224,0.658940,0.301978
2,2023-01-15 10:02:00,"[ÂÆâ‰Ωè„Åï„Çì „Åæ„Å†Â£∞„ÅåÂøÉÈÖç„Åó„Å¶„Å†„Å≠ , „Åä„ÅØ„Çà„ÅÜÂæ°Â∫ß„ÅÑ„Åæ„ÅôÂÆâ‰Ωè„Åï„Çì„ÅäÂ∏∞„Çä„Å™„Åï„ÅÑ , „Ç™„Éè„É®„Ç¶„Ç¥...",0.237295,0.479636,0.636866,0.263714
3,2023-01-15 10:03:00,"[„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô ÂÆâ‰Ωè„Ç¢„Éä„ÅØ „ÅäÂ∏∞„Çä„Å™„Åï„ÅÑ „ÇÑ„Å£„Å±„Çä Â£∞„Åå„Åæ„Å†Êú¨Ë™øÂ≠ê„Åò„ÇÉ„Å™„ÅÑ„Å≠ , ...",0.048211,0.438705,0.661849,0.301990
4,2023-01-15 10:04:00,"[„Åæ„Å†„ÇÑ„Çì„ÅÆ„Åã„ÅÑ Á¨ë , „Åæ„Å†ÈºªÂ£∞„Å†„Å≠ Âñâ„Çí„ÅäÂ§ß‰∫ã„Å´ , ÁôΩ„ÅÑÂ∑®Â°î„ÅØÂîêÊ≤¢ÂØøÊòé„ÅÆÂπ≥ÊàêÁîü„Åæ„Çå„Åß...",0.216755,0.484878,0.684719,0.276789
...,...,...,...,...,...,...
115,2023-01-15 11:55:00,"[„ÅÜ„Åª„Åª „Å™„Å§„Åã„Åó„ÅÑÊõ≤ , „Åù„ÅÜ„Åã ÂÜ∑ÂáçÂíåÈ¢®ÈáéËèú„Åß„Ç´„É¨„Éº‰Ωú„Çå„Å∞„ÅÑ„ÅÑ„Çì„Å† , ÊúÄÂæå„ÅØÊù±ÈáéËã±Ê≤ª...",0.329736,0.571705,0.714191,0.242108
116,2023-01-15 11:56:00,"[ÂÆüÊ≥Å„Åä„Å§„Åß„Åó„Åü , show me Êáê„Åã„Åó„ÅÑ, „ÅÇ„Å™„Åü„Çâ„Åó„Åè„ÇÇ„Å™„ÅÑ„ÅØ, „ÅÇ„Çä„Åå„Å®„ÅÜ„Åî„Åñ...",0.397188,0.610091,0.730461,0.237783
117,2023-01-15 11:57:00,"[„ÅÇ„Å£„Å®„ÅÑ„ÅÜÈñì„Å´ÁµÇ„Çè„Å£„ÅüÊÑü Ê•Ω„Åó„Åã„Å£„Åü , Ê•Ω„Åó„Åã„Å£„Åü„Åß„Åô Á¥†Êô¥„Çâ„Åó„ÅÑÊîæÈÄÅ„Å´ÊãçÊâã ÂÆâ‰Ωè„Åï„Çì...",0.446219,0.674674,0.798473,0.324671
118,2023-01-15 11:58:00,"[ÂÆâ‰Ωè„Åï„Çì„ÅäÂ§ß‰∫ã„Å´Âñâ„ÅØÁÑ°ÁêÜ„Åô„Çã„Å®Èï∑Âºï„Åè„Åã„Çâ„ÅîÁÑ°ÁêÜ„Å™„Åï„Çâ„Åö, „Ç∑„Éä„É¢„É≥„ÅØÊúüÈôêÂàá„Çå„Å¶„Å¶„ÇÇÂïèÈ°å„Å™„Åè...",0.318259,0.770827,0.836587,0.351700


In [11]:
df_texts_by_minute.to_csv('220115_similarity.csv', index=False)