The code below creates the datasets used for fine-tuning and for model application (searching for clones) of the eva.ru user ФедЮля.

In [1]:
import os
import datetime
import pandas as pd
import numpy as np

In [2]:
# Two authors with the same name but different IDs existed during the same
# period of time; both IDs are treated as the known Fedulya.
fedulya_ids = [250487, 186159]


In [3]:
# Root data folder. Set the EVA_DATA_DIR environment variable to point the
# notebook at another location; otherwise the original path is used.
Data=os.environ.get('EVA_DATA_DIR', '/home/kate/Projects/eva/Data')

# --- Inputs (relative to Data) ---
Messages_filename='Main/Messages.csv'
Messages_full_filename=os.path.join(Data, Messages_filename)

OriginalMessages_filename='Main/OriginalMessages.csv'
OriginalMessages_full_filename=os.path.join(Data, OriginalMessages_filename)

Authors_filename='Main/Authors.csv'
Authors_full_filename=os.path.join(Data, Authors_filename)

ta_extension_filename='Main/TextAttributes_ext.csv'
ta_extension_full_filename=os.path.join(Data, ta_extension_filename)

# --- Outputs written by this notebook (relative to Data) ---
ft_filename='Subprojects/Fedulya/fedulya_data_for_finetuning.csv'
ft_full_filename=os.path.join(Data, ft_filename)

SimilarAuthors_filename='Subprojects/Fedulya/fedulya_similar_authors.csv'
SimilarAuthors_full_filename=os.path.join(Data, SimilarAuthors_filename)

all_fedulya_messages_filename='Subprojects/Fedulya/all_fedulya_and_clones_messages.csv'
all_fedulya_messages_full_filename=os.path.join(Data, all_fedulya_messages_filename)

# Medium-size posts: word count strictly above the short limit and
# at most the medium limit (see the filters that use these two names).
short_message_limit_words=5
medium_message_limit_words=100

In [4]:
# Load the full message table. `on_bad_lines='warn'` reports and drops
# malformed rows, as `error_bad_lines=False` did; that older argument was
# deprecated in pandas 1.3 and removed in pandas 2.0.
Messages = pd.read_csv(Messages_full_filename, on_bad_lines='warn', index_col=False)

In [5]:
# Keep only rows whose Author_Id is not 0, as an independent copy.
AMessages = Messages.loc[Messages['Author_Id'].ne(0)].copy()

In [6]:
# Drop the full table; the following cells work on the filtered copy AMessages.
del Messages

## Messages with more than 5 and at most 100 words, from all topics where the first 2 Fedulyas are present

In [7]:
# Distinct Topic_Id values of the messages written by the known Fedulya IDs.
topic_id_with_fedulya = AMessages.loc[AMessages['Author_Id'].isin(fedulya_ids), 'Topic_Id'].unique()

In [8]:
# Number of distinct topics with at least one message by a known Fedulya ID.
len(topic_id_with_fedulya)

678

In [9]:
# Restrict to messages (from any author) posted in those topics.
AMessages = AMessages.loc[AMessages['Topic_Id'].isin(topic_id_with_fedulya)].copy()

In [10]:
# Drop rows with Author_Id 0. The same condition was applied when AMessages
# was first built from Messages, so this repeats that filter.
AMessages = AMessages.loc[AMessages['Author_Id'].ne(0)]

In [11]:
# Row count after restricting to topics that contain a known Fedulya message.
len(AMessages)

57306

In [12]:
# Inspect the available columns.
AMessages.columns

Index(['Message_Id', 'Timestamp', 'Topic_1st_Message', 'Parent_Id',
       'Author_Id', 'author', 'Topic_Id', 'Topic', 'Chapter_Id', 'Chapter',
       'message', 'cnt_childs', 'cnt_immediate_childs'],
      dtype='object')

The AMessages dataset has no column with the message length in words, so additional data must be loaded.

In [13]:
# Load the extended text-attributes table. `on_bad_lines='warn'` reports and
# drops malformed rows, as `error_bad_lines=False` did; that older argument
# was deprecated in pandas 1.3 and removed in pandas 2.0.
ta_ext = pd.read_csv(ta_extension_full_filename, on_bad_lines='warn', index_col=False)

In [14]:
# Inspect the available columns of the text-attributes table.
ta_ext.columns

Index(['Message_Id', 'Timestamp', 'Topic_1st_Message', 'Parent_Id',
       'Author_Id', 'author', 'Topic_Id', 'Topic', 'Chapter_Id', 'Chapter',
       'message_characters', 'message_words', 'emojis', 'images', 'links',
       'original_paragraphs', 'actual_paragraphs', 'avg_paragraph_characters',
       'avg_paragraph_words', 'cnt_childs', 'cnt_immediate_childs',
       'flg_excessive_exclamations', 'flg_excessive_questions',
       'flg_excessive_other', 'num_Adj', 'num_unique_Adj', 'num_Nouns',
       'num_unique_Nouns', 'num_Verb', 'num_unique_Verb', 'num_tokens',
       'num_unique_tokens', 'num_syllables', 'num_long_words',
       'num_unique_long_words', 'num_commas', 'num_exclamations',
       'num_questions', 'num_words', 'ASW', 'PLW', 'TTR', 'TTR_A', 'TTR_N',
       'TTR_V', 'NAV', 'UNAV', 'fraction_of_commas',
       'fraction_of_exclamations', 'fraction_of_questions', 'fraction_of_Adj',
       'fraction_of_Nouns', 'fraction_of_Verbs', 'num_sent', 'avg_sent_len'],
      dtype

In [15]:
# Attach the word count of every message; the inner join keeps only
# messages present in both tables.
AMessages = AMessages.merge(ta_ext[['Message_Id', 'message_words']], on='Message_Id', how='inner')

In [16]:
# ta_ext was only needed for the message_words column merged above.
del ta_ext

In [17]:
# Keep medium-size messages only: strictly more than the short limit and at
# most the medium limit. `.copy()` makes the result an independent frame,
# matching the earlier filters, so the `target` column added later does not
# trigger SettingWithCopyWarning.
AMessages = AMessages[
    (AMessages['message_words'] > short_message_limit_words)
    & (AMessages['message_words'] <= medium_message_limit_words)
].copy()

In [18]:
# Row count after keeping medium-size messages only.
len(AMessages)

45697

Set target column

In [19]:
# Label each message: 'fedulya' when its author is one of the known IDs,
# 'Other' otherwise.
AMessages['target'] = np.where(AMessages['Author_Id'].isin(fedulya_ids), 'fedulya', 'Other')

In [20]:
# Number of messages per target class, largest class first.
AMessages.groupby(['target']).size().reset_index(name='counts').sort_values('counts', ascending=False)

Unnamed: 0,target,counts
0,Other,41062
1,fedulya,4635


Joining original forum post text, without cleaning html tags etc

In [21]:
# Load the original (uncleaned) post texts. `on_bad_lines='warn'` reports and
# drops malformed rows, as `error_bad_lines=False` did; that older argument
# was deprecated in pandas 1.3 and removed in pandas 2.0.
OriginalMessages = pd.read_csv(OriginalMessages_full_filename, on_bad_lines='warn', index_col=False)

In [22]:
# Attach the original post text to every message (inner join on Message_Id).
AMessages = AMessages.merge(OriginalMessages[['Message_Id', 'original_message']], on='Message_Id', how='inner')

In [23]:
# Check that message_words, target and original_message are now present.
AMessages.columns

Index(['Message_Id', 'Timestamp', 'Topic_1st_Message', 'Parent_Id',
       'Author_Id', 'author', 'Topic_Id', 'Topic', 'Chapter_Id', 'Chapter',
       'message', 'cnt_childs', 'cnt_immediate_childs', 'message_words',
       'target', 'original_message'],
      dtype='object')

In [24]:
# Row count after joining the original texts (compare with the count before the join).
len(AMessages)

45697

In [25]:
# OriginalMessages was only needed for the original_message column merged above.
del OriginalMessages

In [26]:
# Keep only the columns written to the fine-tuning dataset.
AMessages = AMessages.loc[:, [
    'Message_Id',
    'Author_Id',
    'author',
    'message',
    'original_message',
    'target',
]]

In [27]:
# Write the fine-tuning dataset with a header row and without the index.
AMessages.to_csv(path_or_buf=ft_full_filename, index=False)

## Dataset to find Fedulya's clones (model application)

### Authors to check

In [28]:
# Load the per-author table. `on_bad_lines='warn'` reports and drops
# malformed rows, as `error_bad_lines=False` did; that older argument was
# deprecated in pandas 1.3 and removed in pandas 2.0.
Authors = pd.read_csv(Authors_full_filename, on_bad_lines='warn', index_col=False)

In [29]:
# Preview the first rows of the per-author table.
Authors.head()

Unnamed: 0,Author_Id,author,cnt_messages,First_Message_Id,Last_Message_Id,first_message_ts,last_message_ts,Binned Number of Messages,cnt_2021_messages,num_of_tneg_messages,...,avg_fraction_of_Verbs,avg_TTR,avg_TTR_A,avg_TTR_N,avg_TTR_V,avg_NAV,avg_UNAV,avg_fraction_of_Nouns,avg_ASW,avg_PLW
0,23,volv ♧ Clair,1535,67913773,101696362,2011-07-06 11:00:00,2020-11-09 23:07:00,"(1000, 5000]",0.0,267.0,...,0.168575,0.866406,0.662028,0.92154,0.872181,1.43301,2.290852,0.272158,2.099146,0.139118
1,27,Oxygen 27 F*,16,97031390,102288727,2018-05-17 14:17:00,2021-02-19 17:01:00,"(0, 100]",0.0,8.0,...,0.135263,0.855567,0.692308,0.972494,0.769231,1.280186,1.888462,0.273964,1.911883,0.088368
2,32,Редакция Евы,784,80470511,96142872,2013-04-29 13:46:00,2017-12-11 20:44:00,"(100, 1000]",0.0,96.0,...,0.142459,0.904268,0.905599,0.88255,0.964668,1.868945,3.498784,0.329188,2.130166,0.160048
3,35,Малефисента ⚜**,3415,51064403,101933631,2009-11-02 14:26:00,2020-12-21 17:49:00,"(1000, 5000]",0.0,1433.0,...,0.167977,0.890777,0.605887,0.952464,0.913617,1.516702,2.006752,0.236029,1.895766,0.094588
4,36,Доктор Филатов SD*,4992,81110399,101760074,2013-06-18 17:11:00,2020-11-20 18:00:00,"(1000, 5000]",0.0,1855.0,...,0.166422,0.896571,0.801671,0.901457,0.923862,1.785766,2.255664,0.247477,2.059361,0.141871


In [30]:
# Parse both timestamp columns from 'YYYY-MM-DD HH:MM:SS' text into datetimes
# so they can be compared with each other.
Authors = Authors.assign(
    first_message_ts=pd.to_datetime(Authors['first_message_ts'], format='%Y-%m-%d %H:%M:%S'),
    last_message_ts=pd.to_datetime(Authors['last_message_ts'], format='%Y-%m-%d %H:%M:%S'),
)

In [31]:
# Latest last-message timestamp among the known Fedulya IDs.
last_message_ts = Authors.loc[Authors['Author_Id'].isin(fedulya_ids), 'last_message_ts'].max()

In [32]:
#Number of authors whose first message is later than the last known Fedulya message
len(Authors[Authors['first_message_ts']>last_message_ts])

14076

In [33]:
#Authors who started posting after the last known Fedulya message and have a reasonable number of messages:
#  cnt_messages < 5000 because an author with that many messages is definitely not a clone
#  cnt_messages > 100 because authors with fewer messages write as Anonymous
#  TotalDays > 1 because authors with 100+ messages in very few days are most likely spammers and get banned
len(Authors[((Authors['first_message_ts']>last_message_ts) & (Authors['TotalDays']>1) & (Authors['cnt_messages']>100) & (Authors['cnt_messages']<5000))])

1029

In [34]:
# Keep the known Fedulya accounts plus every candidate author: first message
# after the last known Fedulya message, active for more than one day, and
# between 100 and 5000 messages (both bounds exclusive).
# `.copy()` makes the result an independent frame so that the similarity
# column added later does not trigger SettingWithCopyWarning.
Authors = Authors[
    Authors['Author_Id'].isin(fedulya_ids)
    | (
        (Authors['first_message_ts'] > last_message_ts)
        & (Authors['TotalDays'] > 1)
        & (Authors['cnt_messages'] > 100)
        & (Authors['cnt_messages'] < 5000)
    )
].copy()

### Can we limit the number of authors to check even more?

In [35]:
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
# Per-author feature columns used to build the profile vectors that are
# compared with cosine similarity. The names must match the Authors table
# exactly (including the 'rersponses' spelling).
columns_to_compare = [
    # tone and topic size
    'fraction_of_tneg_messages',
    'fraction_of_tpos_messages',
    'fraction_of_Large_Topics',
    # responses received
    'fraction_of_messages_with_immediate_responses',
    'avg_immediate_responses',
    'fraction_of_messages_with_discussion',
    'avg_responses',
    'fraction_of_neg_rersponses',
    'fraction_of_pos_rersponses',
    # message content markers
    'fraction_of_messages_with_emojis',
    'fraction_of_messages_with_images',
    'fraction_of_messages_with_links',
    'fraction_of_messages_with_excessive_exclamations',
    'fraction_of_messages_with_excessive_questions',
    'fraction_of_messages_with_excessive_other',
    # punctuation and sentence statistics
    'fraction_of_medium_with_commas',
    'avg_num_commas',
    'avg_sent_len',
    'avg_num_sent',
    # word-level statistics
    'avg_ASW',
    'avg_PLW',
    'avg_fraction_of_Adj',
    'avg_fraction_of_Verbs',
    'avg_fraction_of_Nouns',
    'avg_TTR',
    'avg_TTR_A',
    'avg_TTR_N',
    'avg_TTR_V',
    'avg_NAV',
    'avg_UNAV',
]

In [37]:
# Feature matrix of the known Fedulya accounts: one row per account,
# one column per entry of columns_to_compare.
fedulya = np.array(Authors.loc[Authors['Author_Id'].isin(fedulya_ids), columns_to_compare])
fedulya

array([[6.28571429e-01, 1.33333333e-01, 0.00000000e+00, 8.23529412e-01,
        1.00840336e+00, 5.96638655e-01, 7.38028169e+00, 4.16666667e-01,
        2.58333333e-01, 1.31092437e+00, 0.00000000e+00, 0.00000000e+00,
        1.68067227e-02, 5.04201681e-02, 0.00000000e+00, 2.00000000e+00,
        2.33333333e+00, 7.88155396e+00, 3.95238095e+00, 1.89055082e+00,
        1.05569452e-01, 1.01268605e-01, 1.43026067e-01, 2.42852448e-01,
        9.14446027e-01, 7.84829932e-01, 9.51166133e-01, 8.56452106e-01,
        1.69460503e+00, 2.49675084e+00],
       [5.57836645e-01, 1.98675497e-01, 9.28571429e-01, 6.95207324e-01,
        9.81870400e-01, 5.36169449e-01, 9.63977235e+00, 4.44241316e-01,
        2.44972578e-01, 1.02297613e+00, 0.00000000e+00, 1.92066056e-02,
        6.47998564e-02, 6.03123317e-02, 3.59001975e-04, 1.80684327e+00,
        2.19933775e+00, 8.00626437e+00, 3.30750552e+00, 1.88159161e+00,
        1.04835281e-01, 9.49267020e-02, 1.53531414e-01, 2.39949357e-01,
        8.91542945e-01,

In [38]:
# Feature matrix of every author kept in Authors (the known Fedulya accounts
# are part of it), in the same column order as `fedulya`.
others = np.array(Authors.loc[:, columns_to_compare])
others

array([[0.31010453, 0.10801394, 0.35      , ..., 0.87218147, 1.43300956,
        2.29085165],
       [0.12467532, 0.20519481, 0.10686016, ..., 0.9646684 , 1.86894488,
        3.49878412],
       [0.45532646, 0.16102111, 0.5       , ..., 0.92386224, 1.78576606,
        2.2556641 ],
       ...,
       [0.45989305, 0.04812834, 0.        , ..., 0.91584755, 1.53872252,
        2.10129249],
       [0.6746988 , 0.1686747 , 0.        , ..., 0.91176725, 1.8948778 ,
        1.97651328],
       [0.48031496, 0.04724409, 0.        , ..., 0.87512282, 1.44298199,
        1.97819462]])

In [39]:
# Pairwise cosine similarity between the rows of `fedulya` and the rows of `others`.
result = cosine_similarity(X=fedulya, Y=others)

In [40]:
# Store each author's similarity to the first row of `fedulya`.
# NOTE(review): only row 0 of `result` is used; the similarities to the other
# known Fedulya profile (row 1) are discarded - confirm this is intended.
Authors['fedulya_sim'] = result[0, :]

In [41]:
# Authors whose similarity exceeds 0.94, most similar first.
SimilarAuthors = Authors.loc[Authors['fedulya_sim'].gt(0.94)].sort_values(by='fedulya_sim', ascending=False)

In [42]:
# Number of distinct authors above the similarity threshold.
SimilarAuthors['Author_Id'].nunique()

903

In [43]:
# Write the similar-author table with a header row and without the index.
SimilarAuthors.to_csv(path_or_buf=SimilarAuthors_full_filename, index=False)

In [44]:
# Show the rows for a hand-picked list of author IDs. The first two are the
# known Fedulya IDs; the others are presumably suspected clone accounts
# (TODO confirm where this list comes from).
SimilarAuthors[SimilarAuthors['Author_Id'].isin([250487,
186159,
266936,
269514,
466036,
474446,
482362
])]

Unnamed: 0,Author_Id,author,cnt_messages,First_Message_Id,Last_Message_Id,first_message_ts,last_message_ts,Binned Number of Messages,cnt_2021_messages,num_of_tneg_messages,...,avg_TTR,avg_TTR_A,avg_TTR_N,avg_TTR_V,avg_NAV,avg_UNAV,avg_fraction_of_Nouns,avg_ASW,avg_PLW,fedulya_sim
10417,186159,ФедЮля **,119,52892781,54365549,2009-12-28 03:36:00,2010-02-19 03:26:00,"(100, 1000]",0.0,66.0,...,0.914446,0.78483,0.951166,0.856452,1.694605,2.496751,0.242852,1.890551,0.105569,1.0
13120,250487,ФедюлЯ +,5571,51039367,54449713,2009-11-01 18:41:00,2010-02-22 16:43:00,"(5000, 10000]",0.0,2527.0,...,0.891543,0.706654,0.941423,0.88296,1.604861,2.291479,0.239949,1.881592,0.104835,0.985593
17469,466036,Федюля Иммодиум для анусов **,3776,89071044,90738692,2015-03-23 04:31:00,2015-10-06 14:09:00,"(1000, 5000]",0.0,1937.0,...,0.890116,0.702328,0.935249,0.887036,1.609833,2.31927,0.246536,1.852998,0.098938,0.980583
13808,266936,ФедЮлЯ _,287,55112376,55362895,2010-03-16 18:53:00,2010-03-23 23:41:00,"(100, 1000]",0.0,145.0,...,0.89557,0.726564,0.942639,0.898513,1.665785,2.59315,0.227284,1.824813,0.093423,0.973728
13926,269514,ФедЮЛЯ *,2205,55769592,57499671,2010-04-05 23:20:00,2010-06-09 21:45:00,"(1000, 5000]",0.0,1033.0,...,0.89461,0.719478,0.936984,0.890687,1.624545,2.425547,0.242872,1.82689,0.09472,0.972686
17777,482362,Федюля - ЛопедиуМ для анусов +,756,91076569,91431403,2015-11-14 00:24:00,2015-12-26 00:48:00,"(100, 1000]",0.0,408.0,...,0.896454,0.73669,0.942986,0.880845,1.704242,2.50363,0.248158,1.831265,0.094628,0.962056
17612,474446,Федюля-иммодиум для анусов *,1103,90122599,91444055,2015-07-16 23:57:00,2015-12-27 23:30:00,"(1000, 5000]",0.0,576.0,...,0.886183,0.723444,0.921615,0.899047,1.697042,2.380089,0.237676,1.845607,0.092953,0.947062


In [45]:
# Least similar authors that still pass the threshold (the frame is sorted by similarity, descending).
SimilarAuthors.tail()

Unnamed: 0,Author_Id,author,cnt_messages,First_Message_Id,Last_Message_Id,first_message_ts,last_message_ts,Binned Number of Messages,cnt_2021_messages,num_of_tneg_messages,...,avg_TTR,avg_TTR_A,avg_TTR_N,avg_TTR_V,avg_NAV,avg_UNAV,avg_fraction_of_Nouns,avg_ASW,avg_PLW,fedulya_sim
7234,128611,ОСЕНЬ 128611 SD,208,58585470,103222180,2010-08-09 10:44:00,2021-08-25 08:42:00,"(100, 1000]",0.0,64.0,...,0.88864,0.784524,0.913406,0.882663,1.867491,2.079126,0.225348,1.717278,0.080821,0.940777
18600,529152,redeleven_4753001 F**,195,95736970,101629536,2017-10-04 19:56:00,2020-10-29 00:02:00,"(100, 1000]",0.0,89.0,...,0.872225,0.699304,0.911661,0.883514,1.606016,1.888966,0.229526,1.759063,0.080452,0.940751
12950,246731,_Milanna_ SD,148,83286216,103027924,2013-12-04 16:04:00,2021-07-15 19:11:00,"(100, 1000]",0.0,67.0,...,0.93159,0.775926,0.960565,0.941766,1.78257,2.178867,0.241652,1.966208,0.10092,0.940564
18846,545633,EvaRu 545633 F**,206,96609853,103118933,2018-03-02 21:06:00,2021-08-03 01:43:00,"(100, 1000]",0.0,77.0,...,0.874707,0.741243,0.897896,0.888549,1.776289,1.869931,0.195245,1.835606,0.089591,0.940525
14828,303507,Зеленые рукава девицы Болейн *,129,76611974,82847857,2012-09-07 20:42:00,2013-11-03 23:36:00,"(100, 1000]",0.0,44.0,...,0.914841,0.75,0.932102,0.957397,1.741979,2.030163,0.223286,1.96957,0.10534,0.940051


In [46]:
# Most similar authors (the frame is sorted by similarity, descending).
SimilarAuthors.head()

Unnamed: 0,Author_Id,author,cnt_messages,First_Message_Id,Last_Message_Id,first_message_ts,last_message_ts,Binned Number of Messages,cnt_2021_messages,num_of_tneg_messages,...,avg_TTR,avg_TTR_A,avg_TTR_N,avg_TTR_V,avg_NAV,avg_UNAV,avg_fraction_of_Nouns,avg_ASW,avg_PLW,fedulya_sim
10417,186159,ФедЮля **,119,52892781,54365549,2009-12-28 03:36:00,2010-02-19 03:26:00,"(100, 1000]",0.0,66.0,...,0.914446,0.78483,0.951166,0.856452,1.694605,2.496751,0.242852,1.890551,0.105569,1.0
12640,239519,Mari5000 F**,451,100866108,103104668,2020-06-07 21:54:00,2021-07-30 21:47:00,"(100, 1000]",327.0,152.0,...,0.859542,0.658925,0.899019,0.886689,1.55885,2.018277,0.218422,1.786401,0.083906,0.995243
14212,277159,"Допустим, у меня рыбка SD**",104,65600126,99827517,2011-03-31 19:28:00,2019-12-09 11:27:00,"(100, 1000]",0.0,29.0,...,0.87989,0.678571,0.928292,0.861595,1.48931,1.93143,0.228915,1.905038,0.109535,0.994352
1341,35269,атмосфера C.B. SD*,256,56518497,103104750,2010-04-30 18:45:00,2021-07-30 22:08:00,"(100, 1000]",0.0,55.0,...,0.886146,0.687075,0.946802,0.862714,1.547664,1.998544,0.236585,1.834388,0.100539,0.994171
16585,399265,brunetka777 SD*,390,80609438,95023523,2013-05-11 17:51:00,2017-05-20 16:50:00,"(100, 1000]",0.0,155.0,...,0.921243,0.779355,0.953531,0.903418,1.692919,2.478266,0.28021,2.186672,0.152407,0.993391


In [47]:
# Similar authors whose name contains 'Фед', with their activity dates, message counts and similarity.
SimilarAuthors[SimilarAuthors['author'].str.contains('Фед')][['Author_Id','author','first_message_ts', 'last_message_ts','cnt_messages','cnt_medium_messages','fedulya_sim']]

Unnamed: 0,Author_Id,author,first_message_ts,last_message_ts,cnt_messages,cnt_medium_messages,fedulya_sim
10417,186159,ФедЮля **,2009-12-28 03:36:00,2010-02-19 03:26:00,119,105.0,1.0
13120,250487,ФедюлЯ +,2009-11-01 18:41:00,2010-02-22 16:43:00,5571,4530.0,0.985593
17469,466036,Федюля Иммодиум для анусов **,2015-03-23 04:31:00,2015-10-06 14:09:00,3776,3196.0,0.980583
13808,266936,ФедЮлЯ _,2010-03-16 18:53:00,2010-03-23 23:41:00,287,243.0,0.973728
13926,269514,ФедЮЛЯ *,2010-04-05 23:20:00,2010-06-09 21:45:00,2205,1864.0,0.972686
17777,482362,Федюля - ЛопедиуМ для анусов +,2015-11-14 00:24:00,2015-12-26 00:48:00,756,666.0,0.962056
17612,474446,Федюля-иммодиум для анусов *,2015-07-16 23:57:00,2015-12-27 23:30:00,1103,952.0,0.947062


## Messages to analyze

In [48]:
# Reload the similar-author table saved earlier. `on_bad_lines='warn'`
# reports and drops malformed rows, as `error_bad_lines=False` did; that
# older argument was deprecated in pandas 1.3 and removed in pandas 2.0.
SimilarAuthors = pd.read_csv(SimilarAuthors_full_filename, on_bad_lines='warn', index_col=False)

In [49]:
# Load the full message table again. `on_bad_lines='warn'` reports and drops
# malformed rows, as `error_bad_lines=False` did; that older argument was
# deprecated in pandas 1.3 and removed in pandas 2.0.
Messages = pd.read_csv(Messages_full_filename, on_bad_lines='warn', index_col=False)

In [50]:
# Keep only the messages written by the similar authors (inner join on Author_Id).
SMessages = Messages.merge(SimilarAuthors[['Author_Id']], on='Author_Id', how='inner')

In [51]:
# Drop the full table; the following cells work on SMessages.
del Messages

In [52]:
# Number of messages written by the similar authors.
len(SMessages)

542068

Medium Size messages only (need to load additional data)

In [53]:
# Load the extended text-attributes table again. `on_bad_lines='warn'`
# reports and drops malformed rows, as `error_bad_lines=False` did; that
# older argument was deprecated in pandas 1.3 and removed in pandas 2.0.
ta_ext = pd.read_csv(ta_extension_full_filename, on_bad_lines='warn', index_col=False)

In [54]:
# Attach the word count of every message (inner join on Message_Id).
SMessages = SMessages.merge(ta_ext[['Message_Id', 'message_words']], on='Message_Id', how='inner')

In [55]:
# ta_ext was only needed for the message_words column merged above.
del ta_ext

In [56]:
# Keep medium-size messages only: strictly more than the short limit and at
# most the medium limit.
SMessages = SMessages.loc[
    SMessages['message_words'].gt(short_message_limit_words)
    & SMessages['message_words'].le(medium_message_limit_words)
]

In [57]:
# Row count after keeping medium-size messages only.
len(SMessages)

435070

Joining original forum post text, without cleaning html tags etc

In [58]:
# Load the original (uncleaned) post texts again. `on_bad_lines='warn'`
# reports and drops malformed rows, as `error_bad_lines=False` did; that
# older argument was deprecated in pandas 1.3 and removed in pandas 2.0.
OriginalMessages = pd.read_csv(OriginalMessages_full_filename, on_bad_lines='warn', index_col=False)

In [59]:
# Attach the original post text to every message (inner join on Message_Id).
SMessages = SMessages.merge(OriginalMessages[['Message_Id', 'original_message']], on='Message_Id', how='inner')

In [60]:
# OriginalMessages was only needed for the original_message column merged above.
del OriginalMessages

In [61]:
# Inspect the columns available after the joins.
SMessages.columns

Index(['Message_Id', 'Timestamp', 'Topic_1st_Message', 'Parent_Id',
       'Author_Id', 'author', 'Topic_Id', 'Topic', 'Chapter_Id', 'Chapter',
       'message', 'cnt_childs', 'cnt_immediate_childs', 'message_words',
       'original_message'],
      dtype='object')

In [62]:
# Keep the columns needed downstream. The per-message 'author' and
# 'message_words' columns are left out here.
SMessages = SMessages.loc[:, [
    'Message_Id',
    'Timestamp',
    'Topic_1st_Message',
    'Parent_Id',
    'Author_Id',
    'Topic_Id',
    'Topic',
    'Chapter_Id',
    'Chapter',
    'original_message',
    'message',
    'cnt_childs',
    'cnt_immediate_childs',
]]

Unifying author names (they can vary between posts) for further analysis

In [63]:
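#Disabled reload of the full authors table (kept for reference); the `Authors` frame filtered earlier in the notebook is still in memory at this point.
#Authors = pd.read_csv(Authors_full_filename, error_bad_lines=False, index_col=False)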

In [64]:
# Attach one unified author name per Author_Id.
# The names are taken from SimilarAuthors, which this section reloads from
# disk, instead of the `Authors` frame left over from an earlier section.
# SimilarAuthors is the subset of Authors that SMessages was built from, so
# the joined names are the same, and this section no longer depends on
# `Authors` still being in memory.
SMessages = SMessages.merge(SimilarAuthors[['Author_Id', 'author']], on='Author_Id', how='inner')

In [65]:
# Row count after attaching author names (compare with the count before the join).
len(SMessages)

435070

In [66]:
# Preview the first rows of the final message table.
SMessages.head()

Unnamed: 0,Message_Id,Timestamp,Topic_1st_Message,Parent_Id,Author_Id,Topic_Id,Topic,Chapter_Id,Chapter,original_message,message,cnt_childs,cnt_immediate_childs,author
0,68156918,2011-07-20 15:29:00,N,0,240561,1588239,Ссылки,137,Детская психология и развитие,"<div class=""body"">Я все могу, где скачать? И б...","Я все могу, где скачать? И беби энштейн? Что-т...",0,0,Олечкая **
1,65203211,2011-03-19 15:06:00,N,65031236,240561,2604407,методичка по развитию,137,Детская психология и развитие,"<div class=""body"">а можно мне тоже опыты vidi2...",а можно мне тоже опыты vidi2005olga@mail.ru за...,0,0,Олечкая **
2,72508255,2012-01-22 16:37:00,N,0,240561,2859257,заикание,137,Детская психология и развитие,"<div class=""body"">мы ездили в Арлилию, это в М...","мы ездили в Арлилию, это в Медведково. На конс...",0,0,Олечкая **
3,81885636,2013-08-29 09:39:00,N,81885143,240561,3159289,Мы напрягаем родителей?,137,Детская психология и развитие,"<div class=""body"">Как мама дочки, скажу. Мне б...","Как мама дочки, скажу. Мне было бы приятно.)))...",2,1,Олечкая **
4,81888444,2013-08-29 12:57:00,N,81888288,240561,3159289,Мы напрягаем родителей?,137,Детская психология и развитие,"<div class=""body"">+1<br/><br/>Не в обиду, но к...","+1Не в обиду, но когда тут пишут, что я фен не...",10,1,Олечкая **


In [67]:
# Write the messages of all similar authors with a header row and without the index.
SMessages.to_csv(path_or_buf=all_fedulya_messages_full_filename, index=False)

In [68]:
# Distinct IDs of authors in the exported messages whose name contains 'Фед'.
SMessages[SMessages['author'].str.contains('Фед')]['Author_Id'].unique()

array([466036, 250487, 186159, 266936, 269514, 474446, 482362])