In [5]:
import pandas as pd
import numpy as np
import os

In [13]:
# Directory that holds the CSV files
FOLDER_PATH = "Dataset_test"

# Names of the CSV files to load from FOLDER_PATH
csv_files = [
    "AlanSantana.csv",
    "CryptoColugo.csv",
    "CRYPTOMOJO_TA.csv",
    "FieryTrading.csv",
    "MoralDisciple.csv",
    "RocketBomb.csv",
    "weslad.csv",
    "without_worries.csv",
    "Xanrox.csv",
]

# Read every file into its own dataframe; the individual frames are kept in a
# list in case they are needed separately later.
# "Timestamp" is read as int64 so the Unix timestamp stays an integer.
dataframes = [
    pd.read_csv(os.path.join(FOLDER_PATH, csv_name), dtype={"Timestamp": "int64"})
    for csv_name in csv_files
]

# Stack the individual frames into one dataframe with a fresh 0..n-1 index
df_all = pd.concat(dataframes, ignore_index=True)

df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...
...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ..."


In [15]:
# Write the combined dataframe to FOLDER_PATH as UTF-8 CSV, without the row index
all_csv_file = os.path.join(FOLDER_PATH, "All_trader.csv")
df_all.to_csv(path_or_buf=all_csv_file, encoding="utf-8", index=False)

Next, we add some extra information about each post, which may be useful for further analysis.

In [17]:
# Add a column with the number of whitespace-separated words in each post.
# The vectorised .str accessor replaces the per-row lambda, and a missing text
# (NaN) is treated as an empty string so it counts as 0 words instead of
# raising AttributeError on `x.split()`.
df_all["Word count"] = df_all["Text"].fillna("").str.split().str.len()
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259
...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19


In [20]:
import nltk
from nltk.corpus import stopwords

# Build the list of English stop words.
# On an environment where the NLTK "stopwords" corpus has not been downloaded
# yet, the lookup raises LookupError; fetch the corpus once and retry so the
# notebook still runs from a fresh kernel on a fresh machine.
try:
    stop_words = stopwords.words("english")
except LookupError:
    nltk.download("stopwords", quiet=True)
    stop_words = stopwords.words("english")
len(stop_words)

179

In [25]:
# Add a column with the number of stop words in each post.
# A frozenset gives constant-time membership tests; checking against the
# original list rescans every stop word for every word of every post.
stop_word_lookup = frozenset(stop_words)

# Words are lowercased before the lookup so capitalised stop words are counted.
# A missing text (NaN) is treated as an empty string and therefore counts 0.
# NOTE: the text is split on whitespace only, so a token with punctuation
# attached (e.g. "the,") does not match and is not counted.
df_all["Stop words count"] = (
    df_all["Text"]
    .fillna("")
    .apply(lambda text: sum(word.lower() in stop_word_lookup for word in text.split()))
)
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122
...,...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3,0
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7


In [28]:
# Add a column with the percentage of stop words in each post,
# rounded to two decimals
df_all["Stop word %"] = (
    df_all["Stop words count"]
    .div(df_all["Word count"])
    .mul(100)
    .round(2)
)
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10
...,...,...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3,0,0.00
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84
