In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Directory that holds one CSV export per trader
FOLDER_PATH = "Dataset_test"

# File names of the per-trader CSV exports, read in exactly this order
csv_files = [
    "AlanSantana.csv",
    "CryptoColugo.csv",
    "CRYPTOMOJO_TA.csv",
    "FieryTrading.csv",
    "MoralDisciple.csv",
    "RocketBomb.csv",
    "weslad.csv",
    "without_worries.csv",
    "Xanrox.csv"
]

# Read every file into its own dataframe and keep them all in a list
# (in case we might need the individual frames later).
# "Timestamp" is forced to int64 so the time stays a unix timestamp.
dataframes = [
    pd.read_csv(os.path.join(FOLDER_PATH, file_name), dtype={"Timestamp": "int64"})
    for file_name in csv_files
]

# Stack the per-trader frames into a single frame with a fresh 0..n-1 index
df_all = pd.concat(dataframes, ignore_index=True)

df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...
...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ..."


In [3]:
# Save the combined dataframe as "All_trader.csv" inside FOLDER_PATH
# (UTF-8 encoded, without writing the row index as a column)
all_csv_file = os.path.join(FOLDER_PATH, "All_trader.csv")
df_all.to_csv(all_csv_file, index=False, encoding="utf-8")

Now we will add some extra information about each post; this might be used for further analysis.

In [4]:
# New column: number of whitespace-separated words in each post
df_all["Word count"] = [len(post.split()) for post in df_all["Text"]]
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259
...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19


In [5]:
from nltk.corpus import stopwords
# Create the list of English stop words from NLTK's stopwords corpus reader
# NOTE(review): presumably this needs the NLTK "stopwords" corpus to be available locally
# (e.g. via nltk.download("stopwords")) — confirm on a fresh environment
stop_words = stopwords.words("english")
# len(stop_words)
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [6]:
# Create a column with the number of stop words in each post.
# The stop-word list is converted to a set once, so every membership test is a hash lookup
# instead of a scan through the whole list for each word of each post.
# Note: words are compared as they come out of split() (only lowercased), so a token with
# punctuation attached (e.g. "the,") does not match its stop word.
stop_word_set = set(stop_words)
df_all["Stop words count"] = df_all["Text"].apply(
    # make each word lowercase so we won't miss capitalised stop words
    lambda x: sum(word.lower() in stop_word_set for word in x.split())
)
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122
...,...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3,0
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7


In [7]:
# Create a column with the percentage of stop words in each post (rounded to 2 decimals)
stop_word_ratio = df_all["Stop words count"] / df_all["Word count"]
df_all["Stop word %"] = (stop_word_ratio * 100).round(2)
df_all

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10
...,...,...,...,...,...,...,...,...,...
770,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 6,1715965894,Comment:\nNew analysis:,3,0,0.00
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84


Now we try to clean the dataset as much as possible

In [8]:
# Lowercase every word (letter case doesn't matter for our analysis). Because each post is
# split on whitespace and re-joined with single spaces, newlines and repeated spaces are
# collapsed as well.
df_all["lower_case_text"] = df_all["Text"].apply(lambda x: " ".join(map(str.lower, x.split())))
df_all["lower_case_text"]

0      i would like to call your attention to bitcoin...
1      comment: lower high on the monthly timeframe.....
2      signals are available everywhere... and new da...
3      comment: 🅱️ time to trade right now we are wit...
4      comment: 🍀 affirmations to attract abundance w...
                             ...                        
770                               comment: new analysis:
771    comment: price perfectly respected the paralle...
772    the price action of bitcoin is relatively bori...
773    comment: on the 1h chart, bitcoin is forming a...
774    bitcoin has finished its corrective pattern (a...
Name: lower_case_text, Length: 775, dtype: object

In [9]:
import re
def remove_punctuation(text):
    """Return ``text`` with punctuation and symbol characters deleted.

    Three kinds of characters are removed:
      * a hyphen with no word character immediately before it and none immediately after it;
      * a period with no digit immediately before it and none immediately after it;
      * any character that is not a word character, whitespace, ``.``, ``'`` or ``-``.

    Everything else is kept, e.g. hyphens touching a word character ("re-test"), periods
    touching a digit ("3.5"), apostrophes ("bitcoin's") and underscores (word characters).
    Removed characters are not replaced by anything, so consecutive spaces can remain.
    """
    punctuation = re.compile(r"(?<!\w)-(?!\w)|(?<!\d)\.(?!\d)|[^\w\s.'-]")
    return punctuation.sub('', text)
# Run the punctuation clean-up on every lower-cased post and preview the new column
df_all["Cleaned Text"] = df_all["lower_case_text"].apply(remove_punctuation)
df_all["Cleaned Text"]

0      i would like to call your attention to bitcoin...
1      comment lower high on the monthly timeframe ma...
2      signals are available everywhere and new data ...
3      comment  time to trade right now we are witnes...
4      comment  affirmations to attract abundance whe...
                             ...                        
770                                 comment new analysis
771    comment price perfectly respected the parallel...
772    the price action of bitcoin is relatively bori...
773    comment on the 1h chart bitcoin is forming a h...
774    bitcoin has finished its corrective pattern ab...
Name: Cleaned Text, Length: 775, dtype: object

In [10]:
# Drop every stop word from the cleaned text.
# The stop-word list is turned into a set first so that each lookup is a hash check rather
# than a scan over the full list for every word of every post.
stop_word_lookup = set(stop_words)
df_all["Removed stop words"] = df_all["Cleaned Text"].apply(
    lambda x: " ".join(word for word in x.split() if word not in stop_word_lookup)
)
df_all["Removed stop words"]

0      would like call attention bitcoin's monthly ch...
1      comment lower high monthly timeframe maximum p...
2      signals available everywhere new data coming t...
3      comment time trade right witnessing bounce tim...
4      comment affirmations attract abundance wheneve...
                             ...                        
770                                 comment new analysis
771    comment price perfectly respected parallel cha...
772    price action bitcoin relatively boring higher ...
773    comment 1h chart bitcoin forming head shoulder...
774    bitcoin finished corrective pattern abc zigzag...
Name: Removed stop words, Length: 775, dtype: object

We removed the stop words, but there are still some words that aren't relevant to the analysis, so we need to remove those as well.

In [11]:
# Flatten all posts into one list that holds every single word
every_word = [word for post in df_all["Removed stop words"] for word in post.split()]
# Count the occurrences of each word across all posts (most frequent first)
# NOTE: a later cell defines a function that is also named `word_count`; once that cell has
# run, the name no longer refers to this Series.
word_count = pd.Series(every_word).value_counts()
word_count

comment          707
bitcoin          455
price            263
chart            215
bullish          214
                ... 
option             1
high-leverage      1
zigzag             1
door               1
estimated          1
Name: count, Length: 3679, dtype: int64

In [12]:
# Extra, manually chosen list of unnecessary words that we want to remove from the posts
# in addition to the stop words.
# NOTE: "elephants" appears twice in the list, so the length shown below counts it twice.
unnecessary_words = ["comment", "i'm", "namaste", "man", "thank", "thanks", "i've", "please", "we're", "yes", "haha", "moraldisciple", "md", "hello", "i'll", "we've", "ww", "friends", "kateryna", "ii", "iii", "dyor", "sincerely", "nfa", "beautiful", "hmm", "update2", "guys", "likes", "messages", "___", "happy", "name", "men", "update1", "xcom", "shit", "imagine", "friendly", "k", "imo", "hey", "weve", "yeah", "tradingviewcom", "tradingview", "fun", "wow", "telegram", "hah", "cry", "fking", "free", "tips", "join", "lol", "welcome", "sms", "honestly", "personally", "commenting", "tradingview's", "hahaha", "sorry", "fck", "community", "____", "__", "users", "sincere", "dear-dear", "hahah", "angel's", "pizza", "-x", "xdd", "shut", "fool", "idiot", "bro", "nah", "moraldiciple", "wee", "they're", "kitten", "mount", "everest", "boys", "apologize", "subscribe", "youre", "congratulations", "dearest", "vehicle", "eternal", "willl", "lovely", "clean", "pray", "ashamed", "contemplate", "meditate", "judiciously", "constructively", "nay", "idiots", "i'd", "wwwtradingviewcomger-indicator-45015", "elephants", "elephants", "xcomvivek4real_st1794803021442166874", "hahaxd", "fucked", "brainless", "tik-tak-tik-tak", "females", "update3", "seer", "fuck", "hahahagha", "anywayyy", "email", "notifications", "machinexdxxdxdxd", "oops"]
len(unnecessary_words)

125

In [13]:
# Remove the manually listed unnecessary words from every post.
# The word list is converted to a set once so that each lookup is a hash check instead of a
# scan over the whole list for every word of every post.
unnecessary_word_set = set(unnecessary_words)
df_all["Final Text"] = df_all["Removed stop words"].apply(
    lambda x: " ".join(word for word in x.split() if word not in unnecessary_word_set)
)
df_all["Final Text"]

0      would like call attention bitcoin's monthly ch...
1      lower high monthly timeframe maximum pain inco...
2      signals available everywhere new data coming t...
3      time trade right witnessing bounce time import...
4      affirmations attract abundance whenever feelin...
                             ...                        
770                                         new analysis
771    price perfectly respected parallel channel kee...
772    price action bitcoin relatively boring higher ...
773    1h chart bitcoin forming head shoulders patter...
774    bitcoin finished corrective pattern abc zigzag...
Name: Final Text, Length: 775, dtype: object

In [14]:
# there are some rows that only have 3 or less words in them because they originally also contain a picture, but since we dont want to look at that we want to remove those rows
def word_count(text):
    """Return how many whitespace-separated tokens ``text`` contains."""
    tokens = text.split()
    return len(tokens)

# Store the length of every post's final text in a helper column on df_all
df_all['Word_Count'] = df_all["Final Text"].apply(word_count)
# Keep only the posts with more than 3 words; the helper column is dropped from the result
# (it stays on df_all)
has_enough_words = df_all['Word_Count'] > 3
df_final = df_all.loc[has_enough_words].drop(columns=['Word_Count'])
df_final

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,lower_case_text,Cleaned Text,Removed stop words,Final Text
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,i would like to call your attention to bitcoin...,i would like to call your attention to bitcoin...,would like call attention bitcoin's monthly ch...,would like call attention bitcoin's monthly ch...
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,comment: lower high on the monthly timeframe.....,comment lower high on the monthly timeframe ma...,comment lower high monthly timeframe maximum p...,lower high monthly timeframe maximum pain inco...
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signals are available everywhere... and new da...,signals are available everywhere and new data ...,signals available everywhere new data coming t...,signals available everywhere new data coming t...
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,comment: 🅱️ time to trade right now we are wit...,comment time to trade right now we are witnes...,comment time trade right witnessing bounce tim...,time trade right witnessing bounce time import...
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,comment: 🍀 affirmations to attract abundance w...,comment affirmations to attract abundance whe...,comment affirmations attract abundance wheneve...,affirmations attract abundance whenever feelin...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,"comment: bitcoin is pumping, thanks for readin...",comment bitcoin is pumping thanks for reading ...,comment bitcoin pumping thanks reading idea,bitcoin pumping reading idea
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,comment: price perfectly respected the paralle...,comment price perfectly respected the parallel...,comment price perfectly respected parallel cha...,price perfectly respected parallel channel kee...
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,the price action of bitcoin is relatively bori...,the price action of bitcoin is relatively bori...,price action bitcoin relatively boring higher ...,price action bitcoin relatively boring higher ...
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,"comment: on the 1h chart, bitcoin is forming a...",comment on the 1h chart bitcoin is forming a h...,comment 1h chart bitcoin forming head shoulder...,1h chart bitcoin forming head shoulders patter...


# lemmatization

In [15]:
#!pip install textblob

In [16]:
from textblob import Word

In [17]:
def lemmatize_and_count(text):
    """Lemmatize every whitespace-separated word of ``text``.

    Returns a tuple ``(lemmatized_text, changed)``: the lemmatized words joined with single
    spaces, and the number of words whose lemma differs from the original word.
    """
    lemmas = []
    changed = 0
    for token in text.split():
        lemma = Word(token).lemmatize()
        lemmas.append(lemma)
        # count only the words that lemmatization actually altered
        if token != lemma:
            changed += 1
    return ' '.join(lemmas), changed

# Lemmatize every post, then split the resulting (text, count) pairs into two new columns
lemmatized_texts, changed_counts = zip(*df_final['Final Text'].apply(lemmatize_and_count))
df_final['Lemmatized'] = lemmatized_texts
df_final['Lemmatized_Count'] = changed_counts
df_final['Lemmatized_Count']

0       3
1       0
2      15
3      18
4      14
       ..
769     0
771     1
772    13
773     1
774     4
Name: Lemmatized_Count, Length: 647, dtype: int64

In [18]:
# Show the frame, which now also holds the Lemmatized and Lemmatized_Count columns
df_final

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,lower_case_text,Cleaned Text,Removed stop words,Final Text,Lemmatized,Lemmatized_Count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,i would like to call your attention to bitcoin...,i would like to call your attention to bitcoin...,would like call attention bitcoin's monthly ch...,would like call attention bitcoin's monthly ch...,would like call attention bitcoin's monthly ch...,3
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,comment: lower high on the monthly timeframe.....,comment lower high on the monthly timeframe ma...,comment lower high monthly timeframe maximum p...,lower high monthly timeframe maximum pain inco...,lower high monthly timeframe maximum pain inco...,0
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signals are available everywhere... and new da...,signals are available everywhere and new data ...,signals available everywhere new data coming t...,signals available everywhere new data coming t...,signal available everywhere new data coming ti...,15
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,comment: 🅱️ time to trade right now we are wit...,comment time to trade right now we are witnes...,comment time trade right witnessing bounce tim...,time trade right witnessing bounce time import...,time trade right witnessing bounce time import...,18
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,comment: 🍀 affirmations to attract abundance w...,comment affirmations to attract abundance whe...,comment affirmations attract abundance wheneve...,affirmations attract abundance whenever feelin...,affirmation attract abundance whenever feeling...,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,"comment: bitcoin is pumping, thanks for readin...",comment bitcoin is pumping thanks for reading ...,comment bitcoin pumping thanks reading idea,bitcoin pumping reading idea,bitcoin pumping reading idea,0
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,comment: price perfectly respected the paralle...,comment price perfectly respected the parallel...,comment price perfectly respected parallel cha...,price perfectly respected parallel channel kee...,price perfectly respected parallel channel kee...,1
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,the price action of bitcoin is relatively bori...,the price action of bitcoin is relatively bori...,price action bitcoin relatively boring higher ...,price action bitcoin relatively boring higher ...,price action bitcoin relatively boring higher ...,13
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,"comment: on the 1h chart, bitcoin is forming a...",comment on the 1h chart bitcoin is forming a h...,comment 1h chart bitcoin forming head shoulder...,1h chart bitcoin forming head shoulders patter...,1h chart bitcoin forming head shoulder pattern...,1


# Sentiment Analysis

In [19]:
from textblob import TextBlob as tb
# Polarity is the sentiment of the text (in the range -1 to 1), read from TextBlob's
# sentiment result.
# NOTE(review): the scored text has already had stop words removed; if that list contains
# negations such as "not"/"no", polarity may be skewed — confirm whether scoring the
# original text would be more appropriate.
df_final["Polarity"] = [tb(text).sentiment.polarity for text in df_final["Lemmatized"]]
df_final["Polarity"]

0     -0.005841
1      0.160000
2      0.137759
3      0.112534
4      0.485248
         ...   
769    0.000000
771    0.337500
772    0.140561
773    0.000000
774    0.126389
Name: Polarity, Length: 647, dtype: float64

In [20]:
# Subjectivity is the degree to which the text is subjective (in the range 0 to 1), read
# from TextBlob's sentiment result
df_final["Subjectivity"] = [tb(text).sentiment.subjectivity for text in df_final["Lemmatized"]]
df_final["Subjectivity"]

0      0.400930
1      0.540000
2      0.487588
3      0.460599
4      0.583437
         ...   
769    0.000000
771    0.475000
772    0.458418
773    0.000000
774    0.406250
Name: Subjectivity, Length: 647, dtype: float64

In [21]:
# Show the frame, which now also holds the Polarity and Subjectivity columns
df_final

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,lower_case_text,Cleaned Text,Removed stop words,Final Text,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,i would like to call your attention to bitcoin...,i would like to call your attention to bitcoin...,would like call attention bitcoin's monthly ch...,would like call attention bitcoin's monthly ch...,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,comment: lower high on the monthly timeframe.....,comment lower high on the monthly timeframe ma...,comment lower high monthly timeframe maximum p...,lower high monthly timeframe maximum pain inco...,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signals are available everywhere... and new da...,signals are available everywhere and new data ...,signals available everywhere new data coming t...,signals available everywhere new data coming t...,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,comment: 🅱️ time to trade right now we are wit...,comment time to trade right now we are witnes...,comment time trade right witnessing bounce tim...,time trade right witnessing bounce time import...,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,comment: 🍀 affirmations to attract abundance w...,comment affirmations to attract abundance whe...,comment affirmations attract abundance wheneve...,affirmations attract abundance whenever feelin...,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,"comment: bitcoin is pumping, thanks for readin...",comment bitcoin is pumping thanks for reading ...,comment bitcoin pumping thanks reading idea,bitcoin pumping reading idea,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,comment: price perfectly respected the paralle...,comment price perfectly respected the parallel...,comment price perfectly respected parallel cha...,price perfectly respected parallel channel kee...,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,the price action of bitcoin is relatively bori...,the price action of bitcoin is relatively bori...,price action bitcoin relatively boring higher ...,price action bitcoin relatively boring higher ...,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,"comment: on the 1h chart, bitcoin is forming a...",comment on the 1h chart bitcoin is forming a h...,comment 1h chart bitcoin forming head shoulder...,1h chart bitcoin forming head shoulders patter...,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df_final.describe()

Unnamed: 0,Post length,Timestamp,Word count,Stop words count,Stop word %,Lemmatized_Count,Polarity,Subjectivity
count,647.0,647.0,647.0,647.0,647.0,647.0,647.0,647.0
mean,76.891808,1714600000.0,70.236476,30.551777,40.799335,3.290572,0.092757,0.39496
std,93.059773,4489649.0,103.069176,47.331942,10.826951,6.262248,0.200324,0.249621
min,1.0,1653506000.0,5.0,0.0,0.0,0.0,-0.8,0.0
25%,8.0,1713643000.0,15.5,6.0,36.36,0.0,0.0,0.25
50%,21.0,1715677000.0,33.0,15.0,42.22,1.0,0.05,0.415814
75%,221.0,1716697000.0,73.5,33.0,47.74,3.0,0.1875,0.536195
max,221.0,1718218000.0,863.0,413.0,66.67,52.0,1.0,1.0


If we look at the mean polarity (≈ 0.09), we can see that it is above 0, so on average the posts are slightly more positive than negative. However, the value is very close to 0, meaning that overall the sentiment is close to neutral (positive and negative wording roughly balance out). For subjectivity, the mean (≈ 0.39) is below 0.5, meaning that the posts are on average more objective than subjective.

In [23]:
# Clean up the overall dataset: these intermediate text columns are no longer used
unused_columns = ["lower_case_text", "Cleaned Text", "Removed stop words", "Final Text"]
df_final.drop(columns=unused_columns, inplace=True)

In [24]:
# Raise pandas' "display.max_rows" option to 100, then show the frame
pd.set_option('display.max_rows', 100)
df_final

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [25]:
# Remove all rows that belong to the post with this title because they aren't necessary
title_to_remove = 'A Practical Guide For Candlestick Patterns!'
is_unwanted = df_final["Title"] == title_to_remove
df_final.drop(index=df_final.loc[is_unwanted].index, inplace=True)

In [26]:
# Show the frame after the rows with the unwanted title were removed
df_final

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [27]:
# Display the posts ordered from lowest to highest polarity
# (the sorted result is only displayed; df_final itself is not reordered)
df_final.sort_values(by="Polarity")

Unnamed: 0,Trader,Title,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
545,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 151,1715198991,Comment:\nI like to follow stupid traders on x...,17,7,41.18,like follow stupid trader know maybe quirk,1,-0.80,1.000000
399,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 5,1711551207,Comment:\nUnderstanding EUR/USD is certainly n...,17,8,47.06,understanding eurusd certainly bad clicking pi...,0,-0.70,0.666667
535,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 141,1714920854,"Comment:\nThis may be wrong, but I want eviden...",15,6,40.00,may wrong want evidence destroys analysis 5d c...,0,-0.50,0.900000
537,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 143,1715082827,Comment:\nBtc pumped and at the same time 1w m...,55,23,41.82,btc pumped time 1w macd crossed bear today mak...,2,-0.50,0.600000
402,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 8,1711841847,"Comment:\nThe simpler you analyze, the more wr...",10,5,50.00,simpler analyze wrong analyze,0,-0.50,0.900000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,CRYPTOMOJO_TA,#ETH forming this massive falling wedge!,20,Update post 1,1715405394,Comment:\n\n#Bitcoin breaks down the 100MA sup...,52,20,38.46,bitcoin break 100ma support close candle may d...,4,0.75,0.400000
337,MoralDisciple,MATIC polygon analysis,29,Update post 20,1716046850,Comment:\nExtremely good! the feel of the mega...,29,11,37.93,extremely good feel mega max level good may bt...,1,0.80,0.733333
767,Xanrox,Bitcoin - new ATH soon. Best time to buy now,8,Update post 3,1715168183,"Comment:\nBitcoin is re-testing this channel, ...",12,5,41.67,bitcoin re-testing channel great buying opport...,0,0.80,0.750000
531,MoralDisciple,Bitcoin makes good profit. Now is just the beg...,221,Update post 137,1714768390,Comment:\nI am waiting for a comment on the RS...,22,11,50.00,waiting rsi yellowgreen area april 27 perfect ...,0,1.00,1.000000


In [28]:
#!pip install selenium

In [29]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

We want to create a graph that shows the relation between two posts. To do that, we need to add a new column with the topic of each post. The topic will be the name of the coin the post is about.

In [31]:
def determine_topic(row, previous_title, previous_topic):
    """Find the traded symbol of a post by scraping the trader's TradingView profile.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with 'Title' and 'Trader' entries.
        previous_title: Title of the previously processed row; when it equals this
            row's title no browser is opened and ``previous_topic`` is reused.
        previous_topic: Topic that was resolved for ``previous_title``.

    Returns:
        ``previous_topic`` on a title match. Otherwise the leading part of the
        symbol taken from the idea's chart link (e.g. "BTCUS" for "BTCUSDT") when
        it contains one of the symbols in ``crypto_list``, or ``None`` when the
        idea cannot be found or its symbol is not in the list.

    Raises:
        Whatever Selenium raises outside the two inner ``try`` blocks (for example
        a timeout while waiting for the idea cards). The browser is closed before
        the exception propagates.
    """
    title = row['Title']
    trader = row["Trader"]
    crypto_list = ["BTC", "ETH", "BNB", "SOL", "XRP", "DOGE", "TON", "ADA", "AVAX", "SHIB", "TRX", "DOT", "BCH", "LINK", "MATIC", "NEAR", "LTC", "ICP", "LEO", "DAI"]

    # Check if the current title matches the previous title
    if title == previous_title:
        return previous_topic

    # CSS selector of one idea card on the profile page (used for both waiting and locating)
    idea_selector = ('div.tv-card-container > div.tv-card-container__columns > div.js-card-list.tv-card-container__ideas > '
                     'div.tv-feed__item.tv-feed-layout__card-item.js-feed__item--inited')

    driver = webdriver.Firefox()
    try:
        link = "https://www.tradingview.com/u/" + trader
        driver.get(link)
        time.sleep(5)
        found_title = False

        while not found_title:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, idea_selector)))
            # Locate all ideas on the page at this moment
            ideas = driver.find_elements(By.CSS_SELECTOR, idea_selector)

            for idea in ideas:
                idea_title_element = idea.find_element(By.CLASS_NAME, "tv-widget-idea__title-row")
                idea_title = idea_title_element.text.strip()

                if idea_title == title:
                    idea_title_element.click()
                    time.sleep(5)

                    try:
                        symbol_link = WebDriverWait(driver, 10).until(
                            EC.visibility_of_element_located((By.CLASS_NAME, "tv-chart-view__symbol-link"))
                        )
                        href_attribute = symbol_link.get_attribute('href')
                        # The symbol is the second-to-last path segment of the link
                        crypto_symbol = href_attribute.split("/")[-2]
                        # Keep a bit more than the first half of the symbol, e.g. "BTCUSDT" -> "BTCUS"
                        # (presumably to cut off the quote currency -- TODO confirm)
                        first_half = (int((len(crypto_symbol) / 2 + 0.5)) + 1)
                        href = crypto_symbol[:first_half]

                        # Continue only if the href contains one of the specified cryptocurrency symbols
                        if any(symbol in href for symbol in crypto_list):
                            return href

                    except Exception as e:
                        print(f"Error retrieving href for idea {idea_title}: {str(e)}")
                        driver.back()
                        time.sleep(5)
                        # NOTE(review): the remaining `ideas` were located before navigating
                        # away and may be stale after driver.back() -- verify.
                        continue

                    found_title = True
                    break

            if not found_title:
                # click the load more button at the bottom of the ideas
                try:
                    load_more_button = WebDriverWait(driver, 5).until(
                        EC.element_to_be_clickable((By.CSS_SELECTOR, 'span.tv-load-more__btn.js-load-more.tv-button--loader'))
                    )
                    load_more_button.click()
                    # Wait for new ideas to load
                    time.sleep(5)
                except Exception as e:
                    print("No more ideas to load or failed to load more ideas:", str(e))
                    break

        return None
    finally:
        # Always close the browser, even when an unexpected Selenium error escapes
        driver.quit()

# Assuming df_final is already defined in the previous cell
# Topics that were already resolved, keyed by (trader, title). Unlike a
# "previous row" comparison this also avoids a second browser session when
# posts of the same idea are not adjacent, and it never reuses a topic across
# two traders that happen to share a title.
topic_cache = {}

# List to store the topics
topics = []

# Resolve the topic of each row in the DataFrame
for index, row in df_final.iterrows():
    cache_key = (row['Trader'], row['Title'])
    if cache_key not in topic_cache:
        # None/None for the "previous" arguments forces a real lookup
        topic_cache[cache_key] = determine_topic(row, None, None)
    topics.append(topic_cache[cache_key])

# Add the topics to the DataFrame
df_final['Topic'] = topics

# Reorder the columns to place 'Topic' next to 'Title'
columns = list(df_final.columns)
columns.insert(columns.index('Title') + 1, columns.pop(columns.index('Topic')))
df_final = df_final[columns]

No more ideas to load or failed to load more ideas: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

No more ideas to load or failed to load more ideas: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

No more ideas to load or failed to load more ideas: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdri

In [32]:
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTCU,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTCU,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTCUS,2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTCUS,2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [33]:
crypto_list = ["BTC", "ETH", "BNB", "SOL", "XRP", "DOGE", "TON", "ADA", "AVAX", "SHIB", "TRX", "DOT", "BCH", "LINK", "MATIC", "NEAR", "LTC", "ICP", "LEO", "DAI"]


def correct_topic(topic):
    """Reduce a scraped topic such as "BTCUS" to the plain coin symbol.

    Args:
        topic: Scraped topic string, or a missing value (None / NaN).

    Returns:
        The first entry of ``crypto_list`` that occurs inside ``topic``, the
        topic itself when no entry matches, or None for a missing
        (non-string) topic.
    """
    # Missing values can be None or NaN (a float); `symbol in topic` would
    # raise TypeError on a float, so treat every non-string as missing.
    if not isinstance(topic, str):
        return None
    for symbol in crypto_list:
        if symbol in topic:
            return symbol
    return topic

# Normalise every scraped topic to its plain coin symbol
df_final['Topic'] = df_final['Topic'].apply(correct_topic)

# Keep 'Topic' directly after 'Title'
column_order = df_final.columns.tolist()
topic_slot = column_order.index('Title') + 1
column_order.insert(topic_slot, column_order.pop(column_order.index('Topic')))
df_final = df_final[column_order]

In [34]:
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [35]:
# Rows for which no topic could be determined
missing_topic_mask = df_final['Topic'].isna()
none_topics = df_final.loc[missing_topic_mask, ['Trader', 'Title', 'Topic']]
none_topics

Unnamed: 0,Trader,Title,Topic
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,
5,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,
6,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,
7,AlanSantana,"🅱️ Bitcoin Headed To $100,000!!! FOMO! Warning...",
8,AlanSantana,"🅱️ Bitcoin Headed To $100,000!!! FOMO! Warning...",
9,AlanSantana,"🅱️ Bitcoin Headed To $100,000!!! FOMO! Warning...",


Manually add the topics where the script ran into errors.

In [40]:
# (title fragment, topic) pairs, checked in this order; the first fragment
# contained in a title decides the topic.
manual_topics = [
    ("Ethereum Will Hit Bottom Before Bitcoin", "BTC"),
    ("Expert Confirms: Bitcoin Set To Crash Below", "BTC"),
    ("Bitcoin Headed To $100,000!!!", "BTC"),
    ("Final Warning: A 50% Capitulation Drop Will", "BTC"),
    ("Bitcoin 4X SHORT", "BTC"),
    ("Bitcoin Weekly Now Bearish", "BTC"),
    ("Bitcoin 2-4 Months", "BTC"),
    ("Dogecoin 4X SHORT", "DOGE"),
    ("Solana 4X SHORT", "SOL"),
    ("Moon is near", "BTC"),
    ("Bchusdt trading opportunity", "BCH"),
    ("ALT REQUEST FOR MONTH OF MAY", "BTC"),
    ("Ethusdt trading opportunity", "ETH"),
    ("Bitcoin - Have we just witnessed a", "BTC"),
    ("Polkadot (DOT) to $50", "DOT"),
    ("Dogecoin - Going up", "DOGE"),
    ("A 80% correction to 10 cents in 2024 for XRP?", "XRP"),
    ("MATIC - The one token to avoid in 2024", "MATIC"),
]

# Fill in the topic of every row the scraper could not resolve
for index, row in none_topics.iterrows():
    title = row["Title"]
    for fragment, new_topic in manual_topics:
        if fragment in title:
            df_final.at[index, "Topic"] = new_topic
            break
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Initial post,1715250618,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Update post 1,1718150706,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Initial post,1715767526,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 1,1715869218,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 2,1717238906,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 5,1715923856,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 7,1716020763,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Initial post,1715676697,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Update post 1,1715755990,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [None]:
#!pip install requests tradingview_ta

In [41]:
import requests
import datetime

# Mapping from the topic symbols used in this notebook to Binance trading
# pairs: every symbol is paired with USDT.
symbol_to_binance = {
    coin: f"{coin}USDT"
    for coin in (
        "BTC", "ETH", "BNB", "SOL", "XRP",
        "DOGE", "TON", "ADA", "AVAX", "SHIB",
        "TRX", "DOT", "BCH", "LINK", "MATIC",
        "NEAR", "LTC", "ICP", "LEO", "DAI",
    )
}

# Function to get the price from Binance API
def get_price(symbol, timestamp, timeout=10):
    """Fetch a 1-minute closing price for ``symbol`` from the Binance klines endpoint.

    Args:
        symbol: Key of ``symbol_to_binance`` (e.g. "BTC").
        timestamp: Unix timestamp in seconds; it is multiplied by 1000 for the
            request. The query window is ``timestamp`` to ``timestamp + 60``.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The close price of the first kline returned, as a float, or None when
        the symbol is unknown, the API returns no data, or the request or
        decoding fails (a message is printed in each of these cases).
    """
    binance_symbol = symbol_to_binance.get(symbol)
    if not binance_symbol:
        print(f"Symbol {symbol} not found in Binance symbols.")
        return None

    endpoint = "https://api.binance.com/api/v3/klines"
    params = {
        'symbol': binance_symbol,
        'interval': '1m',  # 1-minute interval
        'startTime': timestamp * 1000,
        'endTime': (timestamp + 60) * 1000  # 1 minute later
    }

    try:
        response = requests.get(endpoint, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()

        if data:
            # Get the close price
            return float(data[0][4])  # The closing price is the 5th element in the list
        else:
            print(f"No data available for symbol: {symbol} at timestamp: {timestamp}")
            return None
    except requests.exceptions.RequestException as e:
        # Covers connection errors, HTTP errors and timeouts
        print(f"Request failed: {e}")
        return None
    except ValueError as e:
        print(f"JSON decode failed: {e}")
        return None

# df_final is expected to already carry the 'Topic' and 'Timestamp' columns.
# Start with an empty Price column; rows without a topic keep None.
df_final['Price'] = None

# Look up the price for every row that has a topic
for row_label, record in df_final.iterrows():
    if pd.isna(record['Topic']):
        continue
    df_final.at[row_label, 'Price'] = get_price(record['Topic'], record['Timestamp'])

# Move 'Price' so that it sits directly after 'Timestamp'
column_order = df_final.columns.tolist()
price_slot = column_order.index('Timestamp') + 1
column_order.insert(price_slot, column_order.pop(column_order.index('Price')))
df_final = df_final[column_order]

In [42]:
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Price,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count,Polarity,Subjectivity
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Initial post,1715250618,61030.01,I would like to call your attention to Bitcoin...,155,59,38.06,would like call attention bitcoin's monthly ch...,3,-0.005841,0.400930
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Update post 1,1718150706,67367.99,Comment:\nLower high on the monthly timeframe....,13,4,30.77,lower high monthly timeframe maximum pain inco...,0,0.160000,0.540000
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Initial post,1715767526,62465.74,Signals are available everywhere... And new da...,355,163,45.92,signal available everywhere new data coming ti...,15,0.137759,0.487588
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 1,1715869218,65918.09,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,time trade right witnessing bounce time import...,18,0.112534,0.460599
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 2,1717238906,67654.74,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,affirmation attract abundance whenever feeling...,14,0.485248,0.583437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 5,1715923856,65483.4,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,bitcoin pumping reading idea,0,0.000000,0.000000
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 7,1716020763,67023.76,Comment:\nPrice perfectly respected the parall...,15,3,20.00,price perfectly respected parallel channel kee...,1,0.337500,0.475000
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Initial post,1715676697,61853.19,The price action of Bitcoin is relatively bori...,281,134,47.69,price action bitcoin relatively boring higher ...,13,0.140561,0.458418
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Update post 1,1715755990,61922.0,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,1h chart bitcoin forming head shoulder pattern...,1,0.000000,0.000000


In [43]:
# Remove the two sentiment columns from the frame
df_final = df_final.drop(['Polarity', 'Subjectivity'], axis=1)

In [48]:
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Price,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Initial post,1715250618,61030.01,I would like to call your attention to Bitcoin...,155,59,38.06,The price at the moment is: 61030.01. would li...,3
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Update post 1,1718150706,67367.99,Comment:\nLower high on the monthly timeframe....,13,4,30.77,The price at the moment is: 67367.99. lower hi...,0
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Initial post,1715767526,62465.74,Signals are available everywhere... And new da...,355,163,45.92,The price at the moment is: 62465.74. signal a...,15
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 1,1715869218,65918.09,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,The price at the moment is: 65918.09. time tra...,18
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 2,1717238906,67654.74,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,The price at the moment is: 67654.74. affirmat...,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 5,1715923856,65483.4,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,The price at the moment is: 65483.4. bitcoin p...,0
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 7,1716020763,67023.76,Comment:\nPrice perfectly respected the parall...,15,3,20.00,The price at the moment is: 67023.76. price pe...,1
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Initial post,1715676697,61853.19,The price action of Bitcoin is relatively bori...,281,134,47.69,The price at the moment is: 61853.19. price ac...,13
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Update post 1,1715755990,61922.0,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,The price at the moment is: 61922.0. 1h chart ...,1


In [45]:
# Save the current frame (index column omitted) as without_price.csv in the dataset folder
file_path = os.path.join(FOLDER_PATH, "without_price.csv")
df_final.to_csv(path_or_buf=file_path, index=False)

In [46]:
# Prepend the price to the lemmatized text. Rows that already start with the
# sentence are left alone, so re-running this cell does not stack the prefix.
price_prefix = "The price at the moment is: "
df_final['Lemmatized'] = df_final.apply(
    lambda row: row['Lemmatized']
    if str(row['Lemmatized']).startswith(price_prefix)
    else f"{price_prefix}{row['Price']}. {row['Lemmatized']}",
    axis=1,
)

In [47]:
df_final

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Price,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count
0,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Initial post,1715250618,61030.01,I would like to call your attention to Bitcoin...,155,59,38.06,The price at the moment is: 61030.01. would li...,3
1,AlanSantana,🅱️ Ethereum Will Hit Bottom Before Bitcoin,BTC,2,Update post 1,1718150706,67367.99,Comment:\nLower high on the monthly timeframe....,13,4,30.77,The price at the moment is: 67367.99. lower hi...,0
2,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Initial post,1715767526,62465.74,Signals are available everywhere... And new da...,355,163,45.92,The price at the moment is: 62465.74. signal a...,15
3,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 1,1715869218,65918.09,Comment:\n🅱️ Time To Trade\n\nRight now we are...,530,274,51.70,The price at the moment is: 65918.09. time tra...,18
4,AlanSantana,🅱️ Expert Confirms: Bitcoin Set To Crash Below...,BTC,5,Update post 2,1717238906,67654.74,Comment:\n🍀 AFFIRMATIONS TO ATTRACT ABUNDANCE\...,259,122,47.10,The price at the moment is: 67654.74. affirmat...,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 5,1715923856,65483.4,"Comment:\nBitcoin is pumping, thanks for readi...",9,3,33.33,The price at the moment is: 65483.4. bitcoin p...,0
771,Xanrox,Bitcoin - new ATH soon. Best time to buy now,BTC,8,Update post 7,1716020763,67023.76,Comment:\nPrice perfectly respected the parall...,15,3,20.00,The price at the moment is: 67023.76. price pe...,1
772,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Initial post,1715676697,61853.19,The price action of Bitcoin is relatively bori...,281,134,47.69,The price at the moment is: 61853.19. price ac...,13
773,Xanrox,"Bitcoin - Ultimate bottom will be here, but no...",BTC,2,Update post 1,1715755990,61922.0,"Comment:\nOn the 1h chart, Bitcoin is forming ...",19,7,36.84,The price at the moment is: 61922.0. 1h chart ...,1


In [49]:
# Save the finished dataset (index column omitted) as dataset-final.csv in the dataset folder
file_path = os.path.join(FOLDER_PATH, "dataset-final.csv")
df_final.to_csv(path_or_buf=file_path, index=False)

In [50]:
# Show the posts in chronological order (df_final itself is not modified)
df_final.sort_values("Timestamp")

Unnamed: 0,Trader,Title,Topic,Post length,Post type,Timestamp,Price,Text,Word count,Stop words count,Stop word %,Lemmatized,Lemmatized_Count
636,weslad,POTENTIAL BITCOIN LIFECYCLE,BTC,3,Initial post,1653505733,29894.26,"Hello all,\n\nSharing with you today is a pote...",365,145,39.73,The price at the moment is: 29894.26. sharing ...,12
635,weslad,Bitcoin Trading trading cycle,BTC,1,Initial post,1668055308,16274.06,This is very long term view and still very muc...,39,18,46.15,The price at the moment is: 16274.06. long ter...,2
639,weslad,Dogeusdt Trading idea,DOGE,3,Initial post,1690226162,0.07516,DOGEUSDT is showing signs of forming an Adam a...,102,41,40.20,The price at the moment is: 0.07516. dogeusdt ...,3
689,without_worries,Dogecoin - Going up,DOGE,7,Initial post,1690302626,0.07878,but only for a little while. Do not believe th...,102,36,35.29,The price at the moment is: 0.07878. little be...,5
682,without_worries,Polkadot (DOT) to $50,DOT,5,Initial post,1692485056,4.51,On the above 4-day chart price action has corr...,85,27,31.76,The price at the moment is: 4.51. 4-day chart ...,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,MoralDisciple,UP UP UP Cardano analysis,ADA,43,Update post 39,1718183190,0.4255,Comment:\nBtc will fall and probably more and ...,13,5,38.46,The price at the moment is: 0.4255. btc fall p...,0
245,MoralDisciple,UP UP UP Cardano analysis,ADA,43,Update post 40,1718183408,0.4255,"Comment:\nAfter the ideal btc rsi, I buy ada a...",16,6,37.50,The price at the moment is: 0.4255. ideal btc ...,0
246,MoralDisciple,UP UP UP Cardano analysis,ADA,43,Update post 41,1718187651,0.4278,Comment:\nThat's why I'm waiting for spu confi...,8,2,25.00,The price at the moment is: 0.4278. that's wai...,0
247,MoralDisciple,UP UP UP Cardano analysis,ADA,43,Update post 42,1718194116,0.426,"Comment:\nWith this theory, I expect that the ...",39,21,53.85,The price at the moment is: 0.426. theory expe...,0
