# Preparing and cleaning the data 

This document will show you how I prepare and clean the data. 

Initially, I wanted to download information from the Reddit API over a long time span. However, I got blogged and got 429 errors very quickly. Therefore I had to adapt my approach and choose five dates and see what were the most popular stocks on these five given dates. 

This might be wrong, but it was the only possibility I had to get around the 429 error.

In [1]:
from dateutil.rrule import *
from datetime import date
import json
from pandas import json_normalize
import requests
import pandas as pd
import yfinance as yf
from datetime import datetime
import time
import seaborn as sns
import matplotlib.pyplot as plt

## Phase 1

The first challenge was to make a list of 5 dates. It is not complicated to create a list of dates. However, there is always at the end the time (00:00:00) and I wanted to get ride of this as the Reddit API only needed the year, month and day to work. Any other format was leading to an error. 

In [2]:
#This is a code that I copied and is doing the job 

date = [dt.date().isoformat()
    for dt in rrule(MONTHLY, dtstart=date(2022, 6, 1), until=date.today())]

date

['2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01']

In [3]:
#Here, I know that the code is heavy, and it is the perfect case to use a for loop
#However, when I tried to implement a for loop, I automatically got a 429 error 

res_1 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[0]}")
res_1_clean = res_1.json() # calling the information from the website
data_1 = pd.DataFrame.from_dict(res_1_clean) # putting the information in a df
data_1 = data_1.assign(Date='2022-06-01') # adding a column with the data

res_1 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[0]}")
res_1_clean = res_1.json()
data_1 = pd.DataFrame.from_dict(res_1_clean)
data_1 = data_1.assign(Date='2022-06-01')

res_2 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[1]}")
res_2_clean = res_2.json()
data_2 = pd.DataFrame.from_dict(res_2_clean)
data_2 = data_2.assign(Date='2022-07-01')

res_3 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[2]}")
res_3_clean = res_3.json()
data_3 = pd.DataFrame.from_dict(res_3_clean)
data_3 = data_3.assign(Date='2022-08-01')

res_4 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[3]}")
res_4_clean = res_4.json()
data_4 = pd.DataFrame.from_dict(res_4_clean)
data_4 = data_4.assign(Date='2022-09-01')

res_5 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date[4]}")
res_5_clean = res_5.json()
data_5 = pd.DataFrame.from_dict(res_5_clean)
data_5 = data_5.assign(Date='2022-10-01')


#final data set with the 5 chosen dates 
df_all = pd.concat([data_1, data_2, data_3, data_4, data_5])

df_all.to_csv('1df_all.csv', index = True, encoding='utf-8')


In [4]:
#See first 5 results
df_all.head()

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
0,382,Bullish,0.13,GME,2022-06-01
1,40,Bullish,0.151,TA,2022-06-01
2,30,Bullish,0.257,TSLA,2022-06-01
3,29,Bullish,0.293,AMZN,2022-06-01
4,25,Bullish,0.903,CRM,2022-06-01


In [5]:
#See last 5 results
df_all.tail()

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
45,2,Bearish,-0.425,AIG,2022-10-01
46,2,Bullish,0.952,IP,2022-10-01
47,2,Bullish,0.577,BOE,2022-10-01
48,2,Bullish,0.361,LEVI,2022-10-01
49,2,Bullish,0.114,MSFT,2022-10-01


In [6]:
#Sorting the value and see the TOP 10
df_all_sorted = df_all.sort_values(by=['no_of_comments'],ascending=False)
df_all_sorted_top_10 = df_all_sorted[0:10]

df_all_sorted_top_10.to_csv('2df_all_sorted_top_10.csv', index = True, encoding='utf-8')
df_all_sorted_top_10


Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
0,542,Bearish,-0.392,BBBY,2022-09-01
0,382,Bullish,0.13,GME,2022-06-01
1,202,Bullish,0.13,GME,2022-09-01
2,101,Bearish,-0.109,NVDA,2022-09-01
0,97,Bullish,0.257,TSLA,2022-08-01
1,76,Bullish,0.19,AMD,2022-08-01
0,75,Bullish,0.13,GME,2022-10-01
2,74,Bullish,0.13,GME,2022-08-01
0,63,Bullish,0.257,TSLA,2022-07-01
3,61,Bullish,0.244,RC,2022-09-01


In [7]:
# See which ticker comes most often
df_all_sorted_ticker = df_all_sorted['ticker'].value_counts()
df_all_sorted_ticker = pd.DataFrame(df_all_sorted_ticker)
df_all_sorted_ticker

Unnamed: 0,ticker
TSLA,5
AMD,5
GME,5
AMC,5
IQ,5
...,...
NKE,1
UBS,1
REAL,1
VERY,1


In [8]:
#See the distribution of the number of comments
df_all_sorted.groupby("no_of_comments")["ticker"].count()

no_of_comments
2      34
3      60
4      38
5      29
6      12
7      10
8       7
9       1
10      8
11      5
12      2
13      4
14      5
15      2
16      1
17      2
19      1
21      3
22      2
24      1
25      3
27      1
28      1
29      1
30      2
33      1
36      1
40      1
42      1
52      1
61      1
63      1
74      1
75      1
76      1
97      1
101     1
202     1
382     1
542     1
Name: ticker, dtype: int64

In [9]:
# See if between the date the total number of comments was similar 

df_all_sorted_noc = df_all_sorted.groupby("Date")["no_of_comments"].sum()
df_all_sorted_noc

Date
2022-06-01     759
2022-07-01     376
2022-08-01     536
2022-09-01    1276
2022-10-01     411
Name: no_of_comments, dtype: int64

In [10]:
#Put the information above in a data frame
df_all_sorted_noc=pd.DataFrame(df_all_sorted_noc)
df_all_sorted_noc =df_all_sorted_noc.reset_index(drop=False)
column_headers = df_all_sorted_noc.columns.values.tolist()
column_headers
df_all_sorted_noc.to_csv('3df_all_sorted_noc.csv', index = True, encoding='utf-8')


## Phase 2 

Now that I was able to retrieve the information from the API, I will focus on the 3 most pouplar stocks out of the 5 days I looked at

- BBBY or Bed Bath and Beyond 
- GME or GameStop 
- NVDA or NVIDIA 

In [11]:
# Calling and defining these 3 tickers
bbby = yf.Ticker("BBBY")
gme = yf.Ticker("GME")
nvda = yf.Ticker("NVDA")

In [58]:
hist_bbby = bbby.history(start="2022-09-01", end ="2022-10-01", interval="1d") #Have the price over a defined period of time
hist_bbby = pd.DataFrame(hist_bbby) #Create a dataframe with this information

hist_bbby_clean = hist_bbby.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False) #drop the uncessary columns
hist_bbby_clean = hist_bbby_clean.reset_index(drop=False) 
hist_bbby_clean_2 = hist_bbby_clean.iloc[[0,1,2,20]] #keep the row that only interest me
hist_bbby_clean_2 = pd.DataFrame(hist_bbby_clean_2) #Create a dateframe with these last rows
hist_bbby_clean_2.insert(3,"rebased at 100", [100,99,81,70])
hist_bbby_clean_2.to_csv('4hist_bbby_clean_2.csv', index = True, encoding='utf-8')

display(hist_bbby_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-09-01 00:00:00-04:00,8.71,31421700,100
1,2022-09-02 00:00:00-04:00,8.63,31152300,99
2,2022-09-06 00:00:00-04:00,7.04,32500500,81
20,2022-09-30 00:00:00-04:00,6.09,7665200,70


In [59]:
gme = yf.Ticker("GME")
hist_gme = gme.history(start="2022-06-01", end ="2022-07-01", interval="1d")
hist_gme = pd.DataFrame(hist_gme)

hist_gme_clean = hist_gme.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_gme_clean = hist_gme_clean.reset_index(drop=False)
hist_gme_clean_2 = hist_gme_clean.iloc[[0,1,2,20]]
hist_gme_clean_2 = pd.DataFrame(hist_gme_clean_2)
hist_gme_clean_2.insert(3,"rebased at 100", [100,110,110,100])
hist_gme_clean_2.to_csv('5hist_gme_clean_2.csv', index = True, encoding='utf-8')

display(hist_gme_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-06-01 00:00:00-04:00,30.35,14991600,100
1,2022-06-02 00:00:00-04:00,33.5,27918800,110
2,2022-06-03 00:00:00-04:00,33.424999,18234400,110
20,2022-06-30 00:00:00-04:00,30.575001,6718800,100


In [60]:
nvda = yf.Ticker("NVDA")
hist_nvda = nvda.history(start="2022-09-01", end ="2022-10-01", interval="1d")
hist_nvda = pd.DataFrame(hist_nvda)

hist_nvda_clean = hist_nvda.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_nvda_clean = hist_nvda_clean.reset_index(drop=False)
hist_nvda_clean_2 = hist_nvda_clean.iloc[[0,1,2,20]]
hist_nvda_clean_2 = pd.DataFrame(hist_nvda_clean_2)
hist_nvda_clean_2.insert(3,"rebased at 100", [100,98,97,87])
hist_nvda_clean_2.to_csv('6hist_nvda_clean_2.csv', index = True, encoding='utf-8')
display(hist_nvda_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-09-01 00:00:00-04:00,139.328598,117886500,100
1,2022-09-02 00:00:00-04:00,136.429459,74259000,98
2,2022-09-06 00:00:00-04:00,134.610001,53575100,97
20,2022-09-30 00:00:00-04:00,121.389999,56486900,87


## Phase 3 

I will look at two indexes to see if, at the same period, the market was already on a downward or upward path. 

The S&P 500 Index, or Standard & Poor's 500 Index, is a market-capitalization-weighted index of 500 leading publicly traded companies in the U.S [Investopedia](https://www.investopedia.com/terms/s/sp500.asp)

The Nasdaq Composite Index is a market capitalization-weighted index of more than 3,700 stocks listed on the Nasdaq stock exchange. As a broad index heavily weighted toward the important technology sector, the Nasdaq Composite Index has become a staple of financial markets reports. [Investopedia](https://www.investopedia.com/terms/n/nasdaqcompositeindex.asp)

In [61]:
snp = yf.Ticker("^GSPC")
hist_snp = snp.history(start="2022-06-01", end ="2022-07-01", interval="1d")
hist_snp = pd.DataFrame(hist_snp)

hist_snp_clean = hist_snp.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_snp_clean = hist_snp_clean.reset_index(drop=False)
hist_snp_clean_2 = hist_snp_clean.iloc[[0,1,2,20]]
hist_snp_clean_2 = pd.DataFrame(hist_snp_clean_2)
hist_snp_clean_2.insert(3,"rebased at 100", [100,102,100,92])
hist_snp_clean_2.to_csv('7hist_snp_clean_2.csv', index = True, encoding='utf-8')

display(hist_snp_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-06-01 00:00:00-04:00,4101.22998,4145710000,100
1,2022-06-02 00:00:00-04:00,4176.819824,3604930000,102
2,2022-06-03 00:00:00-04:00,4108.540039,3107080000,100
20,2022-06-30 00:00:00-04:00,3785.379883,4032260000,92


In [63]:
nsd = yf.Ticker("^IXIC")
hist_nsd = nsd.history(start="2022-09-01", end ="2022-10-01", interval="1d")
hist_nsd = pd.DataFrame(hist_nsd)

hist_nsd_clean = hist_nsd.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_nsd_clean = hist_nsd_clean.reset_index(drop=False)
hist_nsd_clean_2 = hist_nsd_clean.iloc[[0,1,2,20]]
hist_nsd_clean_2 = pd.DataFrame(hist_nsd_clean_2)
hist_nsd_clean_2.insert(3,"rebased at 100", [100,99,98,90])
hist_nsd_clean_2.to_csv('8hist_nsd_clean_2.csv', index = True, encoding='utf-8')

display(hist_nsd_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-09-01 00:00:00-04:00,11785.129883,4792890000,100
1,2022-09-02 00:00:00-04:00,11630.860352,4257410000,99
2,2022-09-06 00:00:00-04:00,11544.910156,4622940000,98
20,2022-09-30 00:00:00-04:00,10575.620117,4649710000,90


In [69]:
snp = yf.Ticker("^GSPC")
hist_snp = snp.history(start="2022-06-01", end ="2022-07-01", interval="1d")
hist_snp = pd.DataFrame(hist_snp)

hist_snp_clean = hist_snp.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_snp_clean = hist_snp_clean.reset_index(drop=False)
hist_snp_clean_2 = hist_snp_clean.iloc[[0,1,2,20]]
hist_snp_clean_2 = pd.DataFrame(hist_snp_clean_2)
hist_snp_clean_2.insert(3,"rebased at 100", [100,99,99,90])
hist_snp_clean_2.to_csv('9hist_snp_clean_2.csv', index = True, encoding='utf-8')

display(hist_snp_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-06-01 00:00:00-04:00,4101.22998,4145710000,100
1,2022-06-02 00:00:00-04:00,4176.819824,3604930000,99
2,2022-06-03 00:00:00-04:00,4108.540039,3107080000,99
20,2022-06-30 00:00:00-04:00,3785.379883,4032260000,90


In [70]:
nsd = yf.Ticker("^IXIC")
hist_nsd = nsd.history(start="2022-09-01", end ="2022-10-01", interval="1d")
hist_nsd = pd.DataFrame(hist_nsd)

hist_nsd_clean = hist_nsd.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace = False)
hist_nsd_clean = hist_nsd_clean.reset_index(drop=False)
hist_nsd_clean_2 = hist_nsd_clean.iloc[[0,1,2,20]]
hist_nsd_clean_2 = pd.DataFrame(hist_nsd_clean_2)
hist_nsd_clean_2.insert(3,"rebased at 100", [100,99,98,90])
hist_nsd_clean_2.to_csv('10hist_nsd_clean_2.csv', index = True, encoding='utf-8')

display(hist_nsd_clean_2)

Unnamed: 0,Date,Close,Volume,rebased at 100
0,2022-09-01 00:00:00-04:00,11785.129883,4792890000,100
1,2022-09-02 00:00:00-04:00,11630.860352,4257410000,99
2,2022-09-06 00:00:00-04:00,11544.910156,4622940000,98
20,2022-09-30 00:00:00-04:00,10575.620117,4649710000,90


## Phase 4 


Finally, I just wanted to have a look at based on the three stocks I chose if they were on the picks of many comments on the date I chose them.

In [30]:
#For GME

date_2 = datetime.datetime.strptime("2022-5-28", "%Y-%m-%d")
K = 9
date_2 = pd.date_range(date_2, periods=K)
print(date_2.strftime("%Y-%m-%d"))

Index(['2022-05-28', '2022-05-29', '2022-05-30', '2022-05-31', '2022-06-01',
       '2022-06-02', '2022-06-03', '2022-06-04', '2022-06-05'],
      dtype='object')


In [33]:
#For BBBY and NVDA 
import datetime
 
date_5 = datetime.datetime.strptime("2022-8-28", "%Y-%m-%d")
K = 9
date_5 = pd.date_range(date_5, periods=K)
print(date_5.strftime("%Y-%m-%d"))

Index(['2022-08-28', '2022-08-29', '2022-08-30', '2022-08-31', '2022-09-01',
       '2022-09-02', '2022-09-03', '2022-09-04', '2022-09-05'],
      dtype='object')


In [34]:
res_999 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[0]}")
res_999_clean = res_999.json()
data_999 = pd.DataFrame.from_dict(res_999_clean)
data_999 = data_999.assign(Date='2022-05-28')

res_998 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[1]}")
res_998_clean = res_998.json()
data_998 = pd.DataFrame.from_dict(res_998_clean)
data_998 = data_998.assign(Date='2022-05-29')

res_997 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[2]}")
res_997_clean = res_997.json()
data_997 = pd.DataFrame.from_dict(res_997_clean)
data_997 = data_997.assign(Date='2022-05-30')

res_996 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[3]}")
res_996_clean = res_996.json()
data_996 = pd.DataFrame.from_dict(res_996_clean)
data_996 = data_996.assign(Date='2022-05-31')

res_995 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[4]}")
res_995_clean = res_995.json()
data_995 = pd.DataFrame.from_dict(res_995_clean)
data_995 = data_995.assign(Date='2022-06-01')

res_994 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[5]}")
res_994_clean = res_994.json()
data_994 = pd.DataFrame.from_dict(res_994_clean)
data_994 = data_994.assign(Date='2022-06-02')

res_993 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[6]}")
res_993_clean = res_993.json()
data_993 = pd.DataFrame.from_dict(res_993_clean)
data_993 = data_993.assign(Date='2022-06-03')

res_992 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[7]}")
res_992_clean = res_992.json()
data_992 = pd.DataFrame.from_dict(res_992_clean)
data_992 = data_992.assign(Date='2022-06-04')

res_991 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_2[8]}")
res_991_clean = res_991.json()
data_991 = pd.DataFrame.from_dict(res_991_clean)
data_991 = data_991.assign(Date='2022-06-05')

df_data_sample_1 = pd.concat([data_999, data_998, data_997, data_996, data_995, data_994, data_993, data_992, data_991])
df_data_sample_1

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [32]:
res_899 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[0]}")
res_899_clean = res_899.json()
data_899 = pd.DataFrame.from_dict(res_899_clean)
data_899 = data_899.assign(Date='2022-08-28')

res_898 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[1]}")
res_898_clean = res_898.json()
data_898 = pd.DataFrame.from_dict(res_898_clean)
data_898 = data_898.assign(Date='2022-08-29')
#data_998 = data_999.append(data_998)


res_897 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[2]}")
res_897_clean = res_897.json()
data_897 = pd.DataFrame.from_dict(res_897_clean)
data_897 = data_897.assign(Date='2022-08-30')
#data_997 = data_998.append(data_997)


res_896 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[3]}")
res_896_clean = res_896.json()
data_896 = pd.DataFrame.from_dict(res_896_clean)
data_896 = data_896.assign(Date='2022-08-31')
#data_996 = data_997.append(data_996)


res_895 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[4]}")
res_895_clean = res_895.json()
data_895 = pd.DataFrame.from_dict(res_895_clean)
data_895 = data_895.assign(Date='2022-09-01')
#data_995 = data_996.append(data_995)


res_894 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[5]}")
res_894_clean = res_894.json()
data_894 = pd.DataFrame.from_dict(res_894_clean)
data_894 = data_894.assign(Date='2022-09-02')
#data_994 = data_995.append(data_994)


res_893 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[6]}")
res_893_clean = res_893.json()
data_893 = pd.DataFrame.from_dict(res_893_clean)
data_893 = data_893.assign(Date='2022-09-03')
#data_993 = data_994.append(data_993)


res_892 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[7]}")
res_892_clean = res_892.json()
data_892 = pd.DataFrame.from_dict(res_892_clean)
data_892 = data_892.assign(Date='2022-09-04')
#data_992 = data_993.append(data_992)


res_891 = requests.get(f"https://tradestie.com/api/v1/apps/reddit?date={date_5[8]}")
res_891_clean = res_991.json()
data_891 = pd.DataFrame.from_dict(res_891_clean)
data_891 = data_891.assign(Date='2022-09-05')
#data_991 = data_992.append(data_991)


df_data_sample_2 = pd.concat([data_899, data_898, data_897, data_896, data_895, data_894, data_893, data_892, data_891])
df_data_sample_2.head()

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
0,232,Bearish,-0.392,BBBY,2022-08-28
1,83,Bullish,0.13,GME,2022-08-28
2,45,Bullish,0.244,RC,2022-08-28
3,17,Bullish,0.159,AMC,2022-08-28
4,15,Bullish,0.415,SQQQ,2022-08-28


In [42]:
df_data_sample_2_BBBY = df_data_sample_2[df_data_sample_2['ticker'] == ('BBBY')]
df_data_sample_2_BBBY.to_csv('11df_data_sample_2_BBBY.csv', index = True, encoding='utf-8')
df_data_sample_2_BBBY

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
0,232,Bearish,-0.392,BBBY,2022-08-28
0,782,Bearish,-0.392,BBBY,2022-08-29
0,834,Bearish,-0.392,BBBY,2022-08-30
0,1006,Bearish,-0.392,BBBY,2022-08-31
0,542,Bearish,-0.392,BBBY,2022-09-01
0,351,Bearish,-0.392,BBBY,2022-09-02
0,219,Bearish,-0.392,BBBY,2022-09-03
0,378,Bearish,-0.392,BBBY,2022-09-04


In [43]:
df_data_sample_2_NVDA = df_data_sample_2[df_data_sample_2['ticker'] == ('NVDA')]
df_data_sample_2_NVDA.to_csv('12df_data_sample_2_NVDA.csv', index = True, encoding='utf-8')
df_data_sample_2_NVDA

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
22,3,Bearish,-0.109,NVDA,2022-08-28
27,5,Bearish,-0.109,NVDA,2022-08-29
42,3,Bearish,-0.109,NVDA,2022-08-30
3,65,Bearish,-0.109,NVDA,2022-08-31
2,101,Bearish,-0.109,NVDA,2022-09-01
4,32,Bearish,-0.109,NVDA,2022-09-02
12,8,Bearish,-0.109,NVDA,2022-09-03


In [44]:
df_data_sample_2_GME = df_data_sample_1[df_data_sample_1['ticker'] == ('GME')]
df_data_sample_2_GME.to_csv('13df_data_sample_2_GME.csv', index = True, encoding='utf-8')
df_data_sample_2_GME

Unnamed: 0,no_of_comments,sentiment,sentiment_score,ticker,Date
0,59,Bullish,0.13,GME,2022-05-28
0,60,Bullish,0.13,GME,2022-05-29
0,58,Bullish,0.13,GME,2022-05-30
0,59,Bullish,0.13,GME,2022-05-31
0,382,Bullish,0.13,GME,2022-06-01
0,219,Bullish,0.13,GME,2022-06-02
1,108,Bullish,0.13,GME,2022-06-03
0,83,Bullish,0.13,GME,2022-06-04
0,101,Bullish,0.13,GME,2022-06-05
