# Step 0. Import dependencies
# Step 1. Get content from r/wallstreetbets
# Step 2. Analyze word frequency
# Step 3. Get a list of stock tickers
# Step 4. Inner join word frequency and stock tickers
# Step 5. Output the 100 to 300+ most mentioned stocks in csv

In [1]:
# Step 0: Import dependent packages

In [2]:
import pandas as pd #for data transformation
import praw #to access reddit data
import re #for regex on scraped text
import requests # for posting message

In [3]:
# Step 1: Get content from Reddit /wsb

In [4]:
reddit = praw.Reddit(
    client_id="I0BDP3rqyT-BJx8rRQBxwg",
    client_secret = "Z625x_gkqEMIV-IrD4dvBwi70gcs0w",
    user_agent = "abubakar-api")

In [5]:
df = []

for post in reddit.subreddit('wallstreetbets+investing+stocks').top(limit=20000):
    
    content = {
        "title" : post.title,
        "text" : post.selftext
    }
    
    df.append(content)

df = pd.DataFrame(df)

In [6]:
# Step 2: Analyze word frequency

regex = re.compile('[^a-zA-Z ]')
word_dict = {}

#Cleaning our data -> Removing the spaces, symbols and other unrelated stuff.
for (index, row) in df.iterrows():
    # titles
    title = row['title']
    
    title = regex.sub('', title)
    title_words = title.split(' ')
    
    # content
    content = row['text']
    
    content = regex.sub('', content)
    content_words = content.split(' ')
    
    # combining our title and content ...like  a paragraph
    words = title_words + content_words
    
    for x in words: #Checking the number of times a ticket is repeated.
        
        if x in ['A', 'B', 'GO', 'ARE', 'ON', 'IT', 'ALL', 'NEXT', 'PUMP', 'AT', 'NOW', 'FOR', 'TD', 'CEO', 'AM', 'K', 'BIG', 'BY', 'LOVE', 'CAN', 'BE', 'SO', 'OUT', 'STAY', 'OR', 'NEW','RH','EDIT','ONE','ANY']:
            pass
        elif x in word_dict:
            word_dict[x] += 1
        else:
            word_dict[x] = 1

word_df = pd.DataFrame.from_dict(list(word_dict.items())).rename(columns = {0:"Term", 1:"Frequency"})


In [7]:
# Step 3: Get a list of stock tickers (Get data from csv)

In [8]:
ticker_df = pd.read_csv('tickers.csv').rename(columns = {"Symbol":"Term", "Name":"Company_Name"})


In [9]:
# Step 4: Compare tickers and words scraped (Compare csv data with our scraped data to ensure our data
# must be related to Ticket and CompanyName)

In [10]:
stonks_df = pd.merge(ticker_df, word_df, on="Term")
print(stonks_df)
#Copy the merged data into another variable
Copy_stocks_df=stonks_df

     Term                                       Company_Name  Frequency
0      AA                     Alcoa Corporation Common Stock          1
1     AAL          American Airlines Group Inc. Common Stock          7
2    AAPL                            Apple Inc. Common Stock         55
3      AB              AllianceBernstein Holding L.P.  Units         11
4     ABB                               ABB Ltd Common Stock          3
..    ...                                                ...        ...
694     Z            Zillow Group Inc. Class C Capital Stock          9
695   ZEN                          Zendesk Inc. Common Stock          1
696    ZG             Zillow Group Inc. Class A Common Stock          1
697    ZM  Zoom Video Communications Inc. Class A Common ...         10
698  TRUE                          TrueCar Inc. Common Stock          1

[699 rows x 3 columns]


In [11]:
# Making column in descending Order and displaying the all the scrapped cleaned data
stonks_df = stonks_df.sort_values(by="Frequency", ascending = False, ignore_index = True).head(len(Copy_stocks_df))
# stonks_df

#Saving al data in a csv file
stonks_df.to_csv("Stock_Cleaned_Data.csv")

In [14]:
# Displayig all data as output on the screen
for i in range(0,len(stonks_df),1):
    print(i,")- ",stonks_df['Company_Name'][i]," ( ",stonks_df['Term'][i]," )"," - " + str(stonks_df['Frequency'][i]) + " mentions\n")

0 )-  GameStop Corporation Common Stock  (  GME  )  - 688 mentions

1 )-  SP Plus Corporation Common Stock  (  SP  )  - 415 mentions

2 )-  Macy's Inc Common Stock  (  M  )  - 217 mentions

3 )-  Tesla Inc. Common Stock  (  TSLA  )  - 169 mentions

4 )-  BlackBerry Limited Common Stock  (  BB  )  - 159 mentions

5 )-  Eaton Vance Corporation Common Stock  (  EV  )  - 138 mentions

6 )-  DuPont de Nemours Inc. Common Stock  (  DD  )  - 137 mentions

7 )-  AMC Entertainment Holdings Inc. Class A Common Stock  (  AMC  )  - 107 mentions

8 )-  Advanced Micro Devices Inc. Common Stock  (  AMD  )  - 90 mentions

9 )-  AT&T Inc.  (  T  )  - 86 mentions

10 )-  Palantir Technologies Inc. Class A Common Stock  (  PLTR  )  - 72 mentions

11 )-  Energy Transfer LP Common Units  (  ET  )  - 71 mentions

12 )-  Microsoft Corporation Common Stock  (  MSFT  )  - 70 mentions

13 )-  NIO Inc. American depositary shares each  representing one Class A ordinary share  (  NIO  )  - 67 mentions

14 )-  C3.a

128 )-  Sealed Air Corporation Common Stock  (  SEE  )  - 7 mentions

129 )-  Dave & Buster's Entertainment Inc. Common Stock  (  PLAY  )  - 7 mentions

130 )-  ViacomCBS Inc. Class B Common Stock  (  VIAC  )  - 7 mentions

131 )-  Healthcare Realty Trust Incorporated Common Stock  (  HR  )  - 7 mentions

132 )-  Helmerich & Payne Inc. Common Stock  (  HP  )  - 7 mentions

133 )-  Prosperity Bancshares Inc. Common Stock  (  PB  )  - 7 mentions

134 )-  Deere & Company Common Stock  (  DE  )  - 6 mentions

135 )-  FSD Pharma Inc. Class B Subordinate Voting Shares  (  HUGE  )  - 6 mentions

136 )-  Beyond Meat Inc. Common Stock  (  BYND  )  - 6 mentions

137 )-  aTyr Pharma Inc. Common Stock  (  LIFE  )  - 6 mentions

138 )-  Electronic Arts Inc. Common Stock  (  EA  )  - 6 mentions

139 )-  Bank of America Corporation Common Stock  (  BAC  )  - 6 mentions

140 )-  Boeing Company (The) Common Stock  (  BA  )  - 6 mentions

141 )-  Home Depot Inc. (The) Common Stock  (  HD  )  - 6 mention

272 )-  DocuSign Inc. Common Stock  (  DOCU  )  - 3 mentions

273 )-  Intellia Therapeutics Inc. Common Stock  (  NTLA  )  - 3 mentions

274 )-  Deutsche Bank AG Common Stock  (  DB  )  - 3 mentions

275 )-  McDonald's Corporation Common Stock  (  MCD  )  - 3 mentions

276 )-  Levi Strauss & Co Class A Common Stock  (  LEVI  )  - 2 mentions

277 )-  Sunstone Hotel Investors Inc. Sunstone Hotel Investors Inc. Common Shares  (  SHO  )  - 2 mentions

278 )-  SMART Global Holdings Inc. Ordinary Shares  (  SGH  )  - 2 mentions

279 )-  Transocean Ltd (Switzerland) Common Stock  (  RIG  )  - 2 mentions

280 )-  Lindsay Corporation Common Stock  (  LNN  )  - 2 mentions

281 )-  Sirius International Insurance Group Ltd. Common Share  (  SG  )  - 2 mentions

282 )-  Overstock.com Inc. Common Stock  (  OSTK  )  - 2 mentions

283 )-  Manhattan Bridge Capital Inc  (  LOAN  )  - 2 mentions

284 )-  Reliance Steel & Aluminum Co. Common Stock (DE)  (  RS  )  - 2 mentions

285 )-  Neuberger Berman Hig

512 )-  EOG Resources Inc. Common Stock  (  EOG  )  - 1 mentions

513 )-  Electro-Sensors Inc. Common Stock  (  ELSE  )  - 1 mentions

514 )-  Edison International Common Stock  (  EIX  )  - 1 mentions

515 )-  Eagle Pharmaceuticals Inc. Common Stock  (  EGRX  )  - 1 mentions

516 )-  EastGroup Properties Inc. Common Stock  (  EGP  )  - 1 mentions

517 )-  EDAP TMS S.A. American Depositary Shares  (  EDAP  )  - 1 mentions

518 )-  DexCom Inc. Common Stock  (  DXCM  )  - 1 mentions

519 )-  Discover Financial Services Common Stock  (  DFS  )  - 1 mentions

520 )-  DTE Energy Company Common Stock  (  DTE  )  - 1 mentions

521 )-  Dynatrace Inc. Common Stock  (  DT  )  - 1 mentions

522 )-  Diana Shipping inc. common stock  (  DSX  )  - 1 mentions

523 )-  Darden Restaurants Inc. Common Stock  (  DRI  )  - 1 mentions

524 )-  Domino's Pizza Inc Common Stock  (  DPZ  )  - 1 mentions

525 )-  Domo Inc. Class B Common Stock  (  DOMO  )  - 1 mentions

526 )-  Denison Mines Corp Ordinary Share

652 )-  Globus Medical Inc. Class A Common Stock  (  GMED  )  - 1 mentions

653 )-  Gilat Satellite Networks Ltd. Ordinary Shares  (  GILT  )  - 1 mentions

654 )-  Guardant Health Inc. Common Stock  (  GH  )  - 1 mentions

655 )-  Geo Group Inc (The) REIT  (  GEO  )  - 1 mentions

656 )-  Genesis Healthcare Inc. Common Stock  (  GEN  )  - 1 mentions

657 )-  GoodRx Holdings Inc. Class A Common Stock  (  GDRX  )  - 1 mentions

658 )-  Fathom Holdings Inc. Common Stock  (  FTHM  )  - 1 mentions

659 )-  IsoRay Inc. Common Stock (DE)  (  ISR  )  - 1 mentions

660 )-  Whole Earth Brands Inc. Class A Common Stock  (  FREE  )  - 1 mentions

661 )-  Amicus Therapeutics Inc. Common Stock  (  FOLD  )  - 1 mentions

662 )-  Funko Inc. Class A Common Stock  (  FNKO  )  - 1 mentions

663 )-  Fabrinet Ordinary Shares  (  FN  )  - 1 mentions

664 )-  Flex Ltd. Ordinary Shares  (  FLEX  )  - 1 mentions

665 )-  Fiserv Inc. Common Stock  (  FISV  )  - 1 mentions

666 )-  FedEx Corporation Common Stoc