In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns



In [2]:
df1 = pd.read_csv('nasdaq_stocks_data_1.csv')
df2 = pd.read_csv('nasdaq_stocks_data_2.csv')
df3 = pd.read_csv('moving_ma.csv')
df4 = pd.read_csv('stock_percent_changes.csv')
df5 = pd.read_csv('nasdaq_stocks_news.csv')
data = pd.concat([df1, df2, df3, df4, df5], axis=1)

In [3]:
#The dividend column is an important column. Filling it with -1 if its nan which will indicate if the
# Company does not give out dividends
data['Dividends'].fillna(-1, inplace=True)
data = data.drop(columns=['Stock']) 
data.to_csv('complete_data.csv', index=False)

In [4]:
del data['Company Name']

In [5]:
data.columns

Index(['Shares Number', 'Volume', 'Net Income', 'Dividends', 'Employee Amount',
       'Previous Close', 'Analysts', 'Price Target', 'Earnings Date', 'Symbol',
       'Market Cap', 'Stock Price', 'Revenue', 'MA20', 'MA50', 'MA200',
       '20-Day Percent Change', '100-Day Percent Change', 'News1', 'News2',
       'News3', 'News4', 'News5'],
      dtype='object')

In [6]:
# Convert relevant columns to numeric
data['Shares Number'] = pd.to_numeric(data['Shares Number'].str.replace('B', ''), errors='coerce')
data['Net Income'] = pd.to_numeric(data['Net Income'].str.replace('B', ''), errors='coerce')
data['Dividends'] = pd.to_numeric(data['Dividends'].str.replace('$', ''), errors='coerce')
data['Market Cap'] = pd.to_numeric(data['Market Cap'].str.replace('B', ''), errors='coerce')
data['Stock Price'] = pd.to_numeric(data['Stock Price'], errors='coerce')
data['Revenue'] = pd.to_numeric(data['Revenue'].str.replace('B', ''), errors='coerce')
data['Previous Close'] = pd.to_numeric(data['Previous Close'], errors='coerce')
data['Price Target'] = pd.to_numeric(data['Price Target'], errors='coerce')
data['Employee Amount'] = pd.to_numeric(data['Employee Amount'].str.replace(',', ''), errors='coerce')

# Create new columns
data['P/E Ratio'] = data['Stock Price'] / (data['Net Income'] / data['Shares Number'])
data['Dividend Yield (%)'] = (data['Dividends'] / data['Stock Price']) * 100
data['Market Cap to Revenue Ratio'] = data['Market Cap'] / data['Revenue']
data['Earnings Per Share (EPS)'] = data['Net Income'] / data['Shares Number']
data['Revenue Per Employee'] = data['Revenue'] / data['Employee Amount']
data['Target Price Difference (%)'] = ((data['Price Target'] - data['Stock Price']) / data['Stock Price']) * 100

# Display the data
data


Unnamed: 0,Shares Number,Volume,Net Income,Dividends,Employee Amount,Previous Close,Analysts,Price Target,Earnings Date,Symbol,...,News2,News3,News4,News5,P/E Ratio,Dividend Yield (%),Market Cap to Revenue Ratio,Earnings Per Share (EPS),Revenue Per Employee,Target Price Difference (%)
0,15.20,29834257,101.96,1.00,161000.0,226.840,Buy,236.31,"Aug 1, 2024",AAPL,...,"Cramer’s week ahead: iPhone 16 launch, consume...",Masimo CEO shared confidential documents with ...,"Keep an eye on Meta, says KKM’s Jeff Kilburg",What to expect from Apple’s ‘It’s Glowtime’ event,33.816869,0.440839,,6.707895,0.002395,4.174749
1,24.58,310565961,42.60,0.04,29600.0,129.370,Strong Buy,136.74,"Aug 28, 2024",NVDA,...,This semiconductor ETF just had its worst week...,"Cramer: There isn’t an AI bubble, just intense...",Jim Cramer deep dives into the weakness in chi...,Cramer pushes back against a ‘new and negative...,74.645883,0.030919,,1.733116,0.002695,5.696839
2,7.43,10814122,88.14,3.00,228000.0,416.790,Strong Buy,493.97,"Jul 30, 2024",MSFT,...,Bill Gates wants to work another 20 to 30 year...,Wells Fargo’s top picks in September include t...,Cramer pushes back against a ‘new and negative...,OpenAI gives artists access to unreleased tool...,35.134442,0.719787,,11.862719,0.001075,18.517719
3,12.31,11634952,87.66,0.80,182381.0,167.430,Buy,200.36,"Jul 23, 2024",GOOG,...,YouTube group The Try Guys quickly found succe...,Cramer pushes back against a ‘new and negative...,"Cramer’s week ahead: iPhone 16 launch, consume...",Top 10 people most likely to reach trillionair...,23.512016,0.477812,,7.121040,0.001800,19.667921
4,12.31,10831461,87.66,0.80,182502.0,165.620,Buy,199.27,"Jul 23, 2024",GOOGL,...,YouTube group The Try Guys quickly found succe...,Cramer pushes back against a ‘new and negative...,"Cramer’s week ahead: iPhone 16 launch, consume...",Top 10 people most likely to reach trillionair...,23.257839,0.483033,,7.121040,0.001799,20.317594
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3495,,338760,,,131.0,1.030,Strong Buy,3.00,"Aug 13, 2024",GWAV,...,,,,,,,,,,525.000000
3496,,117841,,,152.0,0.462,Hold,,"Aug 13, 2024",LEDS,...,,,,,,,,,,
3497,,,,,46.0,,,,,WISA,...,"WiSA Technologies to acquire assets, IP from D...",WiSA Technologies VP of Finance Gary Williams ...,"WiSA Technologies, Inc. (WISA) Q2 Earnings Che...",,,,,,,
3498,,1452,,,43.0,1.800,,,"Sep 3, 2024",ASNS,...,Actelis Networks Navigates Acquisition of Qual...,Actelis Receives New Order for Intelligent Tra...,Actelis Networks receives new order for ITS ne...,Actelis Networks Unveils Investor Communicatio...,,,,,,


In [7]:
numerical_data = data.select_dtypes(include='number')

# Find correlations between numerical columns
correlation_matrix = numerical_data.corr()

# Display the correlation matrix
print(correlation_matrix)

                             Shares Number  Net Income  Dividends  \
Shares Number                     1.000000    0.531078  -0.321415   
Net Income                        0.531078    1.000000  -0.083805   
Dividends                        -0.321415   -0.083805   1.000000   
Employee Amount                   0.395881    0.311312   0.068495   
Previous Close                    0.174658    0.142211   0.626485   
Price Target                      0.167201    0.145060   0.614630   
Market Cap                        0.224230    0.728605   0.220029   
Stock Price                       0.055493    0.195542   0.352513   
Revenue                           0.550942    0.686210   0.017547   
P/E Ratio                         0.424992   -0.070560  -0.044011   
Dividend Yield (%)               -0.052547   -0.171305   0.029473   
Market Cap to Revenue Ratio       0.002006    0.201228   0.169481   
Earnings Per Share (EPS)          0.060613    0.606004   0.388143   
Revenue Per Employee             -