 Merge all the NVDA twits into a single CSV file
 
 From 2013-04-11 to 2022-02-28

In [16]:
import pandas as pd
import glob
import os
import yfinance as yf
import datetime

In [17]:
# Set the folder path containing CSV files
folder_path = '/Users/zhanghanshi/Desktop/WM_Project/NVDA_2013_2022/'  # Replace with your actual path

# Get all CSV file paths in the folder. The list is sorted so the read order
# (and therefore the concatenation order) does not depend on the arbitrary
# order in which the filesystem happens to list the files.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))

# Read each CSV file; a file that cannot be parsed is reported and skipped
# so that one bad file does not abort the whole merge.
df_list = []
for file in csv_files:
    try:
        df = pd.read_csv(file)
        df_list.append(df)
    except Exception as e:
        print(f"Failed to read {file}: {e}")

# pd.concat fails with a generic "No objects to concatenate" on an empty list;
# fail early with a message that points at the folder instead.
if not df_list:
    raise ValueError(f"No readable CSV files found in {folder_path!r}")

# Concatenate all DataFrames into one
combined_df = pd.concat(df_list, ignore_index=True)

# Convert 'created_at' column to datetime format (UTC)
combined_df['created_at'] = pd.to_datetime(combined_df['created_at'], utc=True)

# Sort the DataFrame by 'created_at' in ascending order (oldest twit first).
# Assignment is used instead of inplace=True; the row labels produced by the
# concat are kept, exactly as before.
combined_df = combined_df.sort_values('created_at', ascending=True)

combined_df.head()



Unnamed: 0.1,Unnamed: 0,id,body,created_at,user,source,symbols,mentioned_users,entities,likes,links,conversation,reshare_message,reshares,owned_symbols,structurable
584180,14999,12994205,$NVDA shorts using a lot of ammo trying to kee...,2013-04-11 19:34:19+00:00,"{'id': 18393, 'username': 'fatcoin888', 'name'...","{'id': 1, 'title': 'Stocktwits', 'url': 'https...","[{'id': 2925, 'symbol': 'NVDA', 'title': 'NVID...",[],{'sentiment': None},,,,,,,
584179,14998,12995313,"$NVDA quite a rebound today, i can hardly beli...",2013-04-11 20:03:53+00:00,"{'id': 37084, 'username': 'peter5slo', 'name':...","{'id': 1, 'title': 'Stocktwits', 'url': 'https...","[{'id': 2925, 'symbol': 'NVDA', 'title': 'NVID...",[],{'sentiment': None},,,,,,,
584178,14997,12995587,As I said just over a week ago that $NVDA woul...,2013-04-11 20:12:52+00:00,"{'id': 27639, 'username': 'CDMCapital', 'name'...","{'id': 1, 'title': 'Stocktwits', 'url': 'https...","[{'id': 2925, 'symbol': 'NVDA', 'title': 'NVID...",[],{'sentiment': {'basic': 'Bullish'}},,,,,,,
584177,14996,12995655,$NVDA shorting here is like picking up dimes i...,2013-04-11 20:15:08+00:00,"{'id': 27639, 'username': 'CDMCapital', 'name'...","{'id': 1, 'title': 'Stocktwits', 'url': 'https...","[{'id': 2925, 'symbol': 'NVDA', 'title': 'NVID...",[],{'sentiment': {'basic': 'Bullish'}},"{'total': 1, 'user_ids': [55546]}",,,,,,
584176,14995,12997231,Nvidia investing in â€˜once in a lifetime oppo...,2013-04-11 21:35:07+00:00,"{'id': 222797, 'username': 'StockTwitsLinks', ...","{'id': 1, 'title': 'Stocktwits', 'url': 'https...","[{'id': 2925, 'symbol': 'NVDA', 'title': 'NVID...",[],{'sentiment': None},,"[{'title': ""Nvidia investing in 'once in a lif...",,,,,


In [18]:
# Write the combined, time-sorted twits to disk without the row index column
output_path = '/Users/zhanghanshi/Desktop/WM_Project/my_project/nvda_twits_combined_sorted.csv'
combined_df.to_csv(path_or_buf=output_path, index=False)

Get VIX and NVDA stock data

Then merge them and save the result to another CSV

In [19]:
# VIX data
vix_ticker = yf.Ticker("^VIX")

# Today's date as 'YYYY-MM-DD'.
# NOTE(review): not used anywhere in the visible notebook; kept in case a
# later cell reads it — confirm and remove if it is dead.
today = datetime.date.today().strftime('%Y-%m-%d')

# Fetch daily history for 2013-04-11 through 2022-02-28 inclusive.
# yfinance treats `end` as exclusive, so the day after the last wanted
# date is passed; end="2022-02-28" would silently drop that final day.
vix_data = vix_ticker.history(start="2013-04-11", end="2022-03-01")

# Resetting the index to have Date as a column
vix_data = vix_data.reset_index()

# Keep only the date and OHLC columns, renamed to 'date' and 'vix_<name>'
vix_data = vix_data[['Date', 'Open', 'High', 'Low', 'Close']].rename(
    columns={
        'Date': 'date',
        'Open': 'vix_open',
        'High': 'vix_high',
        'Low': 'vix_low',
        'Close': 'vix_close',
    }
)

# Store the date as a plain 'YYYY-MM-DD' string so it can serve as a join key
vix_data['date'] = vix_data['date'].dt.strftime('%Y-%m-%d')

vix_data.head()


Unnamed: 0,date,vix_open,vix_high,vix_low,vix_close
0,2013-04-11,12.55,12.62,12.15,12.24
1,2013-04-12,12.61,13.12,11.99,12.06
2,2013-04-15,13.12,17.27,12.66,17.27
3,2013-04-16,14.72,14.87,13.91,13.96
4,2013-04-17,15.35,17.9,14.98,16.51


In [20]:
# NVDA data
nvda_ticker = yf.Ticker("NVDA")

# Today's date as 'YYYY-MM-DD'.
# NOTE(review): not used anywhere in the visible notebook; kept in case a
# later cell reads it — confirm and remove if it is dead.
today = datetime.date.today().strftime('%Y-%m-%d')

# Fetch daily history for 2013-04-11 through 2022-02-28 inclusive.
# yfinance treats `end` as exclusive, so the day after the last wanted
# date is passed; end="2022-02-28" would silently drop that final day.
nvda_data = nvda_ticker.history(start="2013-04-11", end="2022-03-01")

# Resetting the index to have Date as a column
nvda_data = nvda_data.reset_index()

# Keep only the date, OHLC and volume columns, renamed to 'date' and 'nvda_<name>'
nvda_data = nvda_data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']].rename(
    columns={
        'Date': 'date',
        'Open': 'nvda_open',
        'High': 'nvda_high',
        'Low': 'nvda_low',
        'Close': 'nvda_close',
        'Volume': 'nvda_volume',
    }
)

# Store the date as a plain 'YYYY-MM-DD' string so it can serve as a join key
nvda_data['date'] = nvda_data['date'].dt.strftime('%Y-%m-%d')

nvda_data.head()

Unnamed: 0,date,nvda_open,nvda_high,nvda_low,nvda_close,nvda_volume
0,2013-04-11,0.293663,0.298306,0.285538,0.296449,969220000
1,2013-04-12,0.297842,0.307127,0.295288,0.303877,770548000
2,2013-04-15,0.302949,0.303877,0.295752,0.29761,430420000
3,2013-04-16,0.300395,0.302949,0.29877,0.30086,468844000
4,2013-04-17,0.298538,0.301324,0.295056,0.296913,532948000


In [21]:
# Inner-join the NVDA and VIX frames on 'date': only dates present in both
# frames are kept, with the NVDA columns first and the VIX columns after them.
merged_data = nvda_data.merge(vix_data, how='inner', on='date')

merged_data.head()

Unnamed: 0,date,nvda_open,nvda_high,nvda_low,nvda_close,nvda_volume,vix_open,vix_high,vix_low,vix_close
0,2013-04-11,0.293663,0.298306,0.285538,0.296449,969220000,12.55,12.62,12.15,12.24
1,2013-04-12,0.297842,0.307127,0.295288,0.303877,770548000,12.61,13.12,11.99,12.06
2,2013-04-15,0.302949,0.303877,0.295752,0.29761,430420000,13.12,17.27,12.66,17.27
3,2013-04-16,0.300395,0.302949,0.29877,0.30086,468844000,14.72,14.87,13.91,13.96
4,2013-04-17,0.298538,0.301324,0.295056,0.296913,532948000,15.35,17.9,14.98,16.51


In [22]:
# Save the merged NVDA + VIX DataFrame to a new CSV file.
# The frame written must be merged_data: writing combined_df here would put
# the twits table into the financial-data file.
output_path = '/Users/zhanghanshi/Desktop/WM_Project/my_project/merged_nvda_fin_data.csv'
merged_data.to_csv(output_path, index=False)