In [1]:
import pandas as pd

# Load the per-tweet sentiment table from the working directory and preview it.
SENTIMENT_CSV_PATH = 'stock_tweets_with_sentiment.csv'
sentiment_df = pd.read_csv(SENTIMENT_CSV_PATH)
sentiment_df.head()

Unnamed: 0,Date,Tweet,Stock Name,Company Name,sentiment_negative,sentiment_neutral,sentiment_positive,sentiment_compound,sentiment_label
0,2022-09-29,Mainstream media has done an amazing job at br...,TSLA,"Tesla, Inc.",0.125,0.763,0.113,0.0772,Positive
1,2022-09-29,Tesla delivery estimates are at around 364k fr...,TSLA,"Tesla, Inc.",0.0,1.0,0.0,0.0,Neutral
2,2022-09-29,3/ Even if I include 63.0M unvested RSUs as of...,TSLA,"Tesla, Inc.",0.0,0.954,0.046,0.296,Positive
3,2022-09-29,@RealDanODowd @WholeMarsBlog @Tesla Hahaha why...,TSLA,"Tesla, Inc.",0.273,0.59,0.137,-0.7568,Negative
4,2022-09-29,"@RealDanODowd @Tesla Stop trying to kill kids,...",TSLA,"Tesla, Inc.",0.526,0.474,0.0,-0.875,Negative


In [2]:
# Restrict the tweets to rows whose 'Stock Name' is one of the five tech tickers.
tech_tickers = ['AAPL', 'GOOG', 'INTC', 'META', 'MSFT']
is_tech_row = sentiment_df['Stock Name'].isin(tech_tickers)
tech_sentiment_df = sentiment_df.loc[is_tech_row]
tech_sentiment_df

Unnamed: 0,Date,Tweet,Stock Name,Company Name,sentiment_negative,sentiment_neutral,sentiment_positive,sentiment_compound,sentiment_label
37422,2022-09-29,A group of lawmakers led by Sen. Elizabeth War...,MSFT,Microsoft Corporation,0.079,0.850,0.070,-0.0772,Negative
37423,2022-09-29,$NIO just because I'm down money doesn't mean ...,MSFT,Microsoft Corporation,0.155,0.690,0.155,0.2500,Positive
37424,2022-09-29,Todayâ€™s drop in $SPX is a perfect example of w...,MSFT,Microsoft Corporation,0.161,0.777,0.061,-0.6197,Negative
37425,2022-09-29,Druckenmiller owned $CVNA this year \nMunger b...,MSFT,Microsoft Corporation,0.064,0.854,0.082,0.2382,Positive
37426,2022-09-29,Top 10 $QQQ Holdings \n\nAnd Credit Rating\n\n...,MSFT,Microsoft Corporation,0.000,0.830,0.170,0.7783,Positive
...,...,...,...,...,...,...,...,...,...
75810,2021-10-17,A packed earnings week!\n\n$STT\n$ELS\n$ACO\n$...,INTC,Intel Corporation,0.000,0.956,0.044,0.3164,Positive
75811,2021-10-16,ðŸ’¥BIG WEEK OF Q3 EARNINGS AHEAD ðŸ‘€ðŸ‘€\n\nMon: -\nT...,INTC,Intel Corporation,0.000,1.000,0.000,0.0000,Neutral
75812,2021-10-16,A packed earnings week!\n\n$STT\n$ELS\n$ACO\n$...,INTC,Intel Corporation,0.000,0.956,0.044,0.3164,Positive
75813,2021-10-14,TSMC node transitions are slowing down heavily...,INTC,Intel Corporation,0.113,0.887,0.000,-0.6792,Negative


In [5]:
# Split the tech tweets into one dataframe per ticker, keeping only the date,
# the ticker and the four sentiment score columns.
selected_columns = ['Date', 'Stock Name', 'sentiment_negative', 'sentiment_neutral', 'sentiment_positive', 'sentiment_compound']

# Ticker -> independent copy of that ticker's rows (selected columns only).
# The row filter and column selection are done in a single .loc call.
stock_dfs = {
    ticker: tech_sentiment_df.loc[tech_sentiment_df['Stock Name'] == ticker, selected_columns].copy()
    for ticker in ['AAPL', 'GOOG', 'INTC', 'META', 'MSFT']
}

# Preview every per-ticker dataframe, in the same order the tickers were listed
for stock, stock_df in stock_dfs.items():
    print(f"\nFirst few rows of {stock} dataframe:")
    print(stock_df.head())


First few rows of AAPL dataframe:
             Date Stock Name  sentiment_negative  sentiment_neutral  \
55958  2022-09-29       AAPL               0.155              0.690   
55959  2022-09-29       AAPL               0.000              0.918   
55960  2022-09-29       AAPL               0.000              1.000   
55961  2022-09-29       AAPL               0.092              0.708   
55962  2022-09-29       AAPL               0.143              0.857   

       sentiment_positive  sentiment_compound  
55958               0.155              0.2500  
55959               0.082              0.3612  
55960               0.000              0.0000  
55961               0.200              0.6126  
55962               0.000             -0.6523  

First few rows of GOOG dataframe:
             Date Stock Name  sentiment_negative  sentiment_neutral  \
52440  2022-09-29       GOOG               0.000              0.830   
52441  2022-09-29       GOOG               0.000              0.842   
52

In [6]:
# Collapse each ticker's tweets to one row per 'Date' by averaging the scores.
score_columns = ['sentiment_negative', 'sentiment_neutral', 'sentiment_positive', 'sentiment_compound']

# Ticker -> dataframe of daily mean sentiment scores
daily_stock_dfs = {}

for stock, tweets_df in stock_dfs.items():
    # Mean of every score column within each date; 'Date' returns as a column
    daily_means = tweets_df.groupby('Date')[score_columns].mean().reset_index()

    # The ticker column is not part of the aggregation, so re-attach it
    daily_means['Stock Name'] = stock
    daily_stock_dfs[stock] = daily_means

    print(f"\nFirst few rows of {stock} daily averaged dataframe:")
    print(daily_means.head())


First few rows of AAPL daily averaged dataframe:
         Date  sentiment_negative  sentiment_neutral  sentiment_positive  \
0  2021-09-30            0.051286           0.851143            0.097571   
1  2021-10-01            0.024455           0.872455            0.103182   
2  2021-10-02            0.021500           0.935500            0.043000   
3  2021-10-03            0.000000           0.839000            0.161000   
4  2021-10-04            0.030900           0.910000            0.059300   

   sentiment_compound Stock Name  
0            0.098900       AAPL  
1            0.248255       AAPL  
2            0.007525       AAPL  
3            0.822500       AAPL  
4            0.122830       AAPL  

First few rows of GOOG daily averaged dataframe:
         Date  sentiment_negative  sentiment_neutral  sentiment_positive  \
0  2021-09-30             0.00000             0.4080               0.592   
1  2021-10-01             0.17350             0.8265               0.000   
2  20

In [7]:
# Label every daily row from its mean compound score:
#   compound >= 0.05  -> 'Positive'
#   compound <= -0.05 -> 'Negative'
#   anything else     -> 'Neutral'
for stock, daily_df in daily_stock_dfs.items():
    compound = daily_df['sentiment_compound']

    # Start from the default label, then overwrite the rows past either threshold.
    # daily_df is the same object stored in daily_stock_dfs, so the new column
    # is added to the dictionary's dataframe in place.
    daily_df['sentiment_label'] = 'Neutral'
    daily_df.loc[compound >= 0.05, 'sentiment_label'] = 'Positive'
    daily_df.loc[compound <= -0.05, 'sentiment_label'] = 'Negative'

    print(f"\nFirst few rows of {stock} dataframe with sentiment labels:")
    print(daily_df.head())


First few rows of AAPL dataframe with sentiment labels:
         Date  sentiment_negative  sentiment_neutral  sentiment_positive  \
0  2021-09-30            0.051286           0.851143            0.097571   
1  2021-10-01            0.024455           0.872455            0.103182   
2  2021-10-02            0.021500           0.935500            0.043000   
3  2021-10-03            0.000000           0.839000            0.161000   
4  2021-10-04            0.030900           0.910000            0.059300   

   sentiment_compound Stock Name sentiment_label  
0            0.098900       AAPL        Positive  
1            0.248255       AAPL        Positive  
2            0.007525       AAPL         Neutral  
3            0.822500       AAPL        Positive  
4            0.122830       AAPL        Positive  

First few rows of GOOG dataframe with sentiment labels:
         Date  sentiment_negative  sentiment_neutral  sentiment_positive  \
0  2021-09-30             0.00000             0

In [8]:
# Write each ticker's daily sentiment table to "<ticker>_avg_sentiment_data.csv"
# (path is relative to the working directory; the row index is not written).
for stock, daily_df in daily_stock_dfs.items():
    filename = f"{stock}_avg_sentiment_data.csv"
    daily_df.to_csv(filename, index=False)
    print(f"Saved {filename}")

Saved AAPL_avg_sentiment_data.csv
Saved GOOG_avg_sentiment_data.csv
Saved INTC_avg_sentiment_data.csv
Saved META_avg_sentiment_data.csv
Saved MSFT_avg_sentiment_data.csv
