## Analyzing How News Influences Stock Market Fluctuations

In [None]:
import pandas as pd

# Load the two input tables from CSV files under ../data (paths are relative
# to the notebook's working directory).
# Later cells expect both tables to contain 'Date' and 'stock' columns, which
# become the shared index used for alignment.
df_news = pd.read_csv('../data/sentiment.csv')
df_stock = pd.read_csv('../data/stock_data.csv')

In [None]:
# Preview the news table (DataFrame.head() shows the first five rows by default)
df_news.head()

In [None]:
# Index both tables by the (Date, stock) pair so they can be aligned on it
df_stock = df_stock.set_index(['Date', 'stock'])
df_news = df_news.set_index(['Date', 'stock'])

In [None]:
# Inner-join the stock and news tables on their (Date, stock) index, keeping
# only the index pairs that are present in both tables
aligned_data = df_stock.merge(df_news, how='inner', left_index=True, right_index=True)

In [None]:
# Sanity-check the merge by previewing the first five rows of the merged table
aligned_data.head()

In [None]:
# Size of the merged table as a (rows, columns) tuple
aligned_data.shape

In [None]:
# Count the missing values in each column of the merged table
aligned_data.isna().sum()

In [None]:
# Inspect the dtype of every column in the merged table.
# 'Date' and 'stock' are index levels at this point, not columns, so they are
# not listed in this output.
aligned_data.dtypes

In [None]:
# Rebuild the (Date, stock) index with the 'Date' level parsed as datetimes;
# pd.to_datetime leaves values that are already datetimes unchanged
aligned_data.index = pd.MultiIndex.from_frame(
    aligned_data.index.to_frame().assign(
        Date=pd.to_datetime(aligned_data.index.get_level_values('Date'))
    )
)

# Average every sentiment score per (Date, stock) pair, then turn the group
# keys back into ordinary columns so the result is easy to merge and plot
sentiment_cols = ['neg', 'neu', 'pos', 'compound']
daily_sentiment = (
    aligned_data[sentiment_cols]
    .groupby(level=['Date', 'stock'])
    .mean()
    .reset_index()
)

In [None]:
# Persist the per-(Date, stock) average sentiment scores to CSV, then display
# them as the cell output.
# NOTE(review): to_csv is called without index=False, so the default integer
# index left by the earlier reset_index() is written as an extra unnamed first
# column; confirm that downstream readers expect it (e.g. read with index_col=0).
daily_sentiment.to_csv('../data/daily_sentiment.csv')
daily_sentiment

##### Calculate Stock Movements

In [None]:
# Flatten the (Date, stock) index back into ordinary columns and print the
# closing price next to the daily return for every row.
# NOTE(review): none of the visible cells computes 'Daily_Return'; it is assumed
# to already be a column of the merged data (presumably from stock_data.csv —
# verify). A KeyError on the print below means the column is missing.
# NOTE(review): 'daily_retun' looks like a typo for 'daily_return'; the name is
# left unchanged in case other cells refer to it.
daily_retun = aligned_data.reset_index()

print(daily_retun[['Date', 'stock', 'Close', 'Daily_Return']])

In [None]:
# Plot sentiment scores for each stock
import matplotlib.pyplot as plt
stocks = daily_sentiment['stock'].unique()

# One figure per ticker, with the three plotted scores sharing the same axes
for stock in stocks:
    # Rows belonging to the ticker currently being drawn
    stock_data = daily_sentiment.loc[daily_sentiment['stock'].eq(stock)]

    plt.figure(figsize=(12, 6))
    # (column, legend label, line colour) for each plotted score;
    # 'compound' is not drawn
    for score_col, legend_label, line_color in (
        ('neg', 'Negative Sentiment', 'red'),
        ('neu', 'Neutral Sentiment', 'grey'),
        ('pos', 'Positive Sentiment', 'green'),
    ):
        plt.plot(stock_data['Date'], stock_data[score_col], label=legend_label, color=line_color)

    plt.title(f'Sentiment Scores for {stock}')
    plt.xlabel('Date')
    plt.ylabel('Sentiment Score')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)  # slant the date labels so they do not overlap
    plt.tight_layout()
    plt.show()

#### Calculate Correlation

In [None]:
def calculate_correlation(df, sentiment_cols=None, return_col='Daily_Return'):
    """Correlate each sentiment score column with the daily return column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding every column named in ``sentiment_cols`` as well as
        ``return_col``. The return column must already be computed; it is
        not derived here.
    sentiment_cols : sequence of str, optional
        Sentiment score columns to correlate. Defaults to
        ``['neg', 'neu', 'pos', 'compound']``.
    return_col : str, default 'Daily_Return'
        Column holding the returns to correlate against.

    Returns
    -------
    pandas.DataFrame
        One row per sentiment column (in the order given) and a single
        column named 'Correlation with Daily Return'. Values come from
        ``Series.corr`` with its default settings.

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.
    """
    if sentiment_cols is None:
        sentiment_cols = ['neg', 'neu', 'pos', 'compound']

    # Report every missing column at once instead of failing on the first one
    missing = [col for col in (*sentiment_cols, return_col) if col not in df.columns]
    if missing:
        raise KeyError(f"Missing column(s) required for correlation: {missing}")

    returns = df[return_col]
    correlations = {col: df[col].corr(returns) for col in sentiment_cols}

    # The column label is kept fixed so existing callers see the same output
    # shape regardless of which return column was used
    return pd.DataFrame.from_dict(
        correlations,
        orient='index',
        columns=['Correlation with Daily Return'],
    )

# Correlation between sentiment scores and daily returns, restricted to the
# rows whose ticker is 'AAPL' (Apple)
df = aligned_data.reset_index()
correlation_df = calculate_correlation(df.loc[df['stock'].eq('AAPL')])
print(correlation_df)