In [10]:
# Snapshot the installed packages into ../requirements.txt (shell escape, output redirected).
# NOTE(review): `!pip` is resolved through the shell PATH, which may not be the kernel's
# environment; `%pip freeze` targets the kernel's interpreter — confirm which one is wanted.
!pip freeze > ../requirements.txt

In [5]:
import os
import sys
sys.path.append(r'..')

import pandas as pd
from scripts.data_analysis import *
from scripts.visualization import *
from scripts.financial_analysis import *
from scripts.data_transformation import *
from scripts.sentiment_analysis import *
from scripts.data_analysis import summarize_data
from utils.data_loaders import load_analyst_ratings, load_yfinance_data

In [None]:
# Read the cleaned analyst-ratings CSV through the project loader.
cleaned_dir = r"../datasets/cleaned"
ratings_file = "analyst_ratings_cleaned.csv"
dataset_path = os.path.join(cleaned_dir, ratings_file)
analyst_ratings_df = load_analyst_ratings(dataset_path)

In [None]:
# Load the raw yfinance data through the project loader. The result is used as a mapping:
# it is iterated with .values() here and indexed by ticker symbol in later cells.
datasets_folder = os.path.join(r"../datasets/raw/yfinance_data")
dataframes = load_yfinance_data(datasets_folder)

# Parse each frame's 'Date' column into pandas datetimes, in place.
# NOTE: `df` stays bound to the last frame after the loop ends; later cells should not
# rely on that leftover name.
for df in dataframes.values():
    df['Date'] = pd.to_datetime(df['Date'])
# Display the loaded mapping.
dataframes 

In [8]:
# Convenience aliases: one module-level name per ticker frame.
df_AAPL, df_AMZN, df_GOOG, df_META, df_MSFT, df_NVDA, df_TSLA = [
    dataframes[symbol]
    for symbol in ('AAPL', 'AMZN', 'GOOG', 'META', 'MSFT', 'NVDA', 'TSLA')
]

In [None]:
# Ticker symbols are the keys of the loaded mapping,
# e.g. ['AAPL', 'AMZN', 'GOOG', 'META', 'MSFT', 'NVDA', 'TSLA'].
tickers = [*dataframes]

# Per-ticker columns that are candidates for merging across tickers.
columns_to_merge = [
    'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
    'Dividends', 'Stock Splits',
    'RSI', 'MACD', 'MACD_signal', 'MACD_hist',
    'Signal', 'Daily_Return', 'Strategy_Return',
]

# Merge on the closing price (the entry at index 3 of columns_to_merge).
selected_column = 'Close'

# Combine the selected column of every ticker frame into one frame via the project helper.
# Date-bounded variant kept for reference:
# merged_data = merge_dataframes(dataframes, tickers, selected_column, start_date='2020-01-01', end_date='2024-01-01')
merged_data = merge_dataframes(dataframes, tickers, selected_column)
merged_data

In [None]:
# Tickers whose columns of merged_data feed the portfolio cells that follow.
selected_tickers = ['MSFT', 'AAPL', 'AMZN']

# Pass only the selected tickers' columns to the project helper; its return value is displayed.
calculate_portfolio_indicators(merged_data[selected_tickers])    

In [None]:
# Call the project's weight helper on the selected tickers' columns of merged_data.
calculate_portfolio_weights(
    merged_data[selected_tickers],
    tickers=selected_tickers,
)

In [None]:
# Portfolio performance for the selected tickers; no weights are passed here, so the
# helper's own default applies (not visible in this notebook).
calculate_portfolio_performance(merged_data[selected_tickers])

In [None]:
# Plot the correlation matrix across the selected tickers' columns of merged_data.
plot_correlation_matrix(merged_data[selected_tickers])

In [None]:
# Correlation heatmap of one ticker's price columns.
# The frame is looked up explicitly by ticker instead of reading `df`, the loop variable
# left over from the date-parsing loop in the loading cell, which silently pointed at
# whichever frame happened to be iterated last.
# 'TSLA' is the last ticker in the key list noted in the merge cell, so this is presumably
# the frame the leftover `df` referred to — change the ticker here to inspect another one.
heatmap_ticker = 'TSLA'
plot_correlation_heatmap(dataframes[heatmap_ticker], columns=['Close', 'Open', 'High', 'Low'])

In [16]:
# Run the project's analyze_sentiment helper over the headline column; the result is kept
# in sentiment_summary and merged with the stock data in a later cell.
# NOTE(review): the structure of the returned object is not visible here — confirm it
# carries a 'Date' key before it is merged on 'Date'.
sentiment_summary = analyze_sentiment(analyst_ratings_df['headline'])
# analyst_ratings_df['sentiment'] = sentiment_summary['sentiment']
# analyst_ratings_df['sentiment_category'] = sentiment_summary['sentiment_category']


In [None]:
# Sentiment analysis of the news headlines with VADER: apply get_vader_sentiment to every
# headline and store the result in a 'sentiment' column on analyst_ratings_df.
analyst_ratings_df["sentiment"] = analyst_ratings_df["headline"].apply(get_vader_sentiment)
# Show each headline next to its score.
analyst_ratings_df[["headline", "sentiment"]]

In [None]:
# Inner-join the sentiment summary with the merged stock prices on 'Date'.
# NOTE(review): assumes both inputs expose 'Date' (as a column or index level) — confirm.
df_combined = pd.merge(sentiment_summary, merged_data, on='Date', how='inner')
# merged_df = merge_dataframes(stock_df, sentiment_df, on='Date')

In [None]:
# plot_stock_vs_sentiment(df, date_column='Date', sentiment_column='Sentiment', stock_metric='Close')

In [None]:
# Pairwise correlation between the 'average_sentiment' and 'stock_price' columns.
# NOTE(review): no visible cell creates either column; presumably they come from
# analyze_sentiment / merge_dataframes — confirm, otherwise this lookup raises KeyError.
df_combined[['average_sentiment', 'stock_price']].corr()

In [None]:
# Mean sentiment score per date, returned as a flat two-column frame (date, sentiment).
daily_sentiment = analyst_ratings_df.groupby("date", as_index=False)["sentiment"].mean()
daily_sentiment

In [None]:
# Give both frames a plain `date` (datetime.date) join key.
# `Date` is kept rather than renamed away: the alignment cell further down still reads
# merged_data['Date'], and renaming the column made that lookup fail. Adding `date`
# alongside it also makes this cell safe to re-run.
if 'Date' not in merged_data.columns and merged_data.index.name == 'Date':
    # The dates are carried by the index; expose them as a regular column first.
    merged_data = merged_data.reset_index()
merged_data["date"] = pd.to_datetime(merged_data["Date"]).dt.date
daily_sentiment["date"] = pd.to_datetime(daily_sentiment["date"]).dt.date

In [None]:
# Aggregate Sentiment
# Rename `date` -> `Date` only while the old name is still present, so a second run of
# this cell no longer raises KeyError: 'date'. The end state is unchanged: a datetime
# 'Date' column on analyst_ratings_df.
if 'date' in analyst_ratings_df.columns:
    analyst_ratings_df = analyst_ratings_df.rename(columns={"date": "Date"})
analyst_ratings_df['Date'] = pd.to_datetime(analyst_ratings_df['Date'])

# Aggregate through the project helper using its 'textblob' method; the positional
# arguments name the headline, sentiment and date columns.
aggregate_sentiment_summary = aggregate_sentiment(analyst_ratings_df, 'headline', 'sentiment', 'Date', method='textblob')
aggregate_sentiment_summary

In [None]:
# Align the aggregated sentiment with the stock data on the same date.

# Columns removed from the aligned frame in the next cell.
co_x = ['Open', 'High', 'Low', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']

# Make sure the join key is a real 'Date' column before merging. It may have been renamed
# to 'date' by an earlier cell, or still be carried by the index. An unconditional
# reset_index() cannot recover a renamed column and raises on re-run once the column it
# inserts already exists.
if 'Date' not in merged_data.columns:
    if 'date' in merged_data.columns:
        merged_data = merged_data.rename(columns={'date': 'Date'})
    else:
        merged_data = merged_data.reset_index()
merged_data['Date'] = pd.to_datetime(merged_data['Date'])
aggregate_sentiment_summary['Date'] = pd.to_datetime(aggregate_sentiment_summary['Date'])

# Inner join on Date. The former join with daily_sentiment on 'date' was overwritten by
# this one before being read, so it is no longer computed.
aligned_data = pd.merge(merged_data, aggregate_sentiment_summary, on="Date", how="inner")
aligned_data


In [None]:

# Column-trimmed copy of aligned_data (the co_x columns are removed).
# Note that the new name differs from aligned_data only by the capital D.
aligned_Data = aligned_data.drop(co_x, axis=1)

In [None]:
# Compute the daily return from the 'Close' data and store it as a 'Daily_Return' column
# on aligned_Data (the column-trimmed copy of aligned_data — the names differ only by case).
# NOTE(review): the original note mentions a multi-index Close column — confirm whether
# 'Close' is a single series here.
# aligned_data['Daily_Return'] = aligned_data['Close'].apply(calculate_daily_return)
aligned_Data['Daily_Return'] = calculate_daily_return(aligned_Data['Close'])

# Stock Data with Daily Returns
aligned_Data

In [None]:
# Compute RSI from the 'Close' data and store it as an 'RSI' column on aligned_Data.
# NOTE(review): assumes calculate_RSI accepts a Series — confirm against the helper.
# aligned_data['RSI'] = aligned_data['Close'].apply(calculate_RSI)
aligned_Data['RSI'] = calculate_RSI(aligned_Data['Close'])

In [None]:
# Scatter plot: average sentiment (x) against stock price (y) from df_combined.
plot_scatter(
    df_combined['average_sentiment'],
    df_combined['stock_price'],
    'Sentiment vs Stock Price',
    'Average Sentiment',
    'Stock Price',
    color='green',
    alpha=0.6,
)

In [None]:
# Correlation between the news sentiment score and the daily return, both read from
# aligned_Data; the resulting value is displayed as the cell output.
aligned_Data["sentiment"].corr(aligned_Data["Daily_Return"])

In [None]:
# Scatter plot: sentiment score (x) against daily stock return (y) from aligned_Data.
plot_scatter(
    aligned_Data["sentiment"],
    aligned_Data["Daily_Return"],
    "Sentiment Score vs daily stock return",
    "Sentiment Score",
    "daily stock return",
    color="purple",
)

In [None]:
# Correlation between the news sentiment score and the closing price, both read from
# aligned_Data; the resulting value is displayed as the cell output.
aligned_Data["sentiment"].corr(aligned_Data["Close"])

In [None]:
# Scatter plot: sentiment score (x) against closing price (y) from aligned_Data.
plot_scatter(
    aligned_Data["sentiment"],
    aligned_Data["Close"],
    "Sentiment Score vs Stock Close Price",
    "Sentiment Score",
    "Stock Closing Price",
    color="blue",
)

In [None]:
# Volatility and Sentiment
# 'High', 'Low' and 'Open' are listed in co_x and were therefore dropped from aligned_Data,
# so reading them from it always raised KeyError. They are read from aligned_data instead;
# drop() keeps the row index, so the computed series still lines up with aligned_Data.
aligned_Data['Volatility'] = calculate_volatility(aligned_data['High'], aligned_data['Low'], aligned_data['Open'])
plot_scatter(aligned_Data['sentiment'], aligned_Data['Volatility'], "Sentiment vs Volatility", "Sentiment", "Volatility", color="orange")

In [None]:
# Sentiment Clustering
# Cluster aligned_Data (capital D): it is the frame that carries the 'Daily_Return' column
# plotted below. aligned_data never receives that column, so clustering it made the
# 'Daily_Return' lookup fail.
clustered_data = cluster_sentiment(aligned_Data, 'sentiment')
plot_scatter(
    clustered_data['sentiment'], clustered_data['Daily_Return'],
    "Clustered Sentiment vs Daily Returns", "Sentiment", "Daily Returns", c=clustered_data['Sentiment_Cluster']
)

In [None]:
# Portfolio performance with explicit weights.
# NOTE(review): presumably the weights pair up positionally with AAPL, MSFT, GOOG — confirm
# against calculate_portfolio_performance. Also assumes aligned_data has one column per ticker.
weights = [0.3, 0.4, 0.3]
calculate_portfolio_performance(aligned_data[['AAPL', 'MSFT', 'GOOG']], weights)

In [None]:

# Define the selected tickers (this rebinds selected_tickers from the earlier portfolio cells).
selected_tickers = ['AAPL', 'MSFT', 'GOOG']

# Combine the selected tickers' frames through the project helper.
stock_prices = combine_stock_data(dataframes, selected_tickers)

# Derive per-ticker returns from the combined prices.
returns = calculate_returns_for_tickers(stock_prices)

# Analyze the portfolio's performance with explicit custom weights (not equal-weighted).
portfolio_returns = analyze_portfolio_performance(returns, weights=[0.3, 0.4, 0.3])

In [None]:
# # Calculate portfolio performance
# portfolio_performance = calculate_portfolio_performance_for_tickers(stock_prices, method='inverse_volatility')
# portfolio_performance

In [None]:
# Save Processed Data
# Create the output directory first so the write does not fail on a fresh checkout.
# NOTE(review): this writes aligned_data; the derived columns (Daily_Return, RSI, Volatility)
# live on aligned_Data — confirm which frame is meant to be persisted.
output_path = "../datasets/processed/aligned_data.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
aligned_data.to_csv(output_path, index=False)