In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 
import yfinance as yf 
import requests
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import json 

from collections import defaultdict
from datetime import datetime

import praw as praw

In [50]:
import pandas as pd
import requests, zipfile
from io import BytesIO
from datetime import datetime, timedelta

start_date = datetime(2023, 4, 12)
end_date = datetime(2023, 5, 12)
interval_hours = [0, 6, 12, 18]

results = []

for single_date in pd.date_range(start_date, end_date):
    for hour in interval_hours:
        timestamp = single_date.replace(hour=hour).strftime('%Y%m%d%H%M%S')
        url = f"http://data.gdeltproject.org/gdeltv2/{timestamp}.gkg.csv.zip"

        try:
            r = requests.get(url)
            if r.status_code == 200:
                with zipfile.ZipFile(BytesIO(r.content)) as z:
                    file_name = z.namelist()[0]
                    with z.open(file_name) as f:
                        df = pd.read_csv(f, sep='\t', header=None, low_memory=False, encoding='ISO-8859-1')

                        if df.shape[1] >= 21:
                            df = df.iloc[:, :21]
                            df.columns = [
                                "GKGRECORDID", "DATE", "SourceCollectionIdentifier", "SourceCommonName", "DocumentIdentifier",
                                "Themes", "Locations", "Persons", "Organizations", "V2Tone", "Dates", "GCAM", "SharingImage",
                                "RelatedImages", "SocialImageEmbeds", "SocialVideoEmbeds", "Quotations", "AllNames", "Amounts",
                                "TranslationInfo", "Extras"
                            ]

                            for _, row in df.iterrows():
                                tone_str = str(row['V2Tone'])
                                if tone_str and '#' in tone_str:
                                    try:
                                        tone = float(tone_str.split('#')[0])  # Only take the first number before a comma
                                        results.append({
                                            'datetime': single_date.replace(hour=hour),
                                            'tone': tone,
                                            'url': row['DocumentIdentifier']
                                        })
                                    except Exception as e:
                                        print(f"Tone parse error: {e} — Raw tone: {tone_str}")
            else:
                print(f"No file for {timestamp} — Status: {r.status_code}")
        except Exception as e:
            print(f"Error for {timestamp}: {e}")

# Turn into DataFrame
sentiment_df = pd.DataFrame(results)

if not sentiment_df.empty:
    sentiment_df['datetime'] = pd.to_datetime(sentiment_df['datetime'])
    sentiment_df = sentiment_df.dropna(subset=['tone'])
    daily_sentiment = sentiment_df.groupby(sentiment_df['datetime'].dt.date)['tone'].mean().reset_index()
    daily_sentiment.columns = ['date', 'avg_tone']
    print(daily_sentiment)
else:
    print("No sentiment data found in the sample range.")
    


KeyboardInterrupt: 

In [7]:
import os
import pandas as pd
import requests, zipfile
from io import BytesIO
from datetime import datetime, timedelta

start_date = datetime(2023, 5, 12)
end_date = datetime(2025, 4, 12)
interval_hours = [0, 6, 12, 18]

# Folder to save monthly results
os.makedirs("monthly_sentiment", exist_ok=True)

# Loop through months
current = start_date
while current <= end_date:
    # Define month range
    month_start = current.replace(day=1)
    next_month = (month_start + timedelta(days=32)).replace(day=1)
    month_end = next_month - timedelta(days=1)

    # CSV file path
    filename = f"monthly_sentiment/sentiment_{month_start.strftime('%Y_%m')}.csv"
    if os.path.exists(filename):
        print(f"Already processed: {filename}")
        current = next_month
        continue

    print(f"Processing {month_start.strftime('%B %Y')}...")

    results = []
    for single_date in pd.date_range(month_start, month_end):
        for hour in interval_hours:
            timestamp = single_date.replace(hour=hour).strftime('%Y%m%d%H%M%S')
            url = f"http://data.gdeltproject.org/gdeltv2/{timestamp}.gkg.csv.zip"

            try:
                r = requests.get(url)
                if r.status_code == 200:
                    with zipfile.ZipFile(BytesIO(r.content)) as z:
                        file_name = z.namelist()[0]
                        with z.open(file_name) as f:
                            df = pd.read_csv(f, sep='\t', header=None, low_memory=False, encoding='ISO-8859-1')

                            if df.shape[1] >= 21:
                                df = df.iloc[:, :21]
                                df.columns = [
                                    "GKGRECORDID", "DATE", "SourceCollectionIdentifier", "SourceCommonName", "DocumentIdentifier",
                                    "Themes", "Locations", "Persons", "Organizations", "V2Tone", "Dates", "GCAM", "SharingImage",
                                    "RelatedImages", "SocialImageEmbeds", "SocialVideoEmbeds", "Quotations", "AllNames", "Amounts",
                                    "TranslationInfo", "Extras"
                                ]

                                for _, row in df.iterrows():
                                    tone_str = str(row['V2Tone'])
                                    if tone_str and '#' in tone_str:
                                        try:
                                            tone = float(tone_str.split('#')[0])
                                            results.append({
                                                'datetime': single_date.replace(hour=hour),
                                                'tone': tone,
                                                'url': row['DocumentIdentifier']
                                            })
                                        except Exception as e:
                                            print(f"Tone parse error: {e} — Raw tone: {tone_str}")
                else:
                    print(f"No file for {timestamp} — Status: {r.status_code}")
            except Exception as e:
                print(f"Error for {timestamp}: {e}")

    # Convert to DataFrame and save
    sentiment_df = pd.DataFrame(results)
    if not sentiment_df.empty:
        sentiment_df['datetime'] = pd.to_datetime(sentiment_df['datetime'])
        sentiment_df = sentiment_df.dropna(subset=['tone'])
        daily_sentiment = sentiment_df.groupby(sentiment_df['datetime'].dt.date)['tone'].mean().reset_index()
        daily_sentiment.columns = ['date', 'avg_tone']
        daily_sentiment.to_csv(filename, index=False)
        print(f"Saved {filename}")
    else:
        print(f"No sentiment data for {month_start.strftime('%Y-%m')}")

    current = next_month

Processing May 2023...
Saved monthly_sentiment/sentiment_2023_05.csv
Processing June 2023...
Error for 20230616060000: Error tokenizing data. C error: Expected 27 fields in line 784, saw 32

Saved monthly_sentiment/sentiment_2023_06.csv
Processing July 2023...
Saved monthly_sentiment/sentiment_2023_07.csv
Processing August 2023...
Saved monthly_sentiment/sentiment_2023_08.csv
Processing September 2023...
Saved monthly_sentiment/sentiment_2023_09.csv
Processing October 2023...
Saved monthly_sentiment/sentiment_2023_10.csv
Processing November 2023...
Saved monthly_sentiment/sentiment_2023_11.csv
Processing December 2023...
Saved monthly_sentiment/sentiment_2023_12.csv
Processing January 2024...
Saved monthly_sentiment/sentiment_2024_01.csv
Processing February 2024...
Saved monthly_sentiment/sentiment_2024_02.csv
Processing March 2024...
Saved monthly_sentiment/sentiment_2024_03.csv
Processing April 2024...
Saved monthly_sentiment/sentiment_2024_04.csv
Processing May 2024...
Saved monthly

In [2]:
import glob

all_files = glob.glob("monthly_sentiment/*.csv")
df_all = pd.concat((pd.read_csv(f) for f in all_files))
df_all.to_csv("all_sentiment_data.csv", index=False)

In [3]:
big_sent_df = pd.read_csv("all_sentiment_data.csv")
small_sent_df = pd.read_csv("daily_sentiment.csv")
big_sent_df, small_sent_df

(           date  avg_tone
 0    2024-07-01  2.278929
 1    2024-07-02  2.289531
 2    2024-07-03  2.319873
 3    2024-07-04  2.401594
 4    2024-07-05  2.414356
 ..          ...       ...
 707  2025-01-27  2.370711
 708  2025-01-28  2.277915
 709  2025-01-29  2.261435
 710  2025-01-30  2.314914
 711  2025-01-31  2.214110
 
 [712 rows x 2 columns],
           date  avg_tone
 0   2023-04-12  2.287342
 1   2023-04-13  2.316311
 2   2023-04-14  2.306526
 3   2023-04-15  2.341401
 4   2023-04-16  2.462421
 5   2023-04-17  2.299344
 6   2023-04-18  2.298309
 7   2023-04-19  2.288625
 8   2023-04-20  2.281591
 9   2023-04-21  2.309622
 10  2023-04-22  2.345164
 11  2023-04-23  2.338715
 12  2023-04-24  2.311514
 13  2023-04-25  2.227038
 14  2023-04-26  2.273817
 15  2023-04-27  2.307471
 16  2023-04-28  2.249710
 17  2023-04-29  2.388493
 18  2023-04-30  2.311260
 19  2023-05-01  2.316471
 20  2023-05-02  2.300062
 21  2023-05-03  2.277539
 22  2023-05-04  2.331174
 23  2023-05-05  2.373782

In [4]:
merged_df = pd.merge(big_sent_df, small_sent_df, on='date', how='outer', suffixes=('_big', '_small'))

# 2. Handle duplicates: if both 'avg_tone_big' and 'avg_tone_small' exist, choose one
#    For example, prefer 'small' if available, else use 'big'
merged_df['avg_tone'] = merged_df['avg_tone_small'].combine_first(merged_df['avg_tone_big'])

# 3. Drop the individual source columns
merged_df = merged_df[['date', 'avg_tone']]

# 4. Optional: sort by date
merged_df = merged_df.sort_values('date').reset_index(drop=True)

print(merged_df)

           date  avg_tone
0    2023-04-12  2.287342
1    2023-04-13  2.316311
2    2023-04-14  2.306526
3    2023-04-15  2.341401
4    2023-04-16  2.462421
..          ...       ...
726  2025-04-07  2.276755
727  2025-04-08  2.270042
728  2025-04-09  2.194243
729  2025-04-10  2.225689
730  2025-04-11  2.285928

[731 rows x 2 columns]


In [5]:
merged_df

Unnamed: 0,date,avg_tone
0,2023-04-12,2.287342
1,2023-04-13,2.316311
2,2023-04-14,2.306526
3,2023-04-15,2.341401
4,2023-04-16,2.462421
...,...,...
726,2025-04-07,2.276755
727,2025-04-08,2.270042
728,2025-04-09,2.194243
729,2025-04-10,2.225689


Thesis: Sentiment Outweighs fundamental value in driving AAPL prices

In [6]:
from datetime import datetime, timedelta

api_key = "4C9mXIrDgrBA8MMxnsf1jtSYykOLSCT2"
symbols = ['AAPL']

cutoff_date = datetime.today() - timedelta(days=2*365)

price_data_all = []

for symbol in symbols:
    url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{symbol}?apikey={api_key}"
    
    try:
        res = requests.get(url)
        data = res.json()

        if "historical" in data:
            df = pd.DataFrame(data["historical"])
            df["date"] = pd.to_datetime(df["date"])  # Ensure the date column is in datetime format
            df = df[df["date"] >= cutoff_date]  # Filter data for the last 2 years
            df["symbol"] = symbol
            df = df[["date", "symbol", "adjClose", "volume"]]
            price_data_all.append(df)
            print(f"✅ Price data fetched for {symbol}")
        else:
            print(f"⚠️ No historical data found for {symbol}. Response: {data}")
    
    except Exception as e:
        print(f"❌ Error fetching price data for {symbol}: {e}")



✅ Price data fetched for AAPL


In [7]:
price_data_all

[          date symbol  adjClose     volume
 0   2025-04-16   AAPL   197.225   26249918
 1   2025-04-15   AAPL   202.140   50304417
 2   2025-04-14   AAPL   202.520  101352911
 3   2025-04-11   AAPL   198.150   87435915
 4   2025-04-10   AAPL   190.420  121880000
 ..         ...    ...       ...        ...
 497 2023-04-24   AAPL   163.680   41949600
 498 2023-04-21   AAPL   163.370   58337341
 499 2023-04-20   AAPL   164.990   52456400
 500 2023-04-19   AAPL   165.960   47720200
 501 2023-04-18   AAPL   164.810   49923008
 
 [502 rows x 4 columns]]

In [8]:
# Combine all dataframes in the list into a single dataframe
combined_price_data = pd.concat(price_data_all, ignore_index=True)

# Save the combined dataframe to a CSV file
combined_price_data.to_csv("fundamental_data.csv", index=False)
combined_price_data

Unnamed: 0,date,symbol,adjClose,volume
0,2025-04-16,AAPL,197.225,26249918
1,2025-04-15,AAPL,202.140,50304417
2,2025-04-14,AAPL,202.520,101352911
3,2025-04-11,AAPL,198.150,87435915
4,2025-04-10,AAPL,190.420,121880000
...,...,...,...,...
497,2023-04-24,AAPL,163.680,41949600
498,2023-04-21,AAPL,163.370,58337341
499,2023-04-20,AAPL,164.990,52456400
500,2023-04-19,AAPL,165.960,47720200


In [9]:
adjClose = combined_price_data['adjClose']
volume =combined_price_data['volume']

returns = 100 * adjClose.pct_change().dropna()
returns_df = returns.to_frame()
returns_df['adj_close'] = adjClose
returns_df


Unnamed: 0,adjClose,adj_close
1,2.492078,202.14
2,0.187989,202.52
3,-2.157812,198.15
4,-3.901085,190.42
5,4.427056,198.85
...,...,...
497,0.956023,163.68
498,-0.189394,163.37
499,0.991614,164.99
500,0.587914,165.96


In [10]:
combined_price_data['date'] = pd.to_datetime(combined_price_data['date'])

# Filter for 2023 and 2024
data_2023_2024 = combined_price_data[
    combined_price_data['date'].dt.year.isin([2023, 2024])
].copy()

# Calculate daily returns
data_2023_2024['daily_return'] = data_2023_2024['adjClose'].pct_change()

# Add 'quarter' column
data_2023_2024['quarter'] = data_2023_2024['date'].dt.to_period('Q')

# Calculate **compounded** quarterly returns
quarterly_returns = data_2023_2024.groupby('quarter')['daily_return'].apply(
    lambda x: (1 + x).prod() - 1
).reset_index()

quarterly_returns.columns = ['quarter', 'quarterly_return']

# Split by year
quarterly_returns_2023 = quarterly_returns[quarterly_returns['quarter'].dt.year == 2023]
quarterly_returns_2024 = quarterly_returns[quarterly_returns['quarter'].dt.year == 2024]

# Display
print("2023 Quarterly Returns:\n", quarterly_returns_2023)
print("\n2024 Quarterly Returns:\n", quarterly_returns_2024)

2023 Quarterly Returns:
   quarter  quarterly_return
0  2023Q2         -0.136216
1  2023Q3          0.106151
2  2023Q4         -0.065247

2024 Quarterly Returns:
   quarter  quarterly_return
3  2024Q1          0.090410
4  2024Q2         -0.216600
5  2024Q3         -0.042931
6  2024Q4         -0.097665


In [11]:

vol_returns = pd.merge(returns_df, volume, left_index=True, right_index=True)

In [12]:
import pandas_datareader.data as web
import datetime

start = datetime.datetime.now() - datetime.timedelta(days=2*365)
end = datetime.datetime.now()

vix_data = web.DataReader("VIXCLS", "fred", start, end)
print(vix_data.head())

            VIXCLS
DATE              
2023-04-18   16.83
2023-04-19   16.46
2023-04-20   17.17
2023-04-21   16.77
2023-04-24   16.89


Sentiment Data

In [13]:
vol_returns['date'] = pd.date_range(start='2023-04-12', periods=len(vol_returns), freq='B')  # Adjust start date accordingly
# Set the new 'date' column as the index
vol_returns.set_index('date', inplace=True)


# Now merge using the date index
returns_x_VIX_df = pd.merge(vol_returns, vix_data, left_index=True, right_index=True, how='inner')
print(returns_x_VIX_df.index.max())

2025-03-12 00:00:00


In [14]:
# Convert 'date' column in merged_df to datetime format to ensure compatibility
merged_df['date'] = pd.to_datetime(merged_df['date'])

# Merge the dataframes on the 'date' column
final_df = pd.merge(returns_x_VIX_df, merged_df, left_index=True, right_on='date', how='inner')

# Display the merged dataframe
print(final_df)

      adjClose  adj_close     volume  VIXCLS       date  avg_tone
6     4.427056     198.85  184395900   16.83 2023-04-18  2.298309
7   -13.291426     172.42  120859500   16.46 2023-04-19  2.288625
8     5.243011     181.46  160466300   17.17 2023-04-20  2.281591
9     3.813513     188.38  125910913   16.77 2023-04-21  2.309622
12    7.861769     203.19  103419006   16.89 2023-04-24  2.311514
..         ...        ...        ...     ...        ...       ...
694   0.956023     163.68   41949600   24.87 2025-03-06  2.265221
695  -0.189394     163.37   58337341   23.37 2025-03-07  2.259146
698   0.991614     164.99   52456400   27.86 2025-03-10  2.290376
699   0.587914     165.96   47720200   26.92 2025-03-11  2.291383
700  -0.692938     164.81   49923008   24.23 2025-03-12  2.227599

[497 rows x 6 columns]


In [15]:
final_df = final_df.rename(columns={'adjClose':'returns'})
final_df

Unnamed: 0,returns,adj_close,volume,VIXCLS,date,avg_tone
6,4.427056,198.85,184395900,16.83,2023-04-18,2.298309
7,-13.291426,172.42,120859500,16.46,2023-04-19,2.288625
8,5.243011,181.46,160466300,17.17,2023-04-20,2.281591
9,3.813513,188.38,125910913,16.77,2023-04-21,2.309622
12,7.861769,203.19,103419006,16.89,2023-04-24,2.311514
...,...,...,...,...,...,...
694,0.956023,163.68,41949600,24.87,2025-03-06,2.265221
695,-0.189394,163.37,58337341,23.37,2025-03-07,2.259146
698,0.991614,164.99,52456400,27.86,2025-03-10,2.290376
699,0.587914,165.96,47720200,26.92,2025-03-11,2.291383


In [16]:
import plotly.graph_objects as go
import pandas as pd

# Prepare data
df = final_df.copy()
df['cumulative_return'] = df['adj_close'].abs()
df['smoothed_tone'] = df['avg_tone'].rolling(window=7, min_periods=1).mean()

# Create figure
fig = go.Figure()

# Add cumulative return line (left y-axis)
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['cumulative_return'],
    name='Apple Cumulative Return',
    yaxis='y1',
    line=dict(color='#5B8FB9', width=3.5)  # Increased line width to make it thicker
))

# Add smoothed sentiment line (right y-axis)
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['smoothed_tone'],
    name='Sentiment Score (Smoothed)',
    yaxis='y2',
    line=dict(color='#B6EADA', width=3.5)  # Matches your theme palette
))

# Layout with dark theme
fig.update_layout(
    title='Global Sentiment vs Apple Performance',
    plot_bgcolor='#03001C',
    paper_bgcolor='#03001C',
    font=dict(color='white', size=20),
    title_font=dict(color='white'),

    xaxis=dict(
        showgrid=True,
        gridcolor='rgba(255,255,255,0.05)'  # Very faint gridlines
    ),
    
    yaxis=dict(
        title='Cumulative Return',
        titlefont=dict(color='#5B8FB9'),
        tickfont=dict(color='#5B8FB9'),
        showgrid=True,
        gridcolor='rgba(255,255,255,0.05)',
        zeroline=False,
        zerolinecolor='white',
        zerolinewidth=0.5
    ),
    
    yaxis2=dict(
        title='Sentiment Score',
        titlefont=dict(color='#B6EADA'),
        tickfont=dict(color='#B6EADA'),
        overlaying='y',
        side='right',
        showgrid=False,
        zeroline=False
    ),

    legend=dict(x=0.75, y=0.99, xanchor='center'),
    height=500,
    margin=dict(l=60, r=60, t=60, b=40)
)

fig.show()

In [42]:
import plotly.express as px

colorscale=[
    [0.0, "#4a00e0"],   # deep purple
    [0.25, "#005eff"],  # electric blue
    [0.5, "#00f7ff"],   # bright cyan
    [0.75, "#a6ff00"],  # neon lime
    [1.0, "#f4ff00"]    # yellow-green (high mood)
]
df['abs_returns'] = df['returns'].abs()
df['returns_scaled'] = df['abs_returns'] / df['abs_returns'].max() * 100 + 4.2 # Scale to 0-100 for bubble size

custom_colors_d = ['#03001C', '#2E3B84', '#B6EADA', '#2E3B84', '#03001C']

def sentiment_to_emoji(score):
    if score > 2.34:
        return "😊"
    elif score < 2.0:
        return "😡"
    else:
        return "😐"

df['sentiment_emoji'] = df['avg_tone'].apply(sentiment_to_emoji)

fig = px.scatter(
    df,
    x='avg_tone',
    y='VIXCLS',
    size='returns_scaled',  # Replace with your daily return column
    color='avg_tone',
    color_continuous_scale=colorscale,
    title='VIX vs Sentiment',
    labels={
        'avg_tone': 'Sentiment Score',
        'VIXCLS': 'VIX (Fear Gauge)',
        'daily_return': 'Apple Daily Return (%)'
    },
    height=600
)
fig.update_yaxes(range=[30, 0])
# Layout customization for dark mode + centered axes
fig.update_layout(
    plot_bgcolor='#03001C',
    paper_bgcolor='#03001C',
    font=dict(color='white', size = 20),
    title_font=dict(color='white', size=20),
    coloraxis_colorbar=dict(
        title='Sentiment',
        tickvals=[1.5, 2.5, 3.5],  # Adjusted for your tone scale
        ticktext=["😡", "😐", "😊"],
        tickmode='array'
    ),
    
    xaxis=dict(
        title='Sentiment Score',
        showgrid=True,
        gridcolor='#1C1C1C',
        zeroline=True,
        zerolinecolor='gray',
        zerolinewidth=1,
    ),
    yaxis=dict(
        title='VIX',
        showgrid=True,
        gridcolor='#1C1C1C',
        zeroline=True,
        zerolinecolor='gray',
        zerolinewidth=1
    ),
    margin=dict(l=60, r=60, t=60, b=60)
)

x_center = df['avg_tone'].mean()
y_center = df['VIXCLS'].mean()

# Add vertical and horizontal lines to create quadrant view
fig.add_shape(type="line",
              x0=x_center, x1=x_center,
              y0=0, y1=df['VIXCLS'].max(),
              line=dict(color="white", width=1, dash="dash"))

fig.add_shape(type="line",
              x0=df['avg_tone'].min(), x1=df['avg_tone'].max(),
              y0=y_center, y1=y_center,
              line=dict(color="white", width=1, dash="dash"))

fig.add_annotation(text="😡 Negative News",
                   x=x_center - 0.05, y=y_center + 10,
                   showarrow=False, font=dict(color="white", size=20))

fig.add_annotation(text="😐 Neutral News",
                   x=x_center - 0.05, y=y_center - 7,
                   showarrow=False, font=dict(color="white", size=20))

fig.add_annotation(text="😊 Positive News",
                   x=x_center + 0.05, y=y_center - 7,
                   showarrow=False, font=dict(color="white", size=20))

fig.add_annotation(text="🤯 Uncertainty",
                   x=x_center + 0.05, y=y_center + 10,
                   showarrow=False, font=dict(color="white", size=20))


# Annotations
fig.add_annotation(
    text="Bubble size = Apple Daily Return",
    xref="paper", yref="paper",
    x=1.15, y=-0.14,
    showarrow=False,
    font=dict(color="white", size=15)
)

# Add white border around bubbles
fig.update_traces(marker=dict(line=dict(width=0.55, color='white')))

fig.show()

In [155]:
import plotly.graph_objects as go

# Define function to map tone to emojis
def tone_to_emoji(tone):
    if tone > 2.35:
        return '😃'  # Positive sentiment
    elif tone > 2.25:
        return '😐'  # Neutral sentiment
    else:
        return '😡'  # Negative sentiment

# Apply the function to the DataFrame
df['tone_emoji'] = df['avg_tone'].apply(tone_to_emoji)

# Get the value counts for the sentiment emojis
emoji_labels = ['😡', '😐', '😃']
emoji_counts = df['tone_emoji'].value_counts().reindex(emoji_labels, fill_value=0)

# Create a pie chart with emojis as labels
fig = go.Figure(data=[go.Pie(
    labels=emoji_counts.index,  # Emojis as labels
    values=emoji_counts.values,
    text=emoji_counts.index,# Counts for each emoji
    textinfo='label+percent',
    textposition='inside',# Display both label and percentage
    insidetextorientation='auto',# Display on hover
    marker=dict(colors=['#B6EADA', '#2E3B84', '#03001Z']),
    textfont=dict(size=32)# Custom color scheme (can adjust)
)])

# Update layout for a cleaner look
fig.update_layout(
    title='Sentiment Distribution Based on Tone',
    showlegend=False,  # Hide legend since the emojis themselves act as labels
    font=dict(color='white', size=20),
    plot_bgcolor='#03001C',  # Dark background
    paper_bgcolor='#03001C'  # Dark background for the paper as well
)

# Show the plot
fig.show()

In [28]:
extremes = df.loc[df['adjClose'].abs().nlargest(5).index]
for _, row in extremes.iterrows():
    print(f"{row['date']}: Return={row['adjClose']:.2f}%, Tone={row['avg_tone']:.2f}, VIX={row['VIXCLS']:.2f} {tone_to_emoji(row['avg_tone'])}")


2023-04-13 00:00:00: Return=-13.29%, Tone=2.32, VIX=17.80 😐
2023-04-19 00:00:00: Return=10.19%, Tone=2.29, VIX=16.46 😐
2023-04-18 00:00:00: Return=7.86%, Tone=2.30, VIX=16.83 😐
2024-01-29 00:00:00: Return=-6.78%, Tone=2.34, VIX=13.60 😐
2024-03-05 00:00:00: Return=-5.64%, Tone=2.29, VIX=14.46 😐


In [77]:
import plotly.graph_objects as go

# Data
quarters = ['Q1 2023', 'Q2 2023', 'Q3 2023', 'Q4 2023',
            'Q1 2024', 'Q2 2024', 'Q3 2024', 'Q4 2024']
pe_ratios = [27.9, 32.44, 27.79, 29.8, 26.54, 31.96, 37.28, 40.44]
eps_growth = [-0.2, 1.2, 3, 4.9, 0, 2, -7.3, 3.4]
returns = [14.45, -14.0933, 10.6151, -6.5247, 9.0410, -21.6600, -4.2931, -9.7665]

# Create grouped bar chart
fig = go.Figure(data=[
    go.Bar(name='P/E Ratio', x=quarters, y=pe_ratios, marker_color='#00FFFF',text=[f'{v:.1f}' for v in pe_ratios],
        textposition='outside'),
    go.Bar(name='EPS Growth (%)', x=quarters, y=eps_growth, marker_color='#1ABC9C',text=[f'{v:.1f}%' for v in eps_growth],
        textposition='outside'),
    go.Bar(name='Stock Return (%)', x=quarters, y=returns, marker_color='#FF4C4C',text=[f'{v:.1f}%' for v in returns],
        textposition='outside')
])

# Update layout for grouping and slanting x-axis labels
fig.update_layout(
    barmode='group',
    title='Apple Valuation vs. Returns',
    yaxis_title='Value',
    template='plotly_dark',
    height=600,
    legend=dict(x=0.6, y=1.22),
    xaxis=dict(tickangle=-45),
    font=dict(size=20),
    plot_bgcolor='#03001C',
    paper_bgcolor='#03001C'
)

# Show figure
fig.show()

Dual Axis Line

Heatmap

Word Cloud 

Bubble Chart 