In [1]:
%%capture
!pip install pandas

In [2]:
%%capture
!pip install openai==0.28

### Mount drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Step 2: Load Your Data


In [3]:
import openai
import pandas as pd
import os

# Load the labeled dataset
#df = pd.read_excel("/content/drive/MyDrive/Colab-Notebooks/Thesis/labeled_sentiment.xlsx")
df = pd.read_excel("/content/drive/MyDrive/Colab-Notebooks/Thesis/final_SPX500_data.xlsx")

# Display the first rows
print(df.head())

from google.colab import userdata
api_key = userdata.get('GPT_KEY')
os.environ["OPENAI_API_KEY"] = userdata.get('GPT_KEY')


# Use it with OpenAI
openai.api_key = api_key

    Author_Handle                Date  \
0     SteelTrades 2022-01-03 20:44:58   
1          SethCL 2022-01-06 12:40:06   
2     SteelTrades 2022-01-11 18:18:41   
3    John_Papadak 2022-01-13 14:15:24   
4  tumi_VTMarkets 2022-01-13 07:17:55   

                                              X_Post  Reply_Count  \
0  Phase 2 Sling Long 4777\n\nLearn how to read m...            0   
1  Merrill Lynch: \n\n🔹3 Rate Hikes Does Not Equa...            1   
2  Sling Flat +9.50\n\nLearn how to read my notes...            0   
3  Booking half of the long on SPX in Tesseract f...            0   
4  #SPX500 has formed a higher high after rejecti...            0   

   Repost_Count  Like_Count  View_Count  Follower_Count  Verified_Status  \
0             0           0           0             605                0   
1             0           5           0           50500                1   
2             1           0           0             605                0   
3             0           0       

## Step 3: Define the Few-Shot Prompt
We include a few labeled examples in the prompt, then ask the model to classify a new tweet.


In [4]:
import openai
import pandas as pd

def classify_sentiment(tweet):
    prompt = f"""
    You are a financial sentiment classifier specializing in tweets about SPX500.
    Classify the sentiment of the following tweet as **Positive, Neutral, or Negative** based on its impact on SPX500.

    **Examples:**
    - "SPX500 crashes after Fed decision. Investors worried." → Negative
    - "Tech stocks are rebounding! $AAPL $GOOGL #SPX500" → Positive
    - "Markets remain neutral ahead of Fed decision. #SPX500" → Neutral
    - "Private equity had a blast on 2021. Amazing returns VS $spy #SPX500" → Negative

    **Tweet:** "{tweet}"
    Sentiment:
    """

    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a financial sentiment classifier specializing in tweets about SPX500."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=10  # Limit output tokens for sentiment classification
    )

    # Extract and clean the sentiment from the response
    sentiment = response.choices[0].message.content.strip()
    return sentiment

# Example test
tweet = "SPX500 at all-time highs. Bullish trend ahead!"
print(classify_sentiment(tweet))  # Expected Output: Positive

Positive


## Step 4: Apply Few-Shot Learning to All Tweets
Now, we classify all tweets in your dataset.

In [9]:
# Classify sentiment for all labeled tweets
df["GPT Sentiment"] = df["X_Post"].apply(classify_sentiment)

# Save results
df.to_excel("labeled_sentiment_with_gpt.xlsx", index=False)

# Display a few comparisons
print(df[["X_Post", "Manual Sentiment", "GPT Sentiment"]].head())

                                              X_Post Manual Sentiment  \
0  Phase 2 Sling Long 4777\n\nLearn how to read m...          neutral   
1  Merrill Lynch: \n\n🔹3 Rate Hikes Does Not Equa...         positive   
2  Sling Flat +9.50\n\nLearn how to read my notes...          neutral   
3  Booking half of the long on SPX in Tesseract f...         positive   
4  #SPX500 has formed a higher high after rejecti...         positive   

  GPT Sentiment  
0       Neutral  
1      Positive  
2       Neutral  
3      Positive  
4      Positive  


In [8]:
os.chdir("/content/drive/MyDrive/Colab-Notebooks/Thesis")
os.getcwd()

'/content/drive/MyDrive/Colab-Notebooks/Thesis'

## 4. Evaluate GPT-4's Performance
We now compare GPT-4 vs. manual labels to measure accuracy:

In [10]:
from sklearn.metrics import accuracy_score

# Ensure both columns are strings (avoid NaN issues)
df = df.dropna(subset=["Manual Sentiment", "GPT Sentiment"])
df["Manual Sentiment"] = df["Manual Sentiment"].astype(str).str.lower()
df["GPT Sentiment"] = df["GPT Sentiment"].astype(str).str.lower()

# Calculate accuracy
accuracy = accuracy_score(df["Manual Sentiment"], df["GPT Sentiment"])
print(f"GPT-4 Sentiment Accuracy: {accuracy:.2%}")

GPT-4 Sentiment Accuracy: 86.79%


In [11]:
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score, matthews_corrcoef, precision_score, recall_score

# Ensure both columns are strings (avoid NaN issues)
df = df.dropna(subset=["Manual Sentiment", "GPT Sentiment"])
df["Manual Sentiment"] = df["Manual Sentiment"].astype(str).str.lower()
df["GPT Sentiment"] = df["GPT Sentiment"].astype(str).str.lower()

# Calculate metrics
accuracy = accuracy_score(df["Manual Sentiment"], df["GPT Sentiment"])
f1 = f1_score(df["Manual Sentiment"], df["GPT Sentiment"], average='weighted') # Use 'weighted' for multi-class
balanced_accuracy = balanced_accuracy_score(df["Manual Sentiment"], df["GPT Sentiment"])
mcc = matthews_corrcoef(df["Manual Sentiment"], df["GPT Sentiment"])
precision = precision_score(df["Manual Sentiment"], df["GPT Sentiment"], average='weighted')
recall = recall_score(df["Manual Sentiment"], df["GPT Sentiment"], average='weighted')

# Print results
print(f"GPT-4 Sentiment Accuracy: {accuracy:.2%}")
print(f"GPT-4 Sentiment F1 Score: {f1:.2%}")
print(f"GPT-4 Sentiment Balanced Accuracy: {balanced_accuracy:.2%}")
print(f"GPT-4 Sentiment Matthews Correlation Coefficient (MCC): {mcc:.2%}")
print(f"GPT-4 Sentiment Precision: {precision:.2%}")
print(f"GPT-4 Sentiment Recall: {recall:.2%}")

GPT-4 Sentiment Accuracy: 86.79%
GPT-4 Sentiment F1 Score: 86.95%
GPT-4 Sentiment Balanced Accuracy: 86.82%
GPT-4 Sentiment Matthews Correlation Coefficient (MCC): 80.97%
GPT-4 Sentiment Precision: 88.78%
GPT-4 Sentiment Recall: 86.79%
