In [1]:
import pandas as pd


sentiment_path = "/content/fear_greed_index.csv"
sentiment_df = pd.read_csv(sentiment_path)


print("Shape:", sentiment_df.shape)
sentiment_df.head(10)


Shape: (2644, 4)


Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05
5,1517895000,8,Extreme Fear,2018-02-06
6,1517981400,36,Fear,2018-02-07
7,1518067800,30,Fear,2018-02-08
8,1518154200,44,Fear,2018-02-09
9,1518240600,54,Neutral,2018-02-10


**Clean the Sentiment Dataset**

In [2]:
# Standardize column names
sentiment_df.columns = sentiment_df.columns.str.lower().str.strip()

# Convert date to datetime
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

# Remove duplicates
sentiment_df.drop_duplicates(inplace=True)

sentiment_df.head()


Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


**Check the Sentiment Distribution**

In [3]:
sentiment_df['classification'].value_counts()


Unnamed: 0_level_0,count
classification,Unnamed: 1_level_1
Fear,781
Greed,633
Extreme Fear,508
Neutral,396
Extreme Greed,326


**Feature Engineering**

In [6]:

# Make a working copy
df = sentiment_df.copy()

# Ensure DATE column is in datetime format
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# 4.1 Create numeric sentiment score
sentiment_map = {
    "Extreme Fear": -2,
    "Fear": -1,
    "Neutral": 0,
    "Greed": 1,
    "Extreme Greed": 2
}

df["sentiment_score"] = df["classification"].map(sentiment_map)

# 4.2 Create date-related features
df["year"] = df["date"].dt.year
df["month"] = df["date"].dt.month
df["week"] = df["date"].dt.isocalendar().week
df["day_of_week"] = df["date"].dt.day_name()

# 4.3 Weekend flag
df["is_weekend"] = df["day_of_week"].isin(["Saturday", "Sunday"])

# 4.4 Broad grouping (Fear → Negative, Greed → Positive)
def get_group(x):
    if x in ["Extreme Fear", "Fear"]:
        return "Negative Sentiment"
    elif x in ["Greed", "Extreme Greed"]:
        return "Positive Sentiment"
    return "Neutral Sentiment"

df["sentiment_group"] = df["classification"].apply(get_group)

# Preview final engineered dataset
df.head(10)

Unnamed: 0,timestamp,value,classification,date,sentiment_score,year,month,week,day_of_week,is_weekend,sentiment_group
0,1517463000,30,Fear,2018-02-01,-1,2018,2,5,Thursday,False,Negative Sentiment
1,1517549400,15,Extreme Fear,2018-02-02,-2,2018,2,5,Friday,False,Negative Sentiment
2,1517635800,40,Fear,2018-02-03,-1,2018,2,5,Saturday,True,Negative Sentiment
3,1517722200,24,Extreme Fear,2018-02-04,-2,2018,2,5,Sunday,True,Negative Sentiment
4,1517808600,11,Extreme Fear,2018-02-05,-2,2018,2,6,Monday,False,Negative Sentiment
5,1517895000,8,Extreme Fear,2018-02-06,-2,2018,2,6,Tuesday,False,Negative Sentiment
6,1517981400,36,Fear,2018-02-07,-1,2018,2,6,Wednesday,False,Negative Sentiment
7,1518067800,30,Fear,2018-02-08,-1,2018,2,6,Thursday,False,Negative Sentiment
8,1518154200,44,Fear,2018-02-09,-1,2018,2,6,Friday,False,Negative Sentiment
9,1518240600,54,Neutral,2018-02-10,0,2018,2,6,Saturday,True,Neutral Sentiment


**Sentiment Aggregations**

In [7]:
daily_sentiment = df.groupby("date")[["value", "sentiment_score"]].mean()
daily_sentiment.head()


Unnamed: 0_level_0,value,sentiment_score
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-01,30.0,-1.0
2018-02-02,15.0,-2.0
2018-02-03,40.0,-1.0
2018-02-04,24.0,-2.0
2018-02-05,11.0,-2.0


**Weekly sentiment trend**

In [8]:
weekly_sentiment = df.groupby("week")["sentiment_score"].mean()
weekly_sentiment.head()


Unnamed: 0_level_0,sentiment_score
week,Unnamed: 1_level_1
1,-0.122449
2,-0.040816
3,0.061224
4,0.102041
5,0.207547


**Monthly sentiment trend**

In [9]:
monthly_sentiment = df.groupby(["year", "month"])["sentiment_score"].mean()
monthly_sentiment.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,sentiment_score
year,month,Unnamed: 2_level_1
2018,2,-0.571429
2018,3,-0.935484
2018,4,-1.148148
2018,5,-0.548387
2018,6,-1.533333


**Yearly sentiment trend**

In [10]:
yearly_sentiment = df.groupby("year")["sentiment_score"].mean()
yearly_sentiment


Unnamed: 0_level_0,sentiment_score
year,Unnamed: 1_level_1
2018,-1.181269
2019,-0.334247
2020,0.07377
2021,0.252055
2022,-1.446575
2023,0.323288
2024,0.827397
2025,-0.081967


**Count of each sentiment group**

In [12]:
sentiment_group_dist = df["sentiment_group"].value_counts()
sentiment_group_dist


Unnamed: 0_level_0,count
sentiment_group,Unnamed: 1_level_1
Negative Sentiment,1289
Positive Sentiment,959
Neutral Sentiment,396


In [13]:
df["sentiment_7d"] = df["sentiment_score"].rolling(7).mean()
df["sentiment_30d"] = df["sentiment_score"].rolling(30).mean()
df["sentiment_90d"] = df["sentiment_score"].rolling(90).mean()
df[["date", "sentiment_score", "sentiment_7d", "sentiment_30d"]].head(20)


Unnamed: 0,date,sentiment_score,sentiment_7d,sentiment_30d
0,2018-02-01,-1,,
1,2018-02-02,-2,,
2,2018-02-03,-1,,
3,2018-02-04,-2,,
4,2018-02-05,-2,,
5,2018-02-06,-2,,
6,2018-02-07,-1,-1.571429,
7,2018-02-08,-1,-1.571429,
8,2018-02-09,-1,-1.428571,
9,2018-02-10,0,-1.285714,


In [15]:
import numpy as np
df["sentiment_peak"] = np.where(
    (df["sentiment_score"] > df["sentiment_score"].shift(1)) &
    (df["sentiment_score"] > df["sentiment_score"].shift(-1)),
    1, 0
)

df["sentiment_trough"] = np.where(
    (df["sentiment_score"] < df["sentiment_score"].shift(1)) &
    (df["sentiment_score"] < df["sentiment_score"].shift(-1)),
    1, 0
)

df[["date", "sentiment_score", "sentiment_peak", "sentiment_trough"]].head(20)


Unnamed: 0,date,sentiment_score,sentiment_peak,sentiment_trough
0,2018-02-01,-1,0,0
1,2018-02-02,-2,0,1
2,2018-02-03,-1,1,0
3,2018-02-04,-2,0,0
4,2018-02-05,-2,0,0
5,2018-02-06,-2,0,0
6,2018-02-07,-1,0,0
7,2018-02-08,-1,0,0
8,2018-02-09,-1,0,0
9,2018-02-10,0,1,0


**Streaks of fear & greed**

In [16]:
df["fear"] = df["classification"].isin(["Fear", "Extreme Fear"]).astype(int)
df["greed"] = df["classification"].isin(["Greed", "Extreme Greed"]).astype(int)

df["fear_streak"] = df["fear"] * (df["fear"].groupby((df["fear"] != df["fear"].shift()).cumsum()).cumcount() + 1)
df["greed_streak"] = df["greed"] * (df["greed"].groupby((df["greed"] != df["greed"].shift()).cumsum()).cumcount() + 1)

df[["date", "fear_streak", "greed_streak"]].head(30)


Unnamed: 0,date,fear_streak,greed_streak
0,2018-02-01,1,0
1,2018-02-02,2,0
2,2018-02-03,3,0
3,2018-02-04,4,0
4,2018-02-05,5,0
5,2018-02-06,6,0
6,2018-02-07,7,0
7,2018-02-08,8,0
8,2018-02-09,9,0
9,2018-02-10,0,0
