In [3]:
import pandas as pd
# Sentiment model output: parse `date`, derive the calendar `year` from it,
# and upper-case `ticker`.
# NOTE(review): both input paths are absolute and machine-specific — consider
# a configurable data directory so the notebook runs elsewhere.
sentiment_df = pd.read_csv(
    "C:/Users/ishan/Desktop/ISHANAY/BU docs/Spring 2025/Financial_analytics/Project/FinancialScorePredictor_UsingSentimentAnalysis/data/ModelDataFile.csv"
).assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    # Evaluated after `date` above, so `.dt` sees the parsed datetimes.
    year=lambda frame: frame["date"].dt.year,
    ticker=lambda frame: frame["ticker"].str.upper(),
)

# Fundamental scores: parse `datadate` and derive the matching `year` column.
fund_df = pd.read_csv(
    "C:/Users/ishan/Desktop/ISHANAY/BU docs/Spring 2025/Financial_analytics/Project/FinancialScorePredictor_UsingSentimentAnalysis/data/fundamental_scores_wrds.csv"
).assign(
    datadate=lambda frame: pd.to_datetime(frame["datadate"]),
    year=lambda frame: frame["datadate"].dt.year,
)

In [None]:
# Reduce the fundamentals to at most one row per (tic, year) before joining.
# Without this, any ticker with several `datadate` rows in the same year would
# duplicate every matching sentiment row in the left join and inflate the
# verdict counts computed later.
# `tic` is upper-cased on a copy so it matches `ticker`, which is upper-cased
# at load time; `fund_df` itself is left untouched so this cell can be re-run.
fund_latest = (
    fund_df
    .assign(tic=fund_df["tic"].str.upper())
    # Stable sort so ties on `datadate` keep their original relative order.
    .sort_values("datadate", kind="mergesort")
    # Keep the most recent `datadate` row for each (tic, year).
    .drop_duplicates(subset=["tic", "year"], keep="last")
)

# Left join: every sentiment row is kept; rows without fundamentals get NaN.
# `validate="many_to_one"` makes pandas raise if the right-hand keys are not
# unique, so a row-multiplying join can never pass silently.
# NOTE(review): matching on the calendar year of `datadate` may attach
# fundamentals dated after the sentiment row's `date` — confirm this is intended.
merged_df = pd.merge(
    sentiment_df,
    fund_latest,
    how="left",
    left_on=["ticker", "year"],
    right_on=["tic", "year"],
    validate="many_to_one",
)

# Filling missing financial scores with neutral 0.5
merged_df["fundamental_score"] = merged_df["fundamental_score"].fillna(0.5)

# Replacing null sentiment scores with 0 (treated as neutral) to handle bias
sentiment_columns = ["final_sentiment_score", "sentiment_1d", "sentiment_3d_avg", "sentiment_7d_avg"]
merged_df[sentiment_columns] = merged_df[sentiment_columns].fillna(0)

In [5]:
# Preview the first five rows of the merged frame.
merged_df.head(n=5)

Unnamed: 0,date,ticker,price,volume,daily_return,dividend,low_bid,high_ask,sp500_return,news_score,...,score_pe,score_pb,score_ev_ebitda,score_roce,score_margin,score_turnover,score_inventory,score_accruals,score_dte,score_cov
0,2022-03-10,ORCL,76.65,13812596.0,0.007227,,75.03,76.85,-0.004292,-1.0,...,0.336283,0.975477,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238
1,2022-06-13,ORCL,64.05,16547246.0,-0.046023,,63.76,66.1,-0.038768,1.0,...,0.336283,0.975477,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238
2,2022-06-16,ORCL,68.71,10685987.0,-0.014204,,67.58,68.96,-0.032512,0.0,...,0.336283,0.975477,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238
3,2022-07-27,ORCL,75.87,6677226.0,0.024855,,74.32,76.12,0.026156,0.0,...,0.336283,0.975477,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238
4,2022-09-12,ORCL,77.08,15628850.0,0.015413,,76.27,77.3771,0.010584,-0.497464,...,0.336283,0.975477,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238


In [10]:
# Defining sentiment-based verdict function
def sentiment_verdict(score, threshold=0.1):
    """Translate a numeric sentiment score into a trading label.

    Args:
        score: Sentiment score to classify.
        threshold: Symmetric cut-off; defaults to 0.1.

    Returns:
        "Buy" when ``score >= threshold``, "Sell" when ``score <= -threshold``,
        otherwise "Hold". A NaN score fails both comparisons and therefore
        yields "Hold".
    """
    if score >= threshold:
        return "Buy"
    if score <= -threshold:
        return "Sell"
    return "Hold"

# Label each row from its final sentiment score (default threshold of 0.1).
final_scores = merged_df["final_sentiment_score"]
merged_df["sentiment_verdict"] = final_scores.apply(sentiment_verdict)

# Combining sentiment & fundamentals into final verdict
def final_verdict(row):
    """Combine a row's sentiment label and fundamental score into one verdict.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            ``"sentiment_verdict"`` and ``"fundamental_score"``.

    Returns:
        One of "Strong Buy", "Buy", "Leaning Buy", "Hold", "Leaning Sell",
        "Sell" or "Strong Sell". Returns None when the sentiment label is not
        "Buy", "Sell" or "Hold".
    """
    label = row["sentiment_verdict"]
    fundamentals = row["fundamental_score"]

    # Positive sentiment: stronger fundamentals upgrade the call.
    if label == "Buy":
        if fundamentals >= 0.85:
            return "Strong Buy"
        return "Buy" if fundamentals >= 0.65 else "Leaning Buy"

    # Negative sentiment: weaker fundamentals strengthen the sell call.
    if label == "Sell":
        if fundamentals <= 0.15:
            return "Strong Sell"
        return "Sell" if fundamentals <= 0.35 else "Leaning Sell"

    # Neutral sentiment: only clearly strong or weak fundamentals tilt the call.
    if label == "Hold":
        if fundamentals >= 0.7:
            return "Leaning Buy"
        return "Leaning Sell" if fundamentals <= 0.3 else "Hold"

    # Unrecognised sentiment label: no verdict.
    return None

# Evaluate the combined verdict row by row (axis=1 passes each row to the function).
row_verdicts = merged_df.apply(final_verdict, axis=1)
merged_df["final_verdict"] = row_verdicts

In [11]:
# Preview the first five rows, now including the two verdict columns.
merged_df.head(n=5)

Unnamed: 0,date,ticker,price,volume,daily_return,dividend,low_bid,high_ask,sp500_return,news_score,...,score_ev_ebitda,score_roce,score_margin,score_turnover,score_inventory,score_accruals,score_dte,score_cov,sentiment_verdict,final_verdict
0,2022-03-10,ORCL,76.65,13812596.0,0.007227,,75.03,76.85,-0.004292,-1.0,...,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238,Sell,Leaning Sell
1,2022-06-13,ORCL,64.05,16547246.0,-0.046023,,63.76,66.1,-0.038768,1.0,...,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238,Buy,Leaning Buy
2,2022-06-16,ORCL,68.71,10685987.0,-0.014204,,67.58,68.96,-0.032512,0.0,...,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238,Buy,Leaning Buy
3,2022-07-27,ORCL,75.87,6677226.0,0.024855,,74.32,76.12,0.026156,0.0,...,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238,Buy,Leaning Buy
4,2022-09-12,ORCL,77.08,15628850.0,0.015413,,76.27,77.3771,0.010584,-0.497464,...,0.628093,0.751186,0.61502,0.363329,0.603186,0.843031,0.988475,0.364238,Sell,Leaning Sell


In [12]:
# Tally the verdict labels: absolute counts and share of all labelled rows.
# value_counts() skips missing values by default, so rows without a verdict
# are not represented in either series.
verdict_column = merged_df["final_verdict"]
verdict_counts = verdict_column.value_counts()
verdict_percent = verdict_column.value_counts(normalize=True).mul(100)

# Side-by-side summary table; percentages rounded to two decimals.
distribution = pd.DataFrame(
    {
        "Count": verdict_counts,
        "Percentage": verdict_percent.round(2),
    }
)

print("📊 Final Verdict Distribution:")
print(distribution)

📊 Final Verdict Distribution:
               Count  Percentage
final_verdict                   
Leaning Buy     4875       50.87
Leaning Sell    3338       34.83
Hold             853        8.90
Buy              390        4.07
Sell             128        1.34


In [13]:
# Persist the merged frame with both verdict columns; the row index is omitted.
# NOTE(review): absolute, machine-specific output path — consider a configurable data directory.
output_csv = "C:/Users/ishan/Desktop/ISHANAY/BU docs/Spring 2025/Financial_analytics/Project/FinancialScorePredictor_UsingSentimentAnalysis/data/combined_verdict_with_fundamentals.csv"
merged_df.to_csv(output_csv, index=False)