In [2]:
!pip install FPDF

^C





[notice] A new release of pip is available: 23.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from fpdf import FPDF
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Hard-coded daily market records, one dict per date (prices, volume, valuation ratios).
data = [
    {'Date': '2025-06-09', 'Open': 1300.00, 'High': 1316.50, 'Low': 1280.00, 'Close': 1315.20, 'Volume': 330300, 'VWAP': 1301.63, 'Beta': 1.19, 'Mkt_Cap': 14483, 'EPS': 55.45, 'PE': 23.52, 'PB': 2.82, 'Book_Value': 462.95},
    {'Date': '2025-06-10', 'Open': 1315.00, 'High': 1325.00, 'Low': 1295.00, 'Close': 1320.45, 'Volume': 340100, 'VWAP': 1310.50, 'Beta': 1.20, 'Mkt_Cap': 14520, 'EPS': 55.80, 'PE': 23.65, 'PB': 2.85, 'Book_Value': 463.10},
    {'Date': '2025-06-11', 'Open': 1321.00, 'High': 1330.00, 'Low': 1305.00, 'Close': 1325.85, 'Volume': 345000, 'VWAP': 1315.00, 'Beta': 1.18, 'Mkt_Cap': 14585, 'EPS': 55.70, 'PE': 23.58, 'PB': 2.83, 'Book_Value': 462.80},
    {'Date': '2025-06-12', 'Open': 1326.00, 'High': 1340.00, 'Low': 1310.00, 'Close': 1335.30, 'Volume': 355000, 'VWAP': 1320.50, 'Beta': 1.22, 'Mkt_Cap': 14650, 'EPS': 56.00, 'PE': 23.75, 'PB': 2.88, 'Book_Value': 464.00},
    {'Date': '2025-06-13', 'Open': 1336.00, 'High': 1350.00, 'Low': 1320.00, 'Close': 1345.70, 'Volume': 360500, 'VWAP': 1325.60, 'Beta': 1.23, 'Mkt_Cap': 14730, 'EPS': 56.20, 'PE': 23.90, 'PB': 2.90, 'Book_Value': 465.20},
]

# One chain: parse the date strings, index the frame by date, then append the
# row-over-row percentage change of Close (NaN for the first row, which has no
# predecessor).
market_df = (
    pd.DataFrame(data)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .assign(returns=lambda frame: frame['Close'].pct_change())
)

In [None]:
# Commentary excerpt that is scored for sentiment below.
news_summary = """
Speaker AI want to get your take on on Google here. It seems to me that they still do need to convince investors that they can win this AI race.
And I think the big driver here is that Sergey is very much back engaged and is focused on the AI side of the house at Google.
The data suggests that chat GPT's clearly the leader, but Gemini's usage is not far behind open AI and chat GPT.
The verticalization of AI certainly creates opportunities for new entrants.
"""

# The whole excerpt is scored once, so the result is a single float; it is
# broadcast to every row of market_df rather than varying by date.
blob = TextBlob(news_summary)
sentiment_score = blob.polarity
market_df['sentiment'] = sentiment_score

In [None]:
# Mean of the return column over the whole window.
avg_return = market_df['returns'].mean()

# BUY/SELL only when sentiment clears +/-0.3 AND the average return points the
# same way; any disagreement or weak sentiment falls through to HOLD.
signal = (
    "BUY" if sentiment_score > 0.3 and avg_return > 0
    else "SELL" if sentiment_score < -0.3 and avg_return < 0
    else "HOLD"
)

# A scalar assignment broadcasts the one signal to every row.
market_df['signal'] = signal

In [None]:
# Encode the rule-based signal as an integer class label.
market_df['label'] = market_df['signal'].map({"BUY": 0, "HOLD": 1, "SELL": 2})

features = ['VWAP', 'PE', 'PB', 'EPS', 'returns', 'sentiment']
# Drop rows with a missing feature or label (the first row has no return).
df_clean = market_df.dropna(subset=features + ['label'])
X = df_clean[features]
y = df_clean['label']

# NOTE(review): every row carries the same rule-based signal, so `y` holds a
# single class and the forest can only echo it back; the prediction below is
# also made on a row the model was trained on. Treat the "ML signal" as a
# placeholder until per-row labels and a held-out split are available.
model = RandomForestClassifier(random_state=42)  # fixed seed => reproducible runs
model.fit(X, y)

# Predict on a DataFrame slice (not `.values`) so the column names match the
# ones seen during fit; a bare ndarray triggers sklearn's feature-name warning.
pred = model.predict(X.iloc[-1:])
# Cast the numpy scalar to a plain int before using it as a dict key.
pred_signal = {0: "BUY", 1: "HOLD", 2: "SELL"}[int(pred[0])]



In [None]:
# Investor age (hard-coded) passed to age_based_strategy to pick the advice tier.
user_age = 28
def age_based_strategy(age, base_signal):
    """Turn an investor's age and a trading signal into a strategy phrase.

    Args:
        age: Investor age. Under 25, 25-40 inclusive, and everything else
            form the three advice tiers.
        base_signal: "BUY" or "HOLD"; any other value (including "SELL")
            receives the tier's most defensive phrase.

    Returns:
        The strategy description string for the matching tier and signal.
    """
    # Each tier is (advice on BUY, advice on HOLD, advice on anything else).
    if age < 25:
        tier = (
            "Aggressive SIP or Intraday Buy",
            "Consider SIP or Watch Closely",
            "Avoid or Short-term Trade Only",
        )
    elif 25 <= age <= 40:
        tier = (
            "SIP or Short-Term Buy",
            "Hold & Monitor",
            "Avoid New Entry",
        )
    else:
        tier = (
            "Long-Term Hold with Caution",
            "Only if Already Holding",
            "Avoid Risky Exposure",
        )

    on_buy, on_hold, otherwise = tier
    if base_signal == "BUY":
        return on_buy
    if base_signal == "HOLD":
        return on_hold
    return otherwise

# Convert the model's predicted signal into advice for this investor's age tier.
strategy = age_based_strategy(user_age, pred_signal)

In [None]:
# Closing-price line chart, written to disk for the PDF report (not shown inline).
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=market_df, x=market_df.index, y="Close", marker="o", ax=ax)
ax.set_title("GOOG Closing Prices Over Time")
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig("price_trend.png")
plt.close(fig)

# Correlation heatmap, written to disk for the PDF report.
# Columns holding a single repeated value (e.g. the one sentiment score that is
# broadcast to every row) have zero variance, so their correlations are
# undefined and would render as blank NaN rows/columns; leave them out.
varying_cols = market_df.loc[:, market_df.nunique() > 1]
corr = varying_cols.corr(numeric_only=True)

# Scale the canvas with the matrix so the per-cell annotations stay legible.
side = max(6, 0.7 * len(corr))
fig, ax = plt.subplots(figsize=(side, 0.8 * side))
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Feature Correlation Matrix")
fig.tight_layout()
fig.savefig("correlation_matrix.png")
plt.close(fig)

In [None]:
# Rank the model inputs by the forest's importance scores, largest first.
importances = model.feature_importances_
feat_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Horizontal bar chart of the ranking, written to disk for the PDF report.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=feat_df, x='Importance', y='Feature', ax=ax)
ax.set_title("Feature Importance from Random Forest")
fig.tight_layout()
fig.savefig("feature_importance.png")
plt.close(fig)

In [None]:
# Assemble the PDF report: centred title, text summary, then the three charts
# that the plotting cells saved to disk.
report_path = "Google_Investment_Analysis_with_ML.pdf"

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)

# w=0 stretches the cell from the left margin to the right margin, so
# align='C' centres the title on the printable area. The previous fixed width
# of 200 overran the right margin and pushed the title off-centre.
pdf.cell(0, 10, txt="Google Investment Analysis Report (with ML)", ln=True, align='C')
pdf.ln(10)

summary_lines = [
    f"Sentiment Score: {sentiment_score:.2f}",
    f"Average Return: {avg_return:.4f}",
    f"Rule-Based Signal: {signal}",
    f"ML Predicted Signal: {pred_signal}",
    f"Age ({user_age})-Based Strategy: {strategy}",
]
pdf.multi_cell(0, 10, txt="\n".join(summary_lines))

# Charts in reading order, with a 5-unit gap between (not after) images.
chart_files = ["price_trend.png", "correlation_matrix.png", "feature_importance.png"]
for position, chart_file in enumerate(chart_files):
    if position:
        pdf.ln(5)
    pdf.image(chart_file, w=180)

pdf.output(report_path)
print(f"✅ PDF Report Generated: {report_path}")

✅ PDF Report Generated: Google_Investment_Analysis_with_ML.pdf
