# In [None]:
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Global plot styling for every figure produced below.
plt.style.use('ggplot')
sns.set(font_scale=1.2)

# SentimentIntensityAnalyzer needs the VADER lexicon. Download it only when it
# is not already installed, so a fresh environment works and repeat runs do
# not hit the network for it.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

# Ticker under analysis and the peers it is compared against.
main_ticker = "TSLA"
peers = ["NVDA", "MSFT", "AMZN"]
tickers = [main_ticker] + peers

# yfinance handle reused by the history/fundamentals sections below.
stock = yf.Ticker(main_ticker)

def fetch_news_sentiment(ticker, timeout=10):
    """Return the mean VADER compound score of headlines on a quote page.

    Fetches the Yahoo Finance quote page for *ticker*, takes the text of up
    to the first 100 ``<h3>`` elements, scores each non-empty one with
    ``SentimentIntensityAnalyzer`` and averages the ``compound`` values.

    Args:
        ticker: Symbol interpolated into the quote-page URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The mean compound score as a float, or ``0.0`` (neutral) when the
        page cannot be fetched or yields no headline text.
    """
    url = f"https://finance.yahoo.com/quote/{ticker}?p={ticker}"
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        # Without a timeout requests can block indefinitely on a stalled
        # connection.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: sentiment is an auxiliary feature, so fall back to the
        # same neutral value used when no headlines are found.
        print(f"Warning: could not fetch headlines for {ticker}: {exc}")
        return 0.0

    soup = BeautifulSoup(response.text, 'html.parser')
    sia = SentimentIntensityAnalyzer()
    texts = (h.get_text(strip=True) for h in soup.find_all('h3')[:100])
    # Skip empty headings; they would score 0.0 and drag the mean to neutral.
    sentiments = [sia.polarity_scores(text)['compound'] for text in texts if text]
    return float(np.mean(sentiments)) if sentiments else 0.0

# Daily price history used to build the model's feature table.
start_date = "2020-01-01"
end_date = "2025-05-29"
hist_data = stock.history(start=start_date, end=end_date).dropna()

# Previous close plus 10- and 50-row rolling means of the close.
close = hist_data['Close']
hist_data['Close_lag1'] = close.shift(1)
for window in (10, 50):
    hist_data[f'MA{window}'] = close.rolling(window=window).mean()

# RSI from 15-row simple rolling means of upward and downward moves.
price_change = close.diff()
up_moves = price_change.where(price_change > 0, 0)
down_moves = -price_change.where(price_change < 0, 0)
relative_strength = (
    up_moves.rolling(window=15).mean() / down_moves.rolling(window=15).mean()
)
hist_data['RSI'] = 100 - (100 / (1 + relative_strength))

# A single sentiment score, broadcast to every row.
hist_data['News_Sentiment'] = fetch_news_sentiment(main_ticker)

# Drop the warm-up rows where lag/rolling features are undefined.
hist_data = hist_data.dropna()

# Feature columns, listed once so training and prediction cannot drift apart.
feature_cols = ['Open', 'High', 'Low', 'Volume', 'Close_lag1', 'MA10', 'MA50', 'RSI', 'News_Sentiment']
X = hist_data[feature_cols]
y = hist_data['Close']

# Chronological split: the rows are a time series, so shuffling would place
# later days in the training set and earlier days in the test set, leaking
# future information into the evaluation. The last 20% of rows are held out.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

# NOTE(review): the target is the same row's Close and the features include
# that row's Open/High/Low/Volume, so the value printed below is the model's
# estimate for the last row of hist_data rather than a forecast for a later
# date -- confirm this is the intent.
last_day_data = hist_data[feature_cols].tail(1)
last_day_scaled = scaler.transform(last_day_data)
predicted_close = model.predict(last_day_scaled)[0]

print(f"Predicted Close Price for {main_ticker} on May 30, 2025")
# The original literal contained the mojibake "Â±"; the intended sign is "±".
print(f"Predicted Close: ${predicted_close:.2f} ± 2% \n")

# [1] Historical price plot: closing prices of the main ticker since 2020.
hist_5y = stock.history(start="2020-01-01", end="2025-05-30")
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(hist_5y.index, hist_5y['Close'], label="Close Price", color='blue')
ax.set(
    title=f"{main_ticker} Closing Prices - Last 5 Years",
    xlabel="Date",
    ylabel="Price (USD)",
)
ax.legend()
fig.tight_layout()
fig.savefig('historical_price.png')
plt.show()
plt.close(fig)

# [2] Close price comparison: one line per ticker on a shared axis.
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in tickers:
    hist_peer = yf.Ticker(ticker).history(start="2020-01-01", end="2025-05-31")
    ax.plot(hist_peer.index, hist_peer['Close'], label=ticker)
ax.set(
    title="5-Year Close Price Comparison",
    xlabel="Date",
    ylabel="Price (USD)",
)
ax.legend()
fig.tight_layout()
fig.savefig('close_price_comparison.png')
plt.show()
plt.close(fig)

# [2.1] One-year close prices of every ticker plotted against the S&P 500.
one_year_start = "2024-05-31"
one_year_end = "2025-05-31"
sp500_1y = yf.Ticker("^GSPC").history(start=one_year_start, end=one_year_end)
sp500_close = sp500_1y['Close']

fig, ax = plt.subplots(figsize=(16, 8))
# Company lines first, so the index is drawn last on the same axis.
for ticker in tickers:
    data = yf.Ticker(ticker).history(start=one_year_start, end=one_year_end)
    ax.plot(data.index, data['Close'], label=f"{ticker} Close")
ax.plot(
    sp500_close.index,
    sp500_close,
    label='S&P 500 (^GSPC) Close',
    color='black',
    linewidth=2.5,
    linestyle='--',
)
ax.set(
    title="1-Year Close Price Comparison: Companies vs S&P 500",
    xlabel="Date",
    ylabel="Price (USD)",
)
ax.legend()
fig.tight_layout()
fig.savefig('all_vs_sp500_1y.png')
plt.show()
plt.close(fig)

# [3] EPS, P/E, Dividend Yield Plot
# Collect one [ticker, EPS, P/E, dividend yield] row per ticker. Missing or
# null values become 0 so every column stays numeric for plotting.
data = []
for ticker in tickers:
    try:
        info = yf.Ticker(ticker).info
    except Exception as e:
        # Best effort: keep the ticker on the chart with zeroed metrics, but
        # say why instead of hiding the failure.
        print(f"Warning: could not fetch info for {ticker}: {e}")
        info = {}
    # `or 0` also covers keys that are present with a None value, which
    # `.get(key, 0)` would pass through unchanged.
    eps = info.get("trailingEps") or 0
    pe = info.get("trailingPE") or 0
    # NOTE(review): assumes dividendYield is a fraction (hence * 100) --
    # confirm against the installed yfinance version.
    dy = (info.get("dividendYield") or 0) * 100
    data.append([ticker, eps, pe, dy])

df_metrics = pd.DataFrame(data, columns=["Ticker", "EPS", "P/E", "DividendYield"])
df_metrics.set_index("Ticker", inplace=True)

# One bar chart per metric, side by side.
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
df_metrics["EPS"].plot(kind="bar", ax=axs[0], title="EPS")
df_metrics["P/E"].plot(kind="bar", ax=axs[1], title="P/E Ratio")
df_metrics["DividendYield"].plot(kind="bar", ax=axs[2], title="Dividend Yield (%)")
plt.tight_layout()
plt.savefig('metrics_comparison.png')
plt.show()
plt.close()

# [4] Close price of the main ticker overlaid with 20- and 50-row moving averages.
hist_ma = stock.history(start="2020-01-01", end="2025-05-31").dropna()
for window in (20, 50):
    hist_ma[f'MA{window}'] = hist_ma['Close'].rolling(window=window).mean()

fig, ax = plt.subplots(figsize=(14, 7))
for column, legend_label in (
    ('Close', 'Close Price'),
    ('MA20', '20-Day MA'),
    ('MA50', '50-Day MA'),
):
    ax.plot(hist_ma.index, hist_ma[column], label=legend_label)
ax.set(
    title=f"{main_ticker} Close Price with Moving Averages",
    xlabel="Date",
    ylabel="Price (USD)",
)
ax.legend()
fig.tight_layout()
fig.savefig('moving_averages.png')
plt.show()
plt.close(fig)

# [7] Actual vs predicted close on the held-out test rows, plotted by date.
fig, ax = plt.subplots(figsize=(14, 7))
test_dates = y_test.index
ax.scatter(test_dates, y_test, label='Actual Close', color='blue', alpha=0.5)
ax.scatter(test_dates, y_pred, label='Predicted Close', color='red', alpha=0.5)
ax.set(
    title="Actual vs Predicted Close Prices",
    xlabel="Date",
    ylabel="Price (USD)",
)
ax.legend()
fig.tight_layout()
fig.savefig('actual_vs_predicted.png')
plt.show()
plt.close(fig)

# [8] RSI Visualization
# Plots the RSI column of hist_data with the conventional 70/30 guide lines.
plt.figure(figsize=(14, 4))
# The legend states the look-back actually used: hist_data['RSI'] is computed
# from 15-row rolling means, not 5.
plt.plot(hist_data.index, hist_data['RSI'], label='RSI (15)', color='darkorange')
plt.axhline(70, color='red', linestyle='--', label='Overbought')
plt.axhline(30, color='green', linestyle='--', label='Oversold')
plt.title(f"{main_ticker} Relative Strength Index (RSI)")
plt.ylabel("RSI")
plt.legend()
plt.tight_layout()
plt.show()
# Release the figure, as the other plotting sections do.
plt.close()

# [9] Price vs volume: close price on the left axis, volume on a twin right axis.
price_color = 'tab:blue'
volume_color = 'tab:red'

fig, ax1 = plt.subplots(figsize=(14, 7))
ax1.set_xlabel('Date')
ax1.set_ylabel('Close Price (USD)', color=price_color)
ax1.plot(hist_data.index, hist_data['Close'], color=price_color)
ax1.tick_params(axis='y', labelcolor=price_color)

# Second y-axis sharing the same dates, drawn as a translucent filled area.
ax2 = ax1.twinx()
ax2.set_ylabel('Volume', color=volume_color)
ax2.fill_between(hist_data.index, hist_data['Volume'], color=volume_color, alpha=0.3)
ax2.tick_params(axis='y', labelcolor=volume_color)

plt.title(f"{main_ticker} Price vs Volume (Last 5 Years)")
fig.tight_layout()
plt.show()


# [10] Naive baseline: use the previous close (Close_lag1) as the prediction
# for each row's close and report its error metrics.
hist_acc = hist_data.assign(Predicted_Close=hist_data['Close_lag1']).dropna()

actual_close = hist_acc['Close']
baseline_close = hist_acc['Predicted_Close']

mae = mean_absolute_error(actual_close, baseline_close)
mse = mean_squared_error(actual_close, baseline_close)
r2 = r2_score(actual_close, baseline_close)
# MAE expressed as a percentage of the mean close, subtracted from 100.
accuracy = 100 - (mae / actual_close.mean() * 100)

print(
    "\n Accuracy Metrics",
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"R2 Score: {r2:.4f}",
    f"Accuracy Estimate: {accuracy:.2f}%",
    sep="\n",
)

# [11] Analyst recommendations: print grade columns when present, otherwise a
# plain preview of whatever the summary contains.
try:
    recs = stock.recommendations_summary
    if recs is None:
        print("\n No recommendation summary available. ")
    else:
        print("\n Recommendations ")
        has_grades = 'To Grade' in recs.columns
        if has_grades:
            preview = recs[['To Grade', 'Firm', 'Action']].dropna()
        else:
            print("\n No 'To Grade' column found. Showing preview: ")
            preview = recs
        print(preview.head())
except Exception as exc:
    print("\n Error fetching recommendations:", exc)

# [12] Financial statements: show the first rows, or say none were returned.
try:
    financials = stock.financials
    if financials.empty:
        print("\n No financial data available.")
    else:
        print("\n Latest Financials ")
        print(financials.head())
except Exception as exc:
    print("\n Error fetching financials: ", exc)

# [13] Net Income & Profitability Status
# Prints the 'Net Income' row of the income statement and classifies the
# first reported (non-missing) value.
try:
    income_stmt = stock.income_stmt
    if not income_stmt.empty and 'Net Income' in income_stmt.index:
        print("\n Net Income from Income Statement ")
        net_income = income_stmt.loc['Net Income']
        print(net_income)
        # Ignore missing periods: NaN compares False against 0 and would
        # otherwise be reported as a deficit.
        reported = net_income.dropna()
        if reported.empty:
            print("\nProfitability Status:  Unknown (no reported net income)")
        else:
            # NOTE(review): assumes the first column is the most recent
            # period -- confirm against yfinance's column ordering.
            latest_ni = reported.iloc[0]
            if latest_ni > 0:
                print("\nProfitability Status:  Growing (positive net income)")
            elif latest_ni < 0:
                print("\nProfitability Status:  In Deficit (negative net income)")
            else:
                print("\nProfitability Status:  Break-even (zero net income)")
    else:
        print("Net Income data not available in income statement.")
except Exception as e:
    print("Error fetching income statement:", e)

