# Exploratory Analysis
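This notebook loads each trained model from `../models/` and displays its predictions: Linear Regression, Random Forest, XGBoost, ARIMA, Logistic Regression, LSTM, and k-Means clustering.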

In [None]:
# Visualize Linear Regression results
#
# Loads the saved linear-regression model, predicts a price for every row that
# has all four fundamentals, and shows predicted vs. actual price side by side.

# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
import joblib
import pandas as pd

feature_cols = ['PE Ratio', 'Return on Equity', 'Beta', 'EPS']

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
model = joblib.load('../models/linear_regression.pkl')

# Keep only rows where every feature and the actual price are present, so each
# prediction has a real value to be compared against.
df = (
    pd.read_csv('../data/tech_stocks_data.csv')
    .dropna(subset=feature_cols + ['Current Price'])
)
X = df[feature_cols]

# .assign returns a new frame instead of writing into the dropna() result.
df = df.assign(**{'Predicted Price': model.predict(X)})
df[['Ticker', 'Current Price', 'Predicted Price']].head(10)

In [None]:
# Visualize Random Forest results
#
# Loads the saved random-forest regressor, predicts a price for every row that
# has all four fundamentals, and shows predicted vs. actual price side by side.

# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
import joblib
import pandas as pd

feature_cols = ['PE Ratio', 'Return on Equity', 'Beta', 'EPS']

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
model = joblib.load('../models/random_forest_regressor.pkl')

# Keep only rows where every feature and the actual price are present, so each
# prediction has a real value to be compared against.
df = (
    pd.read_csv('../data/tech_stocks_data.csv')
    .dropna(subset=feature_cols + ['Current Price'])
)
X = df[feature_cols]

# .assign returns a new frame instead of writing into the dropna() result.
df = df.assign(**{'Predicted Price': model.predict(X)})
df[['Ticker', 'Current Price', 'Predicted Price']].head(10)

In [None]:
# Visualize XGBoost results
#
# Loads the saved XGBoost regressor, predicts a price for every row that has
# all four fundamentals, and shows predicted vs. actual price side by side.

# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
import joblib
import pandas as pd

feature_cols = ['PE Ratio', 'Return on Equity', 'Beta', 'EPS']

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
model = joblib.load('../models/xgboost_regressor.pkl')

# Keep only rows where every feature and the actual price are present, so each
# prediction has a real value to be compared against.
df = (
    pd.read_csv('../data/tech_stocks_data.csv')
    .dropna(subset=feature_cols + ['Current Price'])
)
X = df[feature_cols]

# .assign returns a new frame instead of writing into the dropna() result.
df = df.assign(**{'Predicted Price': model.predict(X)})
df[['Ticker', 'Current Price', 'Predicted Price']].head(10)

In [None]:
# Visualize ARIMA results
#
# Loads the saved ARIMA model and displays its forecast for the next few
# periods.

# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
import joblib

FORECAST_STEPS = 5  # number of future periods to forecast

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
# NOTE(review): presumably a fitted statsmodels results object exposing
# .forecast(steps=...) - confirm against the training script.
model = joblib.load('../models/arima_forecast.pkl')
forecast = model.forecast(steps=FORECAST_STEPS)
forecast

In [None]:
# Visualize Logistic Regression results
#
# Loads the saved logistic-regression classifier, derives the actual up/down
# label from the price column, and shows actual vs. predicted movement.

# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
import joblib
import pandas as pd

feature_cols = ['PE Ratio', 'Return on Equity', 'Beta', 'EPS']

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
model = joblib.load('../models/logistic_regression.pkl')
df = pd.read_csv('../data/tech_stocks_data.csv')

# Difference between the NEXT row's price and this row's price. It is NaN
# where either price is missing, and always NaN on the last row.
# NOTE(review): this compares adjacent rows of the CSV; if each row is a
# different ticker rather than a time step, the label compares prices across
# companies - confirm the intended row ordering.
df['Price Change'] = df['Current Price'].diff().shift(-1)

# Drop rows with an undefined price change BEFORE labelling. Labelling first
# (NaN > 0 is False) would silently mark those rows as 0 ("down"), and a later
# dropna on the label column could never remove them.
df = (
    df.dropna(subset=feature_cols + ['Price Change'])
    .assign(Movement=lambda d: d['Price Change'].gt(0).astype(int))
)
X = df[feature_cols]

# .assign returns a new frame instead of writing into the dropna() result.
df = df.assign(**{'Predicted Movement': model.predict(X)})
df[['Ticker', 'Movement', 'Predicted Movement']].head(10)

In [None]:
# Visualize LSTM results
#
# Downloads recent daily closes, feeds the most recent window to the saved
# LSTM, and prints the predicted next close in dollars.
from keras.models import load_model
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

LSTM_TICKER = 'AAPL'
LSTM_WINDOW = 10  # time steps fed to the model; input shape is (1, LSTM_WINDOW, 1)

model = load_model('../models/lstm_forecast.keras')
df = yf.download(LSTM_TICKER, period='6mo', interval='1d')[['Close']].dropna()

# A failed or partial download can leave too few rows; fail with a clear
# message instead of an opaque scaler/reshape error further down.
if len(df) < LSTM_WINDOW:
    raise ValueError(
        f"Need at least {LSTM_WINDOW} closing prices for {LSTM_TICKER}, got {len(df)}"
    )

# NOTE(review): the scaler is re-fit on this 6-month window; the prediction is
# only meaningful if the model was trained on data scaled the same way -
# confirm, or load the scaler persisted at training time.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df)

# The last LSTM_WINDOW scaled closes form a single-sample batch.
input_seq = scaled[-LSTM_WINDOW:].reshape((1, LSTM_WINDOW, 1))
next_day_scaled = model.predict(input_seq, verbose=0)

# Map the scaled prediction back to dollars.
next_day_price = scaler.inverse_transform(next_day_scaled)[0][0]
print(f"Predicted next day's closing price: ${next_day_price:.2f}")

In [None]:
# Visualize k-Means Clustering results
#
# Loads the saved k-means model, assigns every stock with complete features to
# a cluster, and plots PE Ratio against Return on Equity coloured by cluster.

# All imports sit at the top of the cell so it runs on a fresh kernel
# (Restart & Run All).
import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler

cluster_cols = ['PE Ratio', 'Return on Equity', 'Beta', 'Dividend Yield']

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts you trust.
model = joblib.load('../models/kmeans.pkl')
df = pd.read_csv('../data/tech_stocks_data.csv').dropna(subset=cluster_cols)
features = df[cluster_cols]

# NOTE(review): the scaler is re-fit here rather than loaded; the cluster
# assignments only match training if the model was fit on features
# standardized over the same data - confirm, or persist the scaler.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# .assign returns a new frame instead of writing into the dropna() result.
df = df.assign(Cluster=model.predict(X_scaled))

fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(df['PE Ratio'], df['Return on Equity'], c=df['Cluster'], cmap='viridis', s=50)
ax.set_title('k-Means Clustering of Stocks')
ax.set_xlabel('PE Ratio')
ax.set_ylabel('Return on Equity')
fig.colorbar(scatter, ax=ax, label='Cluster')
ax.grid(True)
fig.tight_layout()
plt.show()