## Visualisation

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor

# Ticker universe and sample window used by every plot in this cell
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
start_date, end_date = '2020-01-01', '2023-12-31'

# Fetch the price history and reduce it to a single price table:
# adjusted closes when the download exposes them, plain closes otherwise.
data = yf.download(stocks, start=start_date, end=end_date)
price_field = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
data = data[price_field]

# Period-over-period percentage changes; rows containing NaN are discarded
returns = data.pct_change().dropna()

# 1. Stock Price Trends -- one line per ticker on a shared axis
fig, ax = plt.subplots(figsize=(12, 6))
for ticker in stocks:
    ax.plot(data.index, data[ticker], label=ticker)
ax.set_title('Stock Price Trends (2020-2023)')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price (USD)')
ax.legend()
plt.show()

# 2. Correlation Heatmap -- pairwise correlation of the return columns
corr_matrix = returns.corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Between Stock Returns')
plt.show()

# 3. Distribution of Stock Returns -- overlaid kernel density estimates
fig, ax = plt.subplots(figsize=(10, 6))
for ticker in stocks:
    sns.kdeplot(returns[ticker], label=ticker, ax=ax)
ax.set_title('Stock Return Distributions')
ax.set_xlabel('Daily Return')
ax.set_ylabel('Density')
ax.legend()
plt.show()

# 4. PCA for Dimensionality Reduction
# Standardise each return column, then project every row onto the first
# two principal components.
clean_returns = returns.dropna()
scaler = StandardScaler().fit(clean_returns)
scaled_returns = scaler.transform(clean_returns)

pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_returns)
pc1, pc2 = pca_result[:, 0], pca_result[:, 1]

fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=pc1, y=pc2, alpha=0.7, ax=ax)
ax.set_title('PCA: First Two Principal Components')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
plt.show()

# 5. K-Means Clustering
# Each row of `pca_result` comes from one row of `returns` (one trading day),
# so the clusters group *days* with similar cross-stock return patterns --
# they do not group the tickers themselves. The title reflects that.
# n_init is pinned explicitly so the result does not depend on the
# scikit-learn version's default for the number of initialisations.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(pca_result)
labels = kmeans.labels_

plt.figure(figsize=(8,6))
sns.scatterplot(x=pca_result[:,0], y=pca_result[:,1], hue=labels, palette='Set1')
plt.title('K-Means Clustering of Daily Returns (PCA Space)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title='Cluster')
plt.show()

# 6. Feature Importance (Random Forest)
# Features: every ticker's return on a given row.
# Target: AAPL's return on the following row. The last row is dropped
# because it has no following row to predict.
next_aapl_return = returns['AAPL'].shift(-1)
X = returns.iloc[:-1, :]
y = next_aapl_return.iloc[:-1]  # Predict Apple stock

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Rank the input columns by the forest's impurity-based importance scores
importance = pd.Series(model.feature_importances_, index=X.columns)
importance = importance.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
importance.plot(kind='bar', ax=ax)
ax.set_title('Feature Importance for Stock Prediction (Random Forest)')
ax.set_xlabel('Stock Features')
ax.set_ylabel('Importance')
plt.show()


## Run1

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.svm import SVR

# Download stock data
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']  # Add more as needed
start_date = '2020-01-01'
# The modelling window stops at the end of 2023. 2024 is downloaded
# separately further down and used as an out-of-sample evaluation year; if it
# were also part of this window, the models would be trained on the very rows
# they are later "predicting". (yfinance documents `end` as exclusive.)
end_date = '2023-12-31'
data = yf.download(stocks, start=start_date, end=end_date)
# Prefer adjusted closes when the download provides them, else plain closes
if 'Adj Close' in data.columns:
    data = data['Adj Close']
else:
    data = data['Close']
returns = data.pct_change().dropna()

# Feature Engineering
X = returns.iloc[:-1, :]  # Features (all but last day)
y = returns.shift(-1).iloc[:-1, :]  # Target (next day's return)

# Prepare 2024 data for prediction
test_start_date = '2024-01-01'
test_end_date = '2024-12-31'
test_data = yf.download(stocks, start=test_start_date, end=test_end_date)
if 'Adj Close' in test_data.columns:
    test_data = test_data['Adj Close']
else:
    test_data = test_data['Close']
test_returns = test_data.pct_change().dropna()
X_2024 = test_returns.iloc[:-1, :]
y_actual_2024 = test_returns.shift(-1).iloc[:-1, :]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization
# The scaler is fitted on the training split only and then reused for every
# other input. Fitting a second scaler on the 2024 rows (as was done before)
# puts them on a different scale from the one the models were trained on.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_2024_scaled = scaler.transform(X_2024)


# Both regressors are fitted to the same scalar target: the row-wise mean of
# the next-day returns across all tickers.
train_target = y_train.mean(axis=1)
test_target = y_test.mean(axis=1)

# 1. Support Vector Regression (SVR) Model
svm_model = SVR(kernel='rbf', C=5, gamma=0.1)
svm_model.fit(X_train_scaled, train_target)
svm_preds = svm_model.predict(X_test_scaled)
svm_mse = mean_squared_error(test_target, svm_preds)
svm_rmse = np.sqrt(svm_mse)
print(f"SVR RMSE: {svm_rmse:.4f}")

# 2. Random Forest Regression
rf_params = dict(n_estimators=300, max_depth=10, min_samples_split=5, random_state=42)
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X_train_scaled, train_target)
rf_preds = rf_model.predict(X_test_scaled)
rf_mse = mean_squared_error(test_target, rf_preds)
rf_rmse = np.sqrt(rf_mse)
print(f"Random Forest RMSE: {rf_rmse:.4f}")

# 3. Long Short-Term Memory (LSTM) Model
# A trailing axis of length 1 is appended, so each scaled feature column
# becomes one step of a length-n_features sequence with a single channel.
X_lstm = X_train_scaled[..., np.newaxis]
X_lstm_test = X_test_scaled[..., np.newaxis]

sequence_length = X_lstm.shape[1]

# Two stacked LSTM layers followed by a single-unit regression head
lstm_model = Sequential()
lstm_model.add(LSTM(100, activation='tanh', return_sequences=True, input_shape=(sequence_length, 1)))
lstm_model.add(LSTM(100, activation='tanh'))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mse')
lstm_train_target = y_train.mean(axis=1)
lstm_model.fit(X_lstm, lstm_train_target, epochs=20, batch_size=8, verbose=1)

lstm_preds = lstm_model.predict(X_lstm_test)
lstm_mse = mean_squared_error(y_test.mean(axis=1), lstm_preds)
lstm_rmse = np.sqrt(lstm_mse)
print(f"LSTM RMSE: {lstm_rmse:.4f}")

# Predict 2024 data
# All three models are regressors, so their outputs are kept as continuous
# returns. The previous `> 0.5` threshold on the SVR output was a leftover
# from a classifier and collapsed every prediction to 0.
actual_2024_mean = y_actual_2024.mean(axis=1)

svm_preds_2024 = svm_model.predict(X_2024_scaled)
rf_preds_2024 = rf_model.predict(X_2024_scaled)
lstm_preds_2024 = lstm_model.predict(np.expand_dims(X_2024_scaled, axis=-1))

svm_rmse_2024 = np.sqrt(mean_squared_error(actual_2024_mean, svm_preds_2024))
lstm_rmse_2024 = np.sqrt(mean_squared_error(actual_2024_mean, lstm_preds_2024))
rf_rmse_2024 = np.sqrt(mean_squared_error(actual_2024_mean, rf_preds_2024))
print(f"SVR RMSE for 2024: {svm_rmse_2024:.4f}")
print(f"LSTM RMSE for 2024: {lstm_rmse_2024:.4f}")
print(f"Random Forest RMSE for 2024: {rf_rmse_2024:.4f}")

# Visualizing 2024 Predictions
# Everything is plotted against the real row index of `y_actual_2024`
# (assumed to be the trading dates returned by yfinance), so the axis labels
# come from the data. The earlier hard-coded labels on six evenly spaced
# ticks were misplaced: the tick at the end of the year read "Nov 2024".
dates_2024 = y_actual_2024.index

plt.figure(figsize=(10, 6))
plt.plot(dates_2024, y_actual_2024.mean(axis=1).values, label='Actual 2024 Returns', linestyle='dotted')
plt.plot(dates_2024, np.ravel(svm_preds_2024), label="SVM Prediction 2024")
plt.plot(dates_2024, np.ravel(rf_preds_2024), label="Random Forest Prediction 2024")
# Keras returns a column vector; flatten it to line up with the date axis
plt.plot(dates_2024, np.ravel(lstm_preds_2024), label="LSTM Prediction 2024")
plt.legend()
plt.title('Stock Return Predictions for 2024')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()



# Visualizing Results
# `train_test_split` shuffles by default, so the rows of `y_test` are a random
# sample in random order. Hard-coded chronological tick labels on that order
# are meaningless. Sort the sample by its own index and plot against it; the
# index is assumed to hold the trading dates.
order = np.argsort(y_test.index.values)
test_dates = y_test.index[order]

plt.figure(figsize=(10, 6))
# The realised target is drawn too, so the predictions can be judged against it
plt.plot(test_dates, y_test.mean(axis=1).values[order], label='Actual Returns', linestyle='dashed')
plt.plot(test_dates, np.ravel(svm_preds)[order], label="SVR Prediction")
plt.plot(test_dates, np.ravel(rf_preds)[order], label="Random Forest Prediction")
plt.plot(test_dates, np.ravel(lstm_preds)[order], label="LSTM Prediction")
plt.legend()
plt.title('Stock Return Predictions')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()


## Run2

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR  # Changed SVC to SVR for regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Download stock data
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
start_date = '2020-01-01'
# The modelling window stops at the end of 2023. 2024 is downloaded
# separately further down as an out-of-sample evaluation year; including it
# here would let the models train on the rows they are later scored on.
# (yfinance documents `end` as exclusive.)
end_date = '2023-12-31'
data = yf.download(stocks, start=start_date, end=end_date)
# Prefer adjusted closes when the download provides them, else plain closes
if 'Adj Close' in data.columns:
    data = data['Adj Close']
else:
    data = data['Close']
returns = data.pct_change().dropna()

# Feature Engineering
X = returns.iloc[:-1, :]  # Features: every ticker's return on a given day
y = returns.shift(-1).iloc[:-1, :]  # Target: the following day's returns

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization -- fitted on the training split only, then reused
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Both regressors are fitted to the same scalar target: the row-wise mean of
# the next-day returns across all tickers.
train_target = y_train.mean(axis=1)
test_target = y_test.mean(axis=1)

# 1. Support Vector Regression (SVR) Model
svr_params = dict(kernel='rbf', C=20, gamma=0.005, epsilon=0.0005)
svm_model = SVR(**svr_params)
svm_model.fit(X_train_scaled, train_target)
svm_preds = svm_model.predict(X_test_scaled)
svm_mse = mean_squared_error(test_target, svm_preds)
svm_rmse = np.sqrt(svm_mse)
print(f"SVR RMSE: {svm_rmse:.4f}")

# 2. Random Forest Regression
rf_params = dict(n_estimators=300, max_depth=10, min_samples_split=5, random_state=42)
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X_train_scaled, train_target)
rf_preds = rf_model.predict(X_test_scaled)
rf_mse = mean_squared_error(test_target, rf_preds)
rf_rmse = np.sqrt(rf_mse)
print(f"Random Forest RMSE: {rf_rmse:.4f}")

# 3. Long Short-Term Memory (LSTM) Model
# A trailing axis of length 1 is appended, so each scaled feature column
# becomes one step of a length-n_features sequence with a single channel.
X_lstm = X_train_scaled[..., np.newaxis]
X_lstm_test = X_test_scaled[..., np.newaxis]

sequence_length = X_lstm.shape[1]

# LSTM -> dropout -> LSTM, followed by a single-unit regression head
lstm_model = Sequential()
lstm_model.add(LSTM(100, activation='tanh', return_sequences=True, input_shape=(sequence_length, 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(100, activation='tanh'))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mse')
lstm_train_target = y_train.mean(axis=1)
lstm_model.fit(X_lstm, lstm_train_target, epochs=20, batch_size=8, verbose=1)

lstm_preds = lstm_model.predict(X_lstm_test)
lstm_mse = mean_squared_error(y_test.mean(axis=1), lstm_preds)
lstm_rmse = np.sqrt(lstm_mse)
print(f"LSTM RMSE: {lstm_rmse:.4f}")

# Predict 2024 data
# Download the 2024 prices, build the same (return -> next-row return) pairs
# as for the training data, and scale them with the already-fitted scaler.
test_start_date, test_end_date = '2024-01-01', '2024-12-31'
test_data = yf.download(stocks, start=test_start_date, end=test_end_date)
price_field_2024 = 'Adj Close' if 'Adj Close' in test_data.columns else 'Close'
test_data = test_data[price_field_2024]

test_returns = test_data.pct_change().dropna()
X_2024 = test_returns.iloc[:-1, :]
y_actual_2024 = test_returns.shift(-1).iloc[:-1, :]
X_2024_scaled = scaler.transform(X_2024)

# Every model is scored against the row-wise mean of the realised returns
target_2024 = y_actual_2024.mean(axis=1)

# Predict with SVR
svm_preds_2024 = svm_model.predict(X_2024_scaled)
svm_rmse_2024 = np.sqrt(mean_squared_error(target_2024, svm_preds_2024))
print(f"SVR RMSE for 2024: {svm_rmse_2024:.4f}")

# Predict with Random Forest
rf_preds_2024 = rf_model.predict(X_2024_scaled)
rf_rmse_2024 = np.sqrt(mean_squared_error(target_2024, rf_preds_2024))
print(f"Random Forest RMSE for 2024: {rf_rmse_2024:.4f}")

# Predict with LSTM (needs the same trailing channel axis used in training)
X_2024_lstm = X_2024_scaled[..., np.newaxis]
lstm_preds_2024 = lstm_model.predict(X_2024_lstm)
lstm_rmse_2024 = np.sqrt(mean_squared_error(target_2024, lstm_preds_2024))
print(f"LSTM RMSE for 2024: {lstm_rmse_2024:.4f}")

# Visualization of the 2024 predictions
# Everything is plotted against the real row index of `y_actual_2024`
# (assumed to be the trading dates returned by yfinance), so the axis labels
# come from the data. The earlier hard-coded labels on six evenly spaced
# ticks were misplaced: the tick at the end of the year read "Nov 2024".
dates_2024 = y_actual_2024.index

plt.figure(figsize=(10, 6))
plt.plot(dates_2024, y_actual_2024.mean(axis=1).values, label='Actual 2024 Returns', linestyle='dotted')
plt.plot(dates_2024, np.ravel(svm_preds_2024), label='SVR Prediction 2024')
plt.plot(dates_2024, np.ravel(rf_preds_2024), label='Random Forest Prediction 2024')
# Keras returns a column vector; flatten it to line up with the date axis
plt.plot(dates_2024, np.ravel(lstm_preds_2024), label='LSTM Prediction 2024')
plt.legend()
plt.title('Stock Return Predictions for 2024')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()

# Visualization of the held-out test split
# `train_test_split` shuffles by default, so the rows of `y_test` are a random
# sample in random order. Hard-coded chronological tick labels on that order
# are meaningless. Sort the sample by its own index and plot against it; the
# index is assumed to hold the trading dates.
order = np.argsort(y_test.index.values)
test_dates = y_test.index[order]

plt.figure(figsize=(10, 6))
plt.plot(test_dates, y_test.mean(axis=1).values[order], label='Actual Returns', linestyle="dashed")
plt.plot(test_dates, np.ravel(svm_preds)[order], label="SVR Prediction")
plt.plot(test_dates, np.ravel(rf_preds)[order], label="Random Forest Prediction")
plt.plot(test_dates, np.ravel(lstm_preds)[order], label="LSTM Prediction")
plt.legend()
plt.title('Stock Return Predictions')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()


## Run3

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR  # Changed SVC to SVR for regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Download stock data
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
start_date = '2020-01-01'
# The modelling window stops at the end of 2023. 2024 is downloaded
# separately further down as an out-of-sample evaluation year; including it
# here would let the models train on the rows they are later scored on.
# (yfinance documents `end` as exclusive.)
end_date = '2023-12-31'
data = yf.download(stocks, start=start_date, end=end_date)
# Prefer adjusted closes when the download provides them, else plain closes
if 'Adj Close' in data.columns:
    data = data['Adj Close']
else:
    data = data['Close']
returns = data.pct_change().dropna()

# Feature Engineering
X = returns.iloc[:-1, :]  # Features: every ticker's return on a given day
y = returns.shift(-1).iloc[:-1, :]  # Target: the following day's returns

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization -- fitted on the training split only, then reused
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Both regressors are fitted to the same scalar target: the row-wise mean of
# the next-day returns across all tickers.
train_target = y_train.mean(axis=1)
test_target = y_test.mean(axis=1)

# 1. Support Vector Regression (SVR) Model
svr_params = dict(kernel='rbf', C=15, gamma=0.01, epsilon=0.001)
svm_model = SVR(**svr_params)
svm_model.fit(X_train_scaled, train_target)
svm_preds = svm_model.predict(X_test_scaled)
svm_mse = mean_squared_error(test_target, svm_preds)
svm_rmse = np.sqrt(svm_mse)
print(f"SVR RMSE: {svm_rmse:.4f}")

# 2. Random Forest Regression
rf_params = dict(n_estimators=500, max_depth=20, min_samples_split=2, random_state=42)
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X_train_scaled, train_target)
rf_preds = rf_model.predict(X_test_scaled)
rf_mse = mean_squared_error(test_target, rf_preds)
rf_rmse = np.sqrt(rf_mse)
print(f"Random Forest RMSE: {rf_rmse:.4f}")

# 3. Long Short-Term Memory (LSTM) Model
# A trailing axis of length 1 is appended, so each scaled feature column
# becomes one step of a length-n_features sequence with a single channel.
X_lstm = X_train_scaled[..., np.newaxis]
X_lstm_test = X_test_scaled[..., np.newaxis]

sequence_length = X_lstm.shape[1]

# Three stacked 200-unit LSTM layers followed by a single-unit regression head
lstm_model = Sequential()
lstm_model.add(LSTM(200, activation='tanh', return_sequences=True, input_shape=(sequence_length, 1)))
lstm_model.add(LSTM(200, activation='tanh', return_sequences=True))
lstm_model.add(LSTM(200, activation='tanh'))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mse')
lstm_train_target = y_train.mean(axis=1)
lstm_model.fit(X_lstm, lstm_train_target, epochs=50, batch_size=4, verbose=1)

lstm_preds = lstm_model.predict(X_lstm_test)
lstm_mse = mean_squared_error(y_test.mean(axis=1), lstm_preds)
lstm_rmse = np.sqrt(lstm_mse)
print(f"LSTM RMSE: {lstm_rmse:.4f}")

# Predict 2024 data
# Download the 2024 prices, build the same (return -> next-row return) pairs
# as for the training data, and scale them with the already-fitted scaler.
test_start_date, test_end_date = '2024-01-01', '2024-12-31'
test_data = yf.download(stocks, start=test_start_date, end=test_end_date)
price_field_2024 = 'Adj Close' if 'Adj Close' in test_data.columns else 'Close'
test_data = test_data[price_field_2024]

test_returns = test_data.pct_change().dropna()
X_2024 = test_returns.iloc[:-1, :]
y_actual_2024 = test_returns.shift(-1).iloc[:-1, :]
X_2024_scaled = scaler.transform(X_2024)

# Every model is scored against the row-wise mean of the realised returns
target_2024 = y_actual_2024.mean(axis=1)

# Predict with SVR
svm_preds_2024 = svm_model.predict(X_2024_scaled)
svm_rmse_2024 = np.sqrt(mean_squared_error(target_2024, svm_preds_2024))
print(f"SVR RMSE for 2024: {svm_rmse_2024:.4f}")

# Predict with Random Forest
rf_preds_2024 = rf_model.predict(X_2024_scaled)
rf_rmse_2024 = np.sqrt(mean_squared_error(target_2024, rf_preds_2024))
print(f"Random Forest RMSE for 2024: {rf_rmse_2024:.4f}")

# Predict with LSTM (needs the same trailing channel axis used in training)
X_2024_lstm = X_2024_scaled[..., np.newaxis]
lstm_preds_2024 = lstm_model.predict(X_2024_lstm)
lstm_rmse_2024 = np.sqrt(mean_squared_error(target_2024, lstm_preds_2024))
print(f"LSTM RMSE for 2024: {lstm_rmse_2024:.4f}")

# Visualization of the 2024 predictions
# Everything is plotted against the real row index of `y_actual_2024`
# (assumed to be the trading dates returned by yfinance), so the axis labels
# come from the data. The earlier hard-coded labels on six evenly spaced
# ticks were misplaced: the tick at the end of the year read "Nov 2024".
dates_2024 = y_actual_2024.index

plt.figure(figsize=(10, 6))
plt.plot(dates_2024, y_actual_2024.mean(axis=1).values, label='Actual 2024 Returns', linestyle='dotted')
plt.plot(dates_2024, np.ravel(svm_preds_2024), label='SVR Prediction 2024')
plt.plot(dates_2024, np.ravel(rf_preds_2024), label='Random Forest Prediction 2024')
# Keras returns a column vector; flatten it to line up with the date axis
plt.plot(dates_2024, np.ravel(lstm_preds_2024), label='LSTM Prediction 2024')
plt.legend()
plt.title('Stock Return Predictions for 2024')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()

# Visualization of the held-out test split
# `train_test_split` shuffles by default, so the rows of `y_test` are a random
# sample in random order. Hard-coded chronological tick labels on that order
# are meaningless. Sort the sample by its own index and plot against it; the
# index is assumed to hold the trading dates.
order = np.argsort(y_test.index.values)
test_dates = y_test.index[order]

plt.figure(figsize=(10, 6))
plt.plot(test_dates, y_test.mean(axis=1).values[order], label='Actual Returns', linestyle='dotted')
plt.plot(test_dates, np.ravel(svm_preds)[order], label="SVR Prediction")
plt.plot(test_dates, np.ravel(rf_preds)[order], label="Random Forest Prediction")
plt.plot(test_dates, np.ravel(lstm_preds)[order], label="LSTM Prediction")
plt.legend()
plt.title('Stock Return Predictions')
plt.xlabel('Date')
plt.ylabel('Return')
plt.gcf().autofmt_xdate()
plt.show()

#run3