In [1]:
# Import required libraries
import pandas as pd
import yfinance as yf
from alpha_vantage.timeseries import TimeSeries
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from pymongo import MongoClient
import numpy as np
from scipy.optimize import minimize
import statsmodels.api as sm
import ipywidgets as widgets
from IPython.display import display, HTML
import matplotlib.pyplot as plt


In [2]:
# Function to fetch stock data from Yahoo Finance
def fetch_yahoo_finance_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data

# Function to fetch data from Alpha Vantage
def fetch_alpha_vantage_data(ticker, api_key):
    ts = TimeSeries(key=api_key, output_format='pandas')
    data, meta_data = ts.get_daily(symbol=ticker, outputsize='full')
    return data


In [3]:
# Function to clean the data
def clean_data(data):
    data = data.drop_duplicates()
    data = data.fillna(method='ffill').fillna(method='bfill')
    return data

# Function to normalize the data
def normalize_data(data):
    scaler = MinMaxScaler()
    normalized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return normalized_data

# Function to standardize the data
def standardize_data(data):
    scaler = StandardScaler()
    standardized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return standardized_data


In [4]:
# Function to save DataFrame to MongoDB
def save_to_mongo(collection_name, data, db):
    collection = db[collection_name]
    collection.delete_many({})  # Clear existing data
    collection.insert_many(data.reset_index().to_dict('records'))

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['financial_data']


In [5]:
# Fetch, clean, normalize, and standardize data
yahoo_data = fetch_yahoo_finance_data('AAPL', '2020-01-01', '2023-01-01')
alpha_data = fetch_alpha_vantage_data('AAPL', 'USLF3P1QRCK9TLCC')
cleaned_yahoo_data = clean_data(yahoo_data)
cleaned_alpha_data = clean_data(alpha_data)
normalized_yahoo_data = normalize_data(cleaned_yahoo_data)
standardized_yahoo_data = standardize_data(cleaned_yahoo_data)

# Save the data to MongoDB
save_to_mongo('yahoo_finance', cleaned_yahoo_data, db)
save_to_mongo('alpha_vantage', cleaned_alpha_data, db)
save_to_mongo('normalized_yahoo_finance', normalized_yahoo_data, db)
save_to_mongo('standardized_yahoo_finance', standardized_yahoo_data, db)

# Split data into training and testing sets
X = standardized_yahoo_data.drop('Close', axis=1)
y = standardized_yahoo_data['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gb_model.fit(X_train, y_train)

# Make predictions
rf_predictions = rf_model.predict(X_test)
gb_predictions = gb_model.predict(X_test)

# Evaluate the models
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
gb_mse = mean_squared_error(y_test, gb_predictions)
gb_r2 = r2_score(y_test, gb_predictions)

print(f"Random Forest - MSE: {rf_mse}, R2: {rf_r2}")
print(f"Gradient Boosting - MSE: {gb_mse}, R2: {gb_r2}")


[*********************100%%**********************]  1 of 1 completed
  data = data.fillna(method='ffill').fillna(method='bfill')


Random Forest - MSE: 0.00048421203025467683, R2: 0.9995544638971565
Gradient Boosting - MSE: 0.00043620965950665514, R2: 0.9995986321289517


In [6]:
# Function to forecast future values using trained models
def forecast_future_values(model, data, periods=30):
    # Ensure that the data has no missing values
    data = data.ffill().bfill()
    
    # Generate future dates
    last_date = data.index[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=periods, freq='D')
    future_data = pd.DataFrame(index=future_dates)
    
    # Ensure the future_data DataFrame has the same columns as data
    future_features = pd.DataFrame(index=future_dates, columns=data.columns)
    
    # Fill the future_features DataFrame with the last known values
    for col in data.columns:
        future_features[col] = data[col].iloc[-1]
    
    # Reindex future_features to ensure it has all columns
    future_features = future_features.fillna(0)
    
    # Align feature names to match those used during training
    if hasattr(model, 'feature_names_in_'):
        missing_cols = set(model.feature_names_in_) - set(future_features.columns)
        if missing_cols:
            raise ValueError(f"Feature names missing in future data: {missing_cols}")
    
    # Ensure future_features has the correct columns order
    future_features = future_features[model.feature_names_in_]
    
    # Make predictions
    future_predictions = model.predict(future_features)
    
    return future_data, future_predictions

# Forecast future values
rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_yahoo_data)
gb_future_data, gb_future_predictions = forecast_future_values(gb_model, cleaned_yahoo_data)


In [7]:
# Function to save DataFrame to CSV
def save_to_csv(df, filename):
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# Save results to CSV
save_to_csv(cleaned_yahoo_data, 'cleaned_data.csv')
save_to_csv(normalized_yahoo_data, 'normalized_data.csv')
save_to_csv(standardized_yahoo_data, 'standardized_data.csv')
save_to_csv(pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions}), 'future_predictions.csv')


Data saved to cleaned_data.csv
Data saved to normalized_data.csv
Data saved to standardized_data.csv
Data saved to future_predictions.csv


In [8]:
# Function to calculate portfolio return and risk
def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))
    return portfolio_return, portfolio_risk

# Function to minimize the negative Sharpe ratio
def minimize_sharpe_ratio(weights, returns, risk_free_rate=0.01):
    portfolio_return, portfolio_risk = portfolio_performance(weights, returns)
    return - (portfolio_return - risk_free_rate) / portfolio_risk

# Optimization constraints
def optimize_portfolio(returns):
    num_assets = len(returns.columns)
    initial_weights = num_assets * [1. / num_assets]
    bounds = tuple((0, 1) for asset in range(num_assets))
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    
    result = minimize(minimize_sharpe_ratio, initial_weights, args=(returns,), method='SLSQP', bounds=bounds, constraints=constraints)
    return result.x

# Example usage
historical_returns = cleaned_yahoo_data.pct_change().dropna()
optimal_weights = optimize_portfolio(historical_returns)

print(f"Optimal portfolio weights: {optimal_weights}")


Optimal portfolio weights: [0.00000000e+00 4.54775626e-18 8.91904395e-01 0.00000000e+00
 0.00000000e+00 1.08095605e-01]


In [9]:
# Anomaly detection function
def detect_anomalies(data):
    residuals = sm.tsa.ARIMA(data, order=(1, 0, 0)).fit().resid
    std_residuals = np.std(residuals)
    anomalies = residuals[np.abs(residuals) > 3 * std_residuals]
    return anomalies

# Monte Carlo simulation function
def monte_carlo_simulation(returns, num_simulations=1000):
    simulation_results = []
    for _ in range(num_simulations):
        simulated_returns = np.random.choice(returns, size=len(returns))
        simulated_cumulative_returns = np.cumprod(1 + simulated_returns) - 1
        simulation_results.append(simulated_cumulative_returns[-1])
    return simulation_results

# Example usage
anomalies = detect_anomalies(cleaned_yahoo_data['Close'])
simulation_results = monte_carlo_simulation(historical_returns['Close'])

print(f"Detected anomalies: {anomalies}")
print(f"Monte Carlo simulation results: {simulation_results[:10]}")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Detected anomalies: Date
2020-01-02   -43.661170
2020-07-31     9.986509
2020-09-03   -10.473174
2022-01-28    11.259786
2022-10-28    11.036419
2022-11-10    12.059666
dtype: float64
Monte Carlo simulation results: [3.5224411779332154, 0.9135045360115304, 0.7777819569384445, 0.9624955917571822, 0.4488943567418513, 1.1264682766308853, -0.10669153345227222, 2.3318799308775744, 0.8750042054194229, -0.20236755405753626]


In [10]:
# Plot Stock Prices
def plot_stock_prices(data):
    plt.figure(figsize=(12, 6))
    plt.plot(data.index, data['Close'], label='Close Price')
    plt.title('Stock Prices Over Time')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot Future Predictions
def plot_future_predictions(future_df):
    plt.figure(figsize=(12, 6))
    plt.plot(future_df['Date'], future_df['Predicted'], label='Predicted Future Prices', color='orange')
    plt.title('Future Price Predictions')
    plt.xlabel('Date')
    plt.ylabel('Predicted Price')
    plt.legend()
    plt.grid(True)
    plt.show()

# User Interface with Download Options
def create_user_interface_with_downloads():
    ticker_input = widgets.Text(description='Ticker:')
    start_date_input = widgets.Text(description='Start Date:')
    end_date_input = widgets.Text(description='End Date:')
    submit_button = widgets.Button(description='Submit')
    
    def on_button_click(b):
        ticker = ticker_input.value
        start_date = start_date_input.value
        end_date = end_date_input.value
        data = fetch_yahoo_finance_data(ticker, start_date, end_date)
        cleaned_data = clean_data(data)
        normalized_data = normalize_data(cleaned_data)
        standardized_data = standardize_data(cleaned_data)
        
        # Save results to CSV
        save_to_csv(cleaned_data, 'cleaned_data.csv')
        save_to_csv(normalized_data, 'normalized_data.csv')
        save_to_csv(standardized_data, 'standardized_data.csv')
        
        # Forecast future values
        rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_data)
        future_df = pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions})
        save_to_csv(future_df, 'future_predictions.csv')
        
        # Display cleaned data
        display(HTML("<h3>Cleaned Data</h3>"))
        display(cleaned_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Cleaned Data"))

        # Display normalized data
        display(HTML("<h3>Normalized Data</h3>"))
        display(normalized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Normalized Data"))

        # Display standardized data
        display(HTML("<h3>Standardized Data</h3>"))
        display(standardized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Standardized Data"))
        
        # Display future predictions
        display(HTML("<h3>Future Predictions</h3>"))
        display(future_df.style.set_table_attributes('class="table table-striped"').set_caption("Future Predictions"))

        # Plot visualizations
        plot_stock_prices(cleaned_data)
        plot_future_predictions(future_df)
        
        # Provide download links
        display(HTML('<h3>Download Links</h3>'))
        display(HTML('<a href="cleaned_data.csv" download>Download Cleaned Data CSV</a>'))
        display(HTML('<a href="normalized_data.csv" download>Download Normalized Data CSV</a>'))
        display(HTML('<a href="standardized_data.csv" download>Download Standardized Data CSV</a>'))
        display(HTML('<a href="future_predictions.csv" download>Download Future Predictions CSV</a>'))
    
    submit_button.on_click(on_button_click)
    
    display(ticker_input, start_date_input, end_date_input, submit_button)

create_user_interface_with_downloads()


Text(value='', description='Ticker:')

Text(value='', description='Start Date:')

Text(value='', description='End Date:')

Button(description='Submit', style=ButtonStyle())

In [11]:
# Function to analyze historical performance
def analyze_historical_performance(data):
    mean_return = data['Close'].pct_change().mean()
    volatility = data['Close'].pct_change().std()
    return mean_return, volatility

# Example usage
mean_return, volatility = analyze_historical_performance(cleaned_yahoo_data)
print(f"Mean Return: {mean_return}")
print(f"Volatility: {volatility}")


Mean Return: 0.0009966930451517286
Volatility: 0.023268382096358276


In [12]:
# Function to evaluate forecasted prices
def evaluate_forecasted_prices(predictions, threshold=0.05):
    expected_return = np.mean(predictions)
    if expected_return > threshold:
        recommendation = "Buy"
    else:
        recommendation = "Do not buy"
    return expected_return, recommendation

# Example usage
expected_return_rf, recommendation_rf = evaluate_forecasted_prices(rf_future_predictions)
expected_return_gb, recommendation_gb = evaluate_forecasted_prices(gb_future_predictions)
print(f"Random Forest Expected Return: {expected_return_rf}, Recommendation: {recommendation_rf}")
print(f"Gradient Boosting Expected Return: {expected_return_gb}, Recommendation: {recommendation_gb}")


Random Forest Expected Return: 1.6617898249176748, Recommendation: Buy
Gradient Boosting Expected Return: 1.6890635319662575, Recommendation: Buy


In [13]:
# Function to calculate Sharpe ratio
def calculate_sharpe_ratio(returns, risk_free_rate=0.01):
    mean_return = np.mean(returns)
    volatility = np.std(returns)
    sharpe_ratio = (mean_return - risk_free_rate) / volatility
    return sharpe_ratio

# Example usage
sharpe_ratio_rf = calculate_sharpe_ratio(rf_future_predictions)
sharpe_ratio_gb = calculate_sharpe_ratio(gb_future_predictions)
print(f"Random Forest Sharpe Ratio: {sharpe_ratio_rf}")
print(f"Gradient Boosting Sharpe Ratio: {sharpe_ratio_gb}")

# Recommendation based on Sharpe Ratio
def recommend_investment(sharpe_ratio, threshold=1.0):
    if sharpe_ratio > threshold:
        recommendation = "High potential for profit"
    else:
        recommendation = "High risk of loss"
    return recommendation

# Example usage
recommendation_rf = recommend_investment(sharpe_ratio_rf)
recommendation_gb = recommend_investment(sharpe_ratio_gb)
print(f"Random Forest Investment Recommendation: {recommendation_rf}")
print(f"Gradient Boosting Investment Recommendation: {recommendation_gb}")


Random Forest Sharpe Ratio: 3719500019996808.5
Gradient Boosting Sharpe Ratio: 3780914948447313.0
Random Forest Investment Recommendation: High potential for profit
Gradient Boosting Investment Recommendation: High potential for profit


In [14]:
# Function to display investment recommendations
def display_investment_recommendations():
    display(HTML("<h2>Investment Recommendations</h2>"))
    
    # Historical Performance
    display(HTML(f"<h3>Historical Performance</h3>"))
    display(HTML(f"<p>Mean Return: {mean_return}</p>"))
    display(HTML(f"<p>Volatility: {volatility}</p>"))
    
    # Forecasted Prices Analysis
    display(HTML(f"<h3>Forecasted Prices Analysis</h3>"))
    display(HTML(f"<p>Random Forest Expected Return: {expected_return_rf}, Recommendation: {recommendation_rf}</p>"))
    display(HTML(f"<p>Gradient Boosting Expected Return: {expected_return_gb}, Recommendation: {recommendation_gb}</p>"))
    
    # Risk Assessment
    display(HTML(f"<h3>Risk Assessment</h3>"))
    display(HTML(f"<p>Random Forest Sharpe Ratio: {sharpe_ratio_rf}, Recommendation: {recommendation_rf}</p>"))
    display(HTML(f"<p>Gradient Boosting Sharpe Ratio: {sharpe_ratio_gb}, Recommendation: {recommendation_gb}</p>"))

display_investment_recommendations()
