In [14]:
import pandas as pd
import yfinance as yf
from alpha_vantage.timeseries import TimeSeries
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from pymongo import MongoClient
import numpy as np
from scipy.optimize import minimize
import statsmodels.api as sm
import ipywidgets as widgets
from IPython.display import display, HTML


In [15]:
# Function to fetch stock data from Yahoo Finance
def fetch_yahoo_finance_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data

# Function to fetch data from Alpha Vantage
def fetch_alpha_vantage_data(ticker, api_key):
    ts = TimeSeries(key=api_key, output_format='pandas')
    data, meta_data = ts.get_daily(symbol=ticker, outputsize='full')
    return data

# Function to clean the data
def clean_data(data):
    data = data.drop_duplicates()
    data = data.fillna(method='ffill').fillna(method='bfill')
    return data

# Function to normalize the data
def normalize_data(data):
    scaler = MinMaxScaler()
    normalized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return normalized_data

# Function to standardize the data
def standardize_data(data):
    scaler = StandardScaler()
    standardized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return standardized_data

# Function to save DataFrame to MongoDB
def save_to_mongo(collection_name, data, db):
    collection = db[collection_name]
    collection.delete_many({})  # Clear existing data
    collection.insert_many(data.reset_index().to_dict('records'))

# Function to forecast future values using trained models
def forecast_future_values(model, data, periods=30):
    data = data.ffill().bfill()
    last_date = data.index[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=periods, freq='D')
    future_data = pd.DataFrame(index=future_dates)
    future_features = pd.DataFrame(index=future_dates, columns=data.columns)
    for col in data.columns:
        future_features[col] = data[col].iloc[-1]
    future_features = future_features.fillna(0)
    if hasattr(model, 'feature_names_in_'):
        missing_cols = set(model.feature_names_in_) - set(future_features.columns)
        if missing_cols:
            raise ValueError(f"Feature names missing in future data: {missing_cols}")
    future_features = future_features[model.feature_names_in_]
    future_predictions = model.predict(future_features)
    return future_data, future_predictions

# Function to calculate portfolio return and risk
def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))
    return portfolio_return, portfolio_risk

# Function to minimize the negative Sharpe ratio
def minimize_sharpe_ratio(weights, returns, risk_free_rate=0.01):
    portfolio_return, portfolio_risk = portfolio_performance(weights, returns)
    return - (portfolio_return - risk_free_rate) / portfolio_risk

# Optimization constraints
def optimize_portfolio(returns):
    num_assets = len(returns.columns)
    initial_weights = num_assets * [1. / num_assets]
    bounds = tuple((0, 1) for asset in range(num_assets))
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    result = minimize(minimize_sharpe_ratio, initial_weights, args=(returns,), method='SLSQP', bounds=bounds, constraints=constraints)
    return result.x

# Anomaly detection function
def detect_anomalies(data):
    residuals = sm.tsa.ARIMA(data, order=(1, 0, 0)).fit().resid
    std_residuals = np.std(residuals)
    anomalies = residuals[np.abs(residuals) > 3 * std_residuals]
    return anomalies

# Monte Carlo simulation function
def monte_carlo_simulation(returns, num_simulations=1000):
    simulation_results = []
    for _ in range(num_simulations):
        simulated_returns = np.random.choice(returns, size=len(returns))
        simulated_cumulative_returns = np.cumprod(1 + simulated_returns) - 1
        simulation_results.append(simulated_cumulative_returns[-1])
    return simulation_results


In [16]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['financial_data']

# Fetch, clean, normalize, and standardize data
yahoo_data = fetch_yahoo_finance_data('AAPL', '2020-01-01', '2023-01-01')
alpha_data = fetch_alpha_vantage_data('AAPL', 'YOUR_ALPHA_VANTAGE_API_KEY')
cleaned_yahoo_data = clean_data(yahoo_data)
cleaned_alpha_data = clean_data(alpha_data)
normalized_yahoo_data = normalize_data(cleaned_yahoo_data)
standardized_yahoo_data = standardize_data(cleaned_yahoo_data)

# Save the data to MongoDB
save_to_mongo('yahoo_finance', cleaned_yahoo_data, db)
save_to_mongo('alpha_vantage', cleaned_alpha_data, db)
save_to_mongo('normalized_yahoo_finance', normalized_yahoo_data, db)
save_to_mongo('standardized_yahoo_finance', standardized_yahoo_data, db)


[*********************100%%**********************]  1 of 1 completed
  data = data.fillna(method='ffill').fillna(method='bfill')


In [17]:
# Split data into training and testing sets
X = standardized_yahoo_data.drop('Close', axis=1)
y = standardized_yahoo_data['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gb_model.fit(X_train, y_train)

# Make predictions
rf_predictions = rf_model.predict(X_test)
gb_predictions = gb_model.predict(X_test)

# Evaluate the models
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
gb_mse = mean_squared_error(y_test, gb_predictions)
gb_r2 = r2_score(y_test, gb_predictions)

print(f"Random Forest - MSE: {rf_mse}, R2: {rf_r2}")
print(f"Gradient Boosting - MSE: {gb_mse}, R2: {gb_r2}")

# Forecast future values
rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_yahoo_data)
gb_future_data, gb_future_predictions = forecast_future_values(gb_model, cleaned_yahoo_data)

# Save future predictions to MongoDB
save_to_mongo('rf_future_predictions', pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions}), db)
save_to_mongo('gb_future_predictions', pd.DataFrame({'Date': gb_future_data.index, 'Predicted': gb_future_predictions}), db)


Random Forest - MSE: 0.00048479227308002604, R2: 0.9995539300006174
Gradient Boosting - MSE: 0.00043620965950665514, R2: 0.9995986321289517


In [18]:
# Example usage
historical_returns = cleaned_yahoo_data.pct_change().dropna()
optimal_weights = optimize_portfolio(historical_returns)

print(f"Optimal portfolio weights: {optimal_weights}")

# Anomaly detection
anomalies = detect_anomalies(cleaned_yahoo_data['Close'])
simulation_results = monte_carlo_simulation(historical_returns['Close'])

print(f"Detected anomalies: {anomalies}")
print(f"Monte Carlo simulation results: {simulation_results}")


Optimal portfolio weights: [0.         0.         0.89190439 0.         0.         0.10809561]


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Detected anomalies: Date
2020-01-02   -43.661170
2020-07-31     9.986509
2020-09-03   -10.473174
2022-01-28    11.259786
2022-10-28    11.036419
2022-11-10    12.059666
dtype: float64
Monte Carlo simulation results: [1.3263116389837868, 5.076307421772379, 2.83039998216558, 5.637227808708663, -0.24719585720398252, 0.9654958688356965, 0.8250744063117339, 0.106919812831481, 0.05529920183083559, 3.8818781598847174, 0.33948807135877335, -0.11407645310631997, -0.2862344042358208, 0.5037507767058023, 0.0029951641389169836, 0.5727890793633557, 1.1637081488977552, 1.3726093615055173, -0.11567442990822563, 0.5656641495396881, 1.88027949804425, -0.10166989629776324, 0.7785682778008238, 0.08075987707350518, -0.04367810250294646, 1.0568334940977864, 0.6717275531882951, 1.5631848845614273, 1.2606228231990961, 1.6343034983862061, 1.4518763184396337, -0.11348384200070005, 1.5586945673340455, 0.1257351638521873, 2.218041941686106, 2.7796169042551035, 3.3321690960007864, 0.8352711030544371, 0.0815195807

In [19]:
# Function to display user interface
def create_user_interface():
    ticker_input = widgets.Text(description='Ticker:')
    start_date_input = widgets.Text(description='Start Date:')
    end_date_input = widgets.Text(description='End Date:')
    submit_button = widgets.Button(description='Submit')
    
    def on_button_click(b):
        ticker = ticker_input.value
        start_date = start_date_input.value
        end_date = end_date_input.value
        data = fetch_yahoo_finance_data(ticker, start_date, end_date)
        cleaned_data = clean_data(data)
        normalized_data = normalize_data(cleaned_data)
        standardized_data = standardize_data(cleaned_data)
        
        # Display cleaned data
        display(HTML("<h3>Cleaned Data</h3>"))
        display(cleaned_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Cleaned Data"))

        # Display normalized data
        display(HTML("<h3>Normalized Data</h3>"))
        display(normalized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Normalized Data"))

        # Display standardized data
        display(HTML("<h3>Standardized Data</h3>"))
        display(standardized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Standardized Data"))

        # Forecast future values
        rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_data)
        future_df = pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions})
        display(HTML("<h3>Future Predictions</h3>"))
        display(future_df.style.set_table_attributes('class="table table-striped"').set_caption("Future Predictions"))

        # Portfolio Optimization
        historical_returns = cleaned_data.pct_change().dropna()
        optimal_weights = optimize_portfolio(historical_returns)
        display(HTML(f"<h3>Optimal Portfolio Weights</h3><p>{optimal_weights}</p>"))

        # Risk Management
        anomalies = detect_anomalies(cleaned_data['Close'])
        simulation_results = monte_carlo_simulation(historical_returns['Close'])
        display(HTML(f"<h3>Detected Anomalies</h3><p>{anomalies}</p>"))
        display(HTML(f"<h3>Monte Carlo Simulation Results</h3>"))
        
        # Interpret Monte Carlo Simulation Results
        results_df = pd.DataFrame({'Simulation Result': simulation_results})
        mean_result = results_df.mean()[0]
        variance_result = results_df.var()[0]
        
        display(HTML(f"<p><strong>Mean of Simulated Returns:</strong> {mean_result:.2f}</p>"))
        display(HTML(f"<p><strong>Variance of Simulated Returns:</strong> {variance_result:.2f}</p>"))

        # Recommendations
        display(HTML("<h3>Recommendations</h3>"))
        display(HTML(f"<p>Based on the Monte Carlo simulation results, the mean simulated return is {mean_result:.2f}, and the variance is {variance_result:.2f}. This indicates the expected range of outcomes for your investment.</p>"))
        display(HTML("<p>Consider adjusting your portfolio based on the simulation results. For example, if the simulated results show high volatility or lower-than-expected returns, you may want to consider diversification or other risk management strategies.</p>"))
        display(HTML("<p>Regularly monitor and update your portfolio based on new data and simulations to stay aligned with your financial goals.</p>"))
        display(HTML("<p>Consult with a financial advisor for personalized advice based on these results and your overall investment strategy.</p>"))

    submit_button.on_click(on_button_click)
    display(ticker_input, start_date_input, end_date_input, submit_button)

# Create the user interface
create_user_interface()


Text(value='', description='Ticker:')

Text(value='', description='Start Date:')

Text(value='', description='End Date:')

Button(description='Submit', style=ButtonStyle())

In [11]:
# Function to calculate portfolio return and risk
def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))
    return portfolio_return, portfolio_risk

# Function to minimize the negative Sharpe ratio
def minimize_sharpe_ratio(weights, returns, risk_free_rate=0.01):
    portfolio_return, portfolio_risk = portfolio_performance(weights, returns)
    return - (portfolio_return - risk_free_rate) / portfolio_risk

# Optimization constraints
def optimize_portfolio(returns):
    num_assets = len(returns.columns)
    initial_weights = num_assets * [1. / num_assets]
    bounds = tuple((0, 1) for asset in range(num_assets))
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    
    result = minimize(minimize_sharpe_ratio, initial_weights, args=(returns,), method='SLSQP', bounds=bounds, constraints=constraints)
    return result.x

# Example usage
historical_returns = cleaned_yahoo_data.pct_change().dropna()
optimal_weights = optimize_portfolio(historical_returns)

print(f"Optimal portfolio weights: {optimal_weights}")


Optimal portfolio weights: [0.         0.         0.89190439 0.         0.         0.10809561]


In [12]:
# Anomaly detection function
def detect_anomalies(data):
    residuals = sm.tsa.ARIMA(data, order=(1, 0, 0)).fit().resid
    std_residuals = np.std(residuals)
    anomalies = residuals[np.abs(residuals) > 3 * std_residuals]
    return anomalies

# Monte Carlo simulation function
def monte_carlo_simulation(returns, num_simulations=1000):
    simulation_results = []
    for _ in range(num_simulations):
        simulated_returns = np.random.choice(returns, size=len(returns))
        simulated_cumulative_returns = np.cumprod(1 + simulated_returns) - 1
        simulation_results.append(simulated_cumulative_returns[-1])
    return simulation_results

# Example usage
anomalies = detect_anomalies(cleaned_yahoo_data['Close'])
simulation_results = monte_carlo_simulation(historical_returns['Close'])

print(f"Detected anomalies: {anomalies}")
print(f"Monte Carlo simulation results: {simulation_results}")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Detected anomalies: Date
2020-01-02   -43.661170
2020-07-31     9.986509
2020-09-03   -10.473174
2022-01-28    11.259786
2022-10-28    11.036419
2022-11-10    12.059666
dtype: float64
Monte Carlo simulation results: [2.183056013340061, -0.08528802367292287, 0.6764431504276776, 0.512747985467477, -0.1881416388567294, 0.45813015918999467, 0.24747207282840678, 0.22402912992283763, 0.0038573154993728487, 0.3847216372715401, 1.7233631434878633, 0.6860633544973009, 2.1470172675254666, 1.6769530530353358, 1.5291071968686838, 1.0625643426236184, 0.9102889321029153, -0.31836685996131575, 0.9218029443974249, 1.291666113527612, 1.1017773965674929, 0.13383859244519392, 2.9545485969208682, 0.44929574520562854, 0.23468664496770786, -0.11732005967740877, -0.3722559679704639, 1.6855263525602449, 0.9248887218572983, 0.651629276647461, 1.1379436419220794, 0.8374226761713519, 0.27928837992502964, 0.25299734810561847, -0.17151814247697394, 4.0212683412236325, 1.839378327044165, 1.9759979768030886, 3.32142

In [13]:
# Function to display user interface
def create_user_interface():
    ticker_input = widgets.Text(description='Ticker:')
    start_date_input = widgets.Text(description='Start Date:')
    end_date_input = widgets.Text(description='End Date:')
    submit_button = widgets.Button(description='Submit')
    
    def on_button_click(b):
        ticker = ticker_input.value
        start_date = start_date_input.value
        end_date = end_date_input.value
        data = fetch_yahoo_finance_data(ticker, start_date, end_date)
        cleaned_data = clean_data(data)
        normalized_data = normalize_data(cleaned_data)
        standardized_data = standardize_data(cleaned_data)
        
        # Display cleaned data
        display(HTML("<h3>Cleaned Data</h3>"))
        display(cleaned_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Cleaned Data"))

        # Display normalized data
        display(HTML("<h3>Normalized Data</h3>"))
        display(normalized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Normalized Data"))

        # Display standardized data
        display(HTML("<h3>Standardized Data</h3>"))
        display(standardized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Standardized Data"))
        
        # Forecast future values
        rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_data)
        gb_future_data, gb_future_predictions = forecast_future_values(gb_model, cleaned_data)
        
        # Create DataFrame for display
        rf_future_df = pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions})
        gb_future_df = pd.DataFrame({'Date': gb_future_data.index, 'Predicted': gb_future_predictions})
        
        # Display future predictions
        display(HTML("<h3>Future Predictions with Random Forest Model</h3>"))
        display(rf_future_df.style.set_table_attributes('class="table table-striped"').set_caption("Random Forest Predictions"))

        display(HTML("<h3>Future Predictions with Gradient Boosting Model</h3>"))
        display(gb_future_df.style.set_table_attributes('class="table table-striped"').set_caption("Gradient Boosting Predictions"))
        
        # Portfolio Optimization
        historical_returns = cleaned_data.pct_change().dropna()
        optimal_weights = optimize_portfolio(historical_returns)
        display(HTML(f"<h3>Optimal Portfolio Weights</h3><p>{optimal_weights}</p>"))

        # Risk Management
        anomalies = detect_anomalies(cleaned_data['Close'])
        simulation_results = monte_carlo_simulation(historical_returns['Close'])
        
        # Display anomalies
        display(HTML("<h3>Detected Anomalies</h3>"))
        if anomalies.empty:
            display(HTML("<p>No anomalies detected.</p>"))
        else:
            display(anomalies.reset_index().rename(columns={'index': 'Date', 'Close': 'Anomaly'}).style.set_table_attributes('class="table table-striped"').set_caption("Anomalies"))

        # Display Monte Carlo simulation results
        display(HTML("<h3>Monte Carlo Simulation Results</h3>"))
        sim_df = pd.DataFrame({'Simulation Result': simulation_results})
        display(sim_df.describe().style.set_table_attributes('class="table table-striped"').set_caption("Monte Carlo Simulation Summary"))

    submit_button.on_click(on_button_click)
    display(ticker_input, start_date_input, end_date_input, submit_button)

# Create the user interface
create_user_interface()


Text(value='', description='Ticker:')

Text(value='', description='Start Date:')

Text(value='', description='End Date:')

Button(description='Submit', style=ButtonStyle())