In [6]:
# Install required libraries if not already installed


# Import required libraries
import pandas as pd
import yfinance as yf
from alpha_vantage.timeseries import TimeSeries
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from pymongo import MongoClient
import numpy as np
from scipy.optimize import minimize
import statsmodels.api as sm
import ipywidgets as widgets
from IPython.display import display


In [7]:
# Function to fetch stock data from Yahoo Finance
def fetch_yahoo_finance_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data

# Function to fetch data from Alpha Vantage
def fetch_alpha_vantage_data(ticker, api_key):
    ts = TimeSeries(key=api_key, output_format='pandas')
    data, meta_data = ts.get_daily(symbol=ticker, outputsize='full')
    return data

# Function to clean the data
def clean_data(data):
    data = data.drop_duplicates()
    data = data.fillna(method='ffill').fillna(method='bfill')
    return data

# Function to normalize the data
def normalize_data(data):
    scaler = MinMaxScaler()
    normalized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return normalized_data

# Function to standardize the data
def standardize_data(data):
    scaler = StandardScaler()
    standardized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
    return standardized_data

# Function to save DataFrame to MongoDB
def save_to_mongo(collection_name, data, db):
    collection = db[collection_name]
    collection.delete_many({})  # Clear existing data
    collection.insert_many(data.reset_index().to_dict('records'))


In [8]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['financial_data']

# Fetch, clean, normalize, and standardize data
yahoo_data = fetch_yahoo_finance_data('AAPL', '2020-01-01', '2023-01-01')
alpha_data = fetch_alpha_vantage_data('AAPL', 'USLF3P1QRCK9TLCC')
cleaned_yahoo_data = clean_data(yahoo_data)
cleaned_alpha_data = clean_data(alpha_data)
normalized_yahoo_data = normalize_data(cleaned_yahoo_data)
standardized_yahoo_data = standardize_data(cleaned_yahoo_data)

# Save the data to MongoDB
save_to_mongo('yahoo_finance', cleaned_yahoo_data, db)
save_to_mongo('alpha_vantage', cleaned_alpha_data, db)
save_to_mongo('normalized_yahoo_finance', normalized_yahoo_data, db)
save_to_mongo('standardized_yahoo_finance', standardized_yahoo_data, db)


[*********************100%%**********************]  1 of 1 completed
  data = data.fillna(method='ffill').fillna(method='bfill')


In [9]:
# Split data into training and testing sets
X = standardized_yahoo_data.drop('Close', axis=1)
y = standardized_yahoo_data['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gb_model.fit(X_train, y_train)

# Make predictions
rf_predictions = rf_model.predict(X_test)
gb_predictions = gb_model.predict(X_test)

# Evaluate the models
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
gb_mse = mean_squared_error(y_test, gb_predictions)
gb_r2 = r2_score(y_test, gb_predictions)

print(f"Random Forest - MSE: {rf_mse}, R2: {rf_r2}")
print(f"Gradient Boosting - MSE: {gb_mse}, R2: {gb_r2}")


Random Forest - MSE: 0.00048481716564134625, R2: 0.9995539070963233
Gradient Boosting - MSE: 0.00043620965950665514, R2: 0.9995986321289517


In [15]:
import pandas as pd

# Function to forecast future values using trained models
def forecast_future_values(model, data, periods=30):
    # Ensure that the data has no missing values
    data = data.ffill().bfill()
    
    # Generate future dates
    last_date = data.index[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=periods, freq='D')
    future_data = pd.DataFrame(index=future_dates)
    
    # Ensure the future_data DataFrame has the same columns as data
    future_features = pd.DataFrame(index=future_dates, columns=data.columns)
    
    # Fill the future_features DataFrame with the last known values
    for col in data.columns:
        future_features[col] = data[col].iloc[-1]
    
    # Reindex future_features to ensure it has all columns
    future_features = future_features.fillna(0)
    
    # Align feature names to match those used during training
    if hasattr(model, 'feature_names_in_'):
        missing_cols = set(model.feature_names_in_) - set(future_features.columns)
        if missing_cols:
            raise ValueError(f"Feature names missing in future data: {missing_cols}")
    
    # Ensure future_features has the correct columns order
    future_features = future_features[model.feature_names_in_]
    
    # Make predictions
    future_predictions = model.predict(future_features)
    
    return future_data, future_predictions

# Forecast future values
rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_yahoo_data)
gb_future_data, gb_future_predictions = forecast_future_values(gb_model, cleaned_yahoo_data)

# Save future predictions to MongoDB
def save_to_mongo(collection_name, data, db):
    collection = db[collection_name]
    collection.delete_many({})  # Clear existing data
    collection.insert_many(data.to_dict('records'))

save_to_mongo('rf_future_predictions', pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions}), db)
save_to_mongo('gb_future_predictions', pd.DataFrame({'Date': gb_future_data.index, 'Predicted': gb_future_predictions}), db)


In [16]:
# Function to calculate portfolio return and risk
def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))
    return portfolio_return, portfolio_risk

# Function to minimize the negative Sharpe ratio
def minimize_sharpe_ratio(weights, returns, risk_free_rate=0.01):
    portfolio_return, portfolio_risk = portfolio_performance(weights, returns)
    return - (portfolio_return - risk_free_rate) / portfolio_risk

# Optimization constraints
def optimize_portfolio(returns):
    num_assets = len(returns.columns)
    initial_weights = num_assets * [1. / num_assets]
    bounds = tuple((0, 1) for asset in range(num_assets))
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    
    result = minimize(minimize_sharpe_ratio, initial_weights, args=(returns,), method='SLSQP', bounds=bounds, constraints=constraints)
    return result.x

# Example usage
historical_returns = cleaned_yahoo_data.pct_change().dropna()
optimal_weights = optimize_portfolio(historical_returns)

print(f"Optimal portfolio weights: {optimal_weights}")


Optimal portfolio weights: [0.00000000e+00 0.00000000e+00 8.91904397e-01 2.88250502e-18
 0.00000000e+00 1.08095603e-01]


In [17]:
# Anomaly detection function
def detect_anomalies(data):
    residuals = sm.tsa.ARIMA(data, order=(1, 0, 0)).fit().resid
    std_residuals = np.std(residuals)
    anomalies = residuals[np.abs(residuals) > 3 * std_residuals]
    return anomalies

# Monte Carlo simulation function
def monte_carlo_simulation(returns, num_simulations=1000):
    simulation_results = []
    for _ in range(num_simulations):
        simulated_returns = np.random.choice(returns, size=len(returns))
        simulated_cumulative_returns = np.cumprod(1 + simulated_returns) - 1
        simulation_results.append(simulated_cumulative_returns[-1])
    return simulation_results

# Example usage
anomalies = detect_anomalies(cleaned_yahoo_data['Close'])
simulation_results = monte_carlo_simulation(historical_returns['Close'])

print(f"Detected anomalies: {anomalies}")
print(f"Monte Carlo simulation results: {simulation_results}")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Detected anomalies: Date
2020-01-02   -43.661170
2020-07-31     9.986509
2020-09-03   -10.473174
2022-01-28    11.259786
2022-10-28    11.036419
2022-11-10    12.059666
dtype: float64
Monte Carlo simulation results: [0.902433206325367, 1.3841182033964525, -0.44101160401951944, 0.7476021498651051, 0.8179883036018645, -0.22031031147310398, -0.19793970928924143, 2.755969439080621, 1.1221347284641552, 2.064259964748514, 0.3600452899836486, 0.4294035437029484, 7.008916360139473, -0.36283996124344253, -0.75909938374464, 3.233541324315566, 0.929923447003874, 0.3710464581463857, 0.5576516131820612, 3.056671880542086, 1.5332945978587502, 2.1211064687983567, 1.6203423487653317, 0.2568920978414828, 1.2032672018026878, 3.6212783512972937, 1.305078366797634, 1.411078434094823, 0.47529462713362713, -0.20765814252464543, 2.9134325942136297, 0.6589684198992098, 0.9604789348785863, 0.35664517984076993, 2.3414114637529084, 0.6852842580809979, -0.1516337136550544, 1.3687334079551197, 0.8236313590342919, 

In [22]:
from IPython.display import display, HTML
import pandas as pd
import ipywidgets as widgets

# Function to display user interface
def create_user_interface():
    ticker_input = widgets.Text(description='Ticker:')
    start_date_input = widgets.Text(description='Start Date:')
    end_date_input = widgets.Text(description='End Date:')
    submit_button = widgets.Button(description='Submit')
    
    def on_button_click(b):
        ticker = ticker_input.value
        start_date = start_date_input.value
        end_date = end_date_input.value
        data = fetch_yahoo_finance_data(ticker, start_date, end_date)
        cleaned_data = clean_data(data)
        normalized_data = normalize_data(cleaned_data)
        standardized_data = standardize_data(cleaned_data)
        
        # Display cleaned data
        display(HTML("<h3>Cleaned Data</h3>"))
        display(cleaned_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Cleaned Data"))

        # Display normalized data
        display(HTML("<h3>Normalized Data</h3>"))
        display(normalized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Normalized Data"))

        # Display standardized data
        display(HTML("<h3>Standardized Data</h3>"))
        display(standardized_data.head().style.set_table_attributes('class="table table-striped"').set_caption("Standardized Data"))
        
        # Forecast future values
        rf_future_data, rf_future_predictions = forecast_future_values(rf_model, cleaned_data)
        future_df = pd.DataFrame({'Date': rf_future_data.index, 'Predicted': rf_future_predictions})
        display(HTML("<h3>Future Predictions</h3>"))
        display(future_df.style.set_table_attributes('class="table table-striped"').set_caption("Future Predictions"))
        
        # Portfolio Optimization
        historical_returns = cleaned_data.pct_change().dropna()
        optimal_weights = optimize_portfolio(historical_returns)
        display(HTML(f"<h3>Optimal Portfolio Weights</h3><p>{optimal_weights}</p>"))

        # Risk Management
        anomalies = detect_anomalies(cleaned_data['Close'])
        simulation_results = monte_carlo_simulation(historical_returns['Close'])
        display(HTML(f"<h3>Detected Anomalies</h3><p>{anomalies}</p>"))
        display(HTML(f"<h3>Monte Carlo Simulation Results</h3><p>{simulation_results}</p>"))

    submit_button.on_click(on_button_click)
    display(ticker_input, start_date_input, end_date_input, submit_button)

# Create the user interface
create_user_interface()


Text(value='', description='Ticker:')

Text(value='', description='Start Date:')

Text(value='', description='End Date:')

Button(description='Submit', style=ButtonStyle())