<a href="https://colab.research.google.com/github/kunan-au/Modeling_Risk/blob/main/Feature_Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Seeking Factors

Minimize

Model using


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, expressed in units of 1e9 bytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 is the threshold this notebook uses to label a runtime as high-RAM.
print(
    'Not using a high-RAM runtime'
    if ram_gb < 20
    else 'You are using a high-RAM runtime!'
)

Packages

In [None]:
import pandas as pd
import gspread
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import auth
from google.auth import default
import statsmodels.api as sm
from scipy import stats

Data Importing

In [None]:
# Set up the Google Sheets API client.
# The calls run in this order: authenticate the Colab user, obtain the
# default Google credentials, then authorize gspread with them.
auth.authenticate_user()
creds, _ = default()  # only the first element of the returned pair is used
gc = gspread.authorize(creds)  # module-level client read by get_sheet_data

In [None]:
def get_sheet_data(spreadsheet_name, worksheet_index=0):
    """Load one worksheet of a Google Sheet into a typed DataFrame.

    The first row of the worksheet becomes the column header. The ``Date``
    column is parsed with the ``%m/%d/%Y`` format (only the text before the
    first whitespace is kept) and stored as ``datetime.date`` objects. The
    ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns are
    converted to numbers; cells that cannot be parsed become NaN.

    Args:
        spreadsheet_name: Title of the spreadsheet, opened through the
            module-level gspread client ``gc``.
        worksheet_index: Index handed to ``get_worksheet``.

    Returns:
        A DataFrame with one row per data row of the worksheet. The header
        row (label 0) is dropped, so the index starts at 1.

    Raises:
        ValueError: If the worksheet contains no header row at all.
        KeyError: If ``Date`` or one of the numeric columns is missing.
    """
    worksheet = gc.open(spreadsheet_name).get_worksheet(worksheet_index)
    rows = worksheet.get_all_values()

    # Fail early with a message that names the sheet; otherwise an empty
    # worksheet only surfaces later as an opaque IndexError/KeyError.
    if not rows or not rows[0]:
        raise ValueError(
            f"Worksheet {worksheet_index} of spreadsheet "
            f"{spreadsheet_name!r} is empty"
        )

    df = pd.DataFrame(rows)
    df.columns = df.iloc[0]  # promote the first row to column labels
    df = df.drop(0)

    # Keep only the date part, e.g. "01/31/2023 16:00:00" -> "01/31/2023".
    date_text = df['Date'].str.split().str[0]
    df['Date'] = pd.to_datetime(date_text, format='%m/%d/%Y').dt.date

    # The sheet delivers every cell as text; coerce the price/volume columns.
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

In [None]:
# Spreadsheet titles to load, one sheet per name.
sheet_names = ["MSFT", "META", "NFLX"]

# One DataFrame per spreadsheet, in the same order as sheet_names.
dataframes = list(map(get_sheet_data, sheet_names))

# All sheets stacked into a single frame with a fresh running index.
combined_df = pd.concat(dataframes, ignore_index=True)

# Show the first rows of each sheet as a quick sanity check.
for df in dataframes:
    preview = df.head()
    print(preview)

Basic Analysis

In [None]:
# Exploratory plots for every loaded sheet. Each pass also adds the derived
# columns MA5, MA30, Price_Range and Momentum to that sheet's DataFrame in
# place, and prints a caption line after each figure is shown.
for df, name in zip(dataframes, sheet_names):
    # Price and Volume Plot: close on top, volume bars underneath.
    plt.figure(figsize=(14, 6))
    plt.subplot(2, 1, 1)
    plt.plot(df['Close'], label='Close Price')
    plt.legend()

    plt.subplot(2, 1, 2)
    plt.bar(df.index, df['Volume'], color='orange')
    plt.tight_layout()
    plt.show()
    print(f'Daily Closing Prices for {name}')
    print(f'Daily Volume for {name}')

    # Moving Average Plot: 5- and 30-row rolling means of the close.
    plt.figure(figsize=(10, 6))
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA30'] = df['Close'].rolling(window=30).mean()
    plt.plot(df['Close'], label='Close Price')
    plt.plot(df['MA5'], label='5-Day MA')
    plt.plot(df['MA30'], label='30-Day MA')
    plt.legend()
    plt.show()
    print(f'Moving Averages for {name}')

    # Price Fluctuation Plot: high minus low for each row.
    plt.figure(figsize=(10, 6))
    df['Price_Range'] = df['High'] - df['Low']
    plt.plot(df['Price_Range'])
    plt.show()
    print(f'Price Fluctuation for {name}')

    # Momentum Plot: close minus the close five rows earlier.
    plt.figure(figsize=(10, 6))
    df['Momentum'] = df['Close'] - df['Close'].shift(5)
    plt.plot(df['Momentum'])
    plt.show()
    print(f'Momentum for {name}')

    # Correlation Heatmap.
    # numeric_only=True is required: the Date column holds datetime.date
    # objects (object dtype), and pandas 2.x raises instead of silently
    # dropping non-numeric columns when computing correlations.
    plt.figure(figsize=(8, 6))
    correlation = df.corr(numeric_only=True)
    sns.heatmap(correlation, annot=True, cmap='coolwarm')
    plt.show()
    print(f'Correlation Heatmap for {name}')

Advanced Analysis

In [None]:
def calculate_and_visualize_volatility(df, window=20):
    """Plot the rolling standard deviation of close-to-close returns.

    Args:
        df: DataFrame with a ``Close`` column.
        window: Number of consecutive return observations in each rolling
            standard deviation.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Percentage change of the close; the leading NaN is dropped before the
    # rolling window is applied.
    rolling_std = (
        df['Close']
        .pct_change()
        .dropna()
        .rolling(window=window)
        .std()
    )

    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(rolling_std, label='Volatility')
    ax.set_title('Volatility Factor')
    ax.legend()
    plt.show()

In [None]:
def calculate_alpha_and_beta(asset_returns, market_returns):
    """Estimate an asset's alpha and beta against the market.

    Fits the least-squares line ``asset = alpha + beta * market``.

    Args:
        asset_returns: Sequence of asset returns (the dependent variable).
        market_returns: Sequence of market returns of the same length
            (the explanatory variable).

    Returns:
        A tuple ``(alpha, beta)``: the intercept and slope of the fit.
    """
    # linregress takes (x, y): the market is the regressor and the asset the
    # response. With the arguments swapped the slope is not the asset's beta.
    fit = stats.linregress(market_returns, asset_returns)
    alpha = fit.intercept
    beta = fit.slope
    return alpha, beta

In [None]:
def calculate_and_visualize_sharpe_ratio(df, risk_free_rate=0.02, periods_per_year=252):
    """Plot the asset's returns and print its annualised Sharpe ratio.

    The returns are row-to-row percentage changes of ``Close``, so their
    mean is a per-period figure. ``risk_free_rate`` is interpreted as a
    yearly rate and is converted to the same per-period basis before it is
    subtracted; subtracting it unconverted would compare a yearly rate with
    a per-period return. The resulting ratio is scaled by
    ``sqrt(periods_per_year)`` to express it on a yearly basis.

    Args:
        df: DataFrame with a ``Close`` column.
        risk_free_rate: Risk-free rate per year (0.02 means 2% a year).
        periods_per_year: Number of rows of ``df`` that make up one year.
            The default of 252 assumes one row per trading day.

    Returns:
        None. The figure is displayed and the ratio is printed.
    """
    returns = df['Close'].pct_change().dropna()

    # Bring the yearly risk-free rate down to the frequency of the returns.
    risk_free_per_period = risk_free_rate / periods_per_year
    excess_return = returns.mean() - risk_free_per_period
    volatility = returns.std()
    sharpe_ratio = (excess_return / volatility) * periods_per_year ** 0.5

    plt.figure(figsize=(10, 6))
    plt.plot(returns, label='Asset Returns')
    plt.title('Asset Returns')
    plt.legend()
    plt.show()

    print(f'Sharpe Ratio: {sharpe_ratio}')

In [None]:
def calculate_and_visualize_r_squared(asset_returns, market_returns):
    """Print the squared correlation between asset and market returns.

    Args:
        asset_returns: Sequence of asset returns.
        market_returns: Sequence of market returns of the same length.

    Returns:
        None. The value is printed.
    """
    # Imported here because the notebook's import cell does not bring in
    # numpy, so the bare name `np` would raise NameError when called.
    import numpy as np

    # Off-diagonal entry of the 2x2 correlation matrix, squared.
    r_squared = np.corrcoef(asset_returns, market_returns)[0, 1] ** 2
    print(f'R-Squared Value: {r_squared}')

In [None]:
def calculate_and_visualize_cointegration(asset1_prices, asset2_prices):
    """Print whether two price series look cointegrated.

    Regresses ``asset1_prices`` on ``asset2_prices`` (no intercept term is
    added) and runs an augmented Dickey-Fuller test on the residuals. A
    p-value below 0.05 is reported as cointegration.

    Args:
        asset1_prices: Price series used as the dependent variable.
        asset2_prices: Price series used as the regressor.

    Returns:
        None. The verdict is printed.
    """
    # Local import: only the alias `sm` is bound by the notebook's import
    # cell, so the dotted name `statsmodels.api...` raised NameError.
    from statsmodels.tsa.stattools import adfuller

    result = sm.OLS(asset1_prices, asset2_prices).fit()
    # adfuller returns a tuple whose second element is the p-value.
    # NOTE(review): ADF p-values on regression residuals are not the
    # Engle-Granger critical values; statsmodels.tsa.stattools.coint may be
    # the more appropriate test here -- confirm before relying on the result.
    cointegration_pvalue = adfuller(result.resid)[1]

    if cointegration_pvalue < 0.05:
        print('The assets are cointegrated.')
    else:
        print('The assets are not cointegrated.')

In [None]:
def calculate_and_visualize_technical_indicator(df, indicator='MA', window=20):
    """Compute one technical indicator on ``df`` and plot it.

    The indicator columns are written into ``df`` in place:

    * ``'MA'`` adds ``Moving_Average`` (rolling mean of ``Close``).
    * ``'RSI'`` adds ``RSI``, built from plain rolling means of the gains
      and losses.
    * ``'Bollinger_Bands'`` adds ``MA``, ``Upper_Band`` and ``Lower_Band``
      (rolling mean plus/minus two rolling standard deviations).

    Args:
        df: DataFrame with a ``Close`` column; modified in place.
        indicator: One of ``'MA'``, ``'RSI'`` or ``'Bollinger_Bands'``.
        window: Rolling window length in rows.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If ``indicator`` is not one of the supported names.
            An unknown name used to produce an empty figure silently.
    """
    supported = ('MA', 'RSI', 'Bollinger_Bands')
    if indicator not in supported:
        raise ValueError(
            f"Unsupported indicator {indicator!r}; expected one of {supported}"
        )

    close = df['Close']

    # Each branch fills in the new columns and describes what to draw as
    # (column, legend label, extra plot keyword arguments).
    if indicator == 'MA':
        df['Moving_Average'] = close.rolling(window=window).mean()
        title = 'Moving Average'
        lines = [('Moving_Average', 'Moving Average', {})]
    elif indicator == 'RSI':
        delta = close.diff(1)
        # Split the changes into gains and (positive) losses; rows that do
        # not qualify, including the leading NaN, count as zero.
        gain = delta.where(delta > 0, 0)
        loss = -delta.where(delta < 0, 0)
        avg_gain = gain.rolling(window=window).mean()
        avg_loss = loss.rolling(window=window).mean()
        rs = avg_gain / avg_loss
        df['RSI'] = 100 - (100 / (1 + rs))
        title = 'Relative Strength Index (RSI)'
        lines = [('RSI', 'RSI', {})]
    else:  # 'Bollinger_Bands'
        rolling_close = close.rolling(window=window)
        band_offset = 2 * rolling_close.std()  # computed once, used twice
        df['MA'] = rolling_close.mean()
        df['Upper_Band'] = df['MA'] + band_offset
        df['Lower_Band'] = df['MA'] - band_offset
        title = 'Bollinger Bands'
        lines = [
            ('Close', 'Close Price', {'alpha': 0.5}),
            ('Upper_Band', 'Upper Bollinger Band', {}),
            ('Lower_Band', 'Lower Bollinger Band', {}),
        ]

    plt.figure(figsize=(10, 6))
    for column, label, style in lines:
        plt.plot(df[column], label=label, **style)
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
def factor_model_analysis(asset_returns, market_returns, risk_factors):
    """Fit an OLS factor model and plot the factor loadings.

    Regresses ``asset_returns`` on ``risk_factors`` plus an intercept,
    prints the regression summary and draws one bar per factor.

    Args:
        asset_returns: Dependent variable of the regression.
        market_returns: Not used by this function; kept so existing calls
            keep working.
        risk_factors: DataFrame with one column per risk factor.

    Returns:
        None. The summary is printed and the figure is displayed.
    """
    X = sm.add_constant(risk_factors)
    model = sm.OLS(asset_returns, X).fit()
    print(model.summary())

    # Pick the loadings by factor name rather than by position. A positional
    # [1:] slice assumes an intercept was prepended, but add_constant leaves
    # the data unchanged when it already contains a constant column, which
    # would drop a real factor and mismatch the bar heights and labels.
    factor_names = list(risk_factors.columns)
    loadings = model.params.loc[factor_names]

    plt.figure(figsize=(10, 6))
    plt.bar(risk_factors.columns, loadings)
    plt.xlabel('Risk Factors')
    plt.ylabel('Factor Loadings')
    plt.title('Factor Loadings for Asset Returns')
    plt.show()