# Valuation Model Notebook
This notebook retrieves microeconomic data from Snowflake, engineers rolling and lag features, trains a Random Forest classifier, and evaluates how well it predicts whether a stock is undervalued or overvalued.

In [None]:
# 1. Import Libraries
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Import custom data retrieval module
from retrieveData import MicroeconomicDataRetriever


In [None]:
# 2. Load Microeconomic Data from Snowflake
# Instantiate the project's retriever (imported from retrieveData) and pull the
# dataset into `micro_df`, which every later cell reads and modifies in place.
# NOTE(review): connection settings and the returned schema live in
# retrieveData, not here. Later cells index 'GDP', 'Inflation_Rate',
# 'Unemployment_Rate' and 'Target_Label' unconditionally, so the result is
# presumably a pandas DataFrame containing those columns — confirm.
retriever = MicroeconomicDataRetriever()
micro_df = retriever.retrieve_data()

# Display the first few rows of the retrieved data to check structure
micro_df.head()


In [None]:
# 3. Feature Engineering on Retrieved Data
# All new columns are added to `micro_df` in place. The insertion order below
# (moving averages, volatilities, then lags) fixes the feature order seen by
# the model, so it is kept deliberately.
# NOTE(review): rolling() and shift() work on row order as-is; this assumes the
# rows are already sorted chronologically — confirm against the retriever.

# Stock-price features are optional: only built when the column is present.
if 'Stock_Price' in micro_df.columns:
    # Moving averages over 3- and 6-row windows
    for window in (3, 6):
        micro_df[f'Stock_Price_MA_{window}'] = (
            micro_df['Stock_Price'].rolling(window=window).mean()
        )
    # Volatility: rolling standard deviation over the same windows
    for window in (3, 6):
        micro_df[f'Stock_Volatility_{window}M'] = (
            micro_df['Stock_Price'].rolling(window=window).std()
        )

# One-step lag of each economic metric (these columns are required; a missing
# one raises KeyError)
for metric in ('GDP', 'Inflation_Rate', 'Unemployment_Rate'):
    micro_df[f'{metric}_Lag_1'] = micro_df[metric].shift(1)

# The rolling windows and the lag leave NaN in the leading rows. dropna()
# removes every row that has a NaN in *any* column, so rows with gaps in the
# raw data are discarded here as well.
micro_df.dropna(inplace=True)

# Show the first rows again to verify the engineered columns
micro_df.head()


In [None]:
# 4. Preprocessing
# Split `micro_df` into the feature matrix X and the label vector y, then
# standardize the features.
target = 'Target_Label'  # This should indicate if the stock is undervalued (1) or overvalued (0)
# Exclude non-feature columns; reuse `target` so the label name is defined once.
# errors='ignore' lets the drop succeed when 'DateTime' is absent.
drop_cols = ['DateTime', target]
X = micro_df.drop(columns=drop_cols, errors='ignore')
y = micro_df[target]

# Scale features to zero mean / unit variance.
# NOTE(review): the scaler is fit on all rows before the train/test split, so
# test-set statistics influence the scaling. Consider fitting on the training
# rows only.
scaler = StandardScaler()
# fit_transform returns a bare ndarray. Rebuild the DataFrame with X's column
# names AND X's row index: after the earlier dropna() the index is no longer
# 0..n-1, and without index=X.index X_scaled would get a fresh RangeIndex that
# no longer lines up with y's labels in any index-aligned operation.
X_scaled = pd.DataFrame(
    scaler.fit_transform(X),
    columns=X.columns,
    index=X.index,
)


In [None]:
# 5. Train-Test Split
# Hold out 20% of the rows for evaluation; random_state=42 makes the split
# reproducible from run to run.
# NOTE(review): train_test_split shuffles rows by default. The features include
# lag and rolling values, so if the rows form a time series a shuffled split
# can place neighbouring periods on both sides — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [None]:
# 6. Model Training with Random Forest
# Build a RandomForestClassifier with library-default hyperparameters apart
# from a fixed random_state (for reproducible forests), then fit it on the
# training split. `model` is reused by the evaluation and importance cells.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [None]:
# 7. Evaluation
# Predict labels for the held-out rows, then print the per-class
# precision/recall/F1 report followed by the confusion matrix.
y_pred = model.predict(X_test)

print("Model Evaluation:")
for metric_fn in (classification_report, confusion_matrix):
    # Both metrics take (true labels, predicted labels) in that order.
    print(metric_fn(y_test, y_pred))


In [None]:
# 8. Feature Importance Visualization
# Label the fitted model's importances with the feature names, keep the ten
# largest, and draw them as a horizontal bar chart.
feature_importances = pd.Series(model.feature_importances_, index=X.columns)
top_features = feature_importances.nlargest(10)
top_features.plot.barh(title="Top 10 Feature Importances")
plt.show()
