In [None]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from retrieveData import MicroeconomicDataRetriever

In [None]:
# 2. Load Data using MicroeconomicDataRetriever
# Initialize the data retriever
retriever = MicroeconomicDataRetriever()
# NOTE(review): `df` is not assigned anywhere in the visible cells before the
# line below, so on a fresh kernel (Restart & Run All) it raises NameError.
# The frame presumably has to be fetched from `retriever` first; the
# retriever's API is not visible here, so confirm the correct call and assign
# its result to `df` before displaying it.
# Display the data to check its structure
df.head()

In [None]:
# 3. Feature Engineering
# Rolling and lag features are computed over row order, so the frame is
# assumed to be sorted chronologically with one row per month — TODO confirm.
stock_price = df['Stock_Price']

# a. Moving average of the stock price: new column name -> window length in rows
moving_average_windows = {'Stock_Price_MA_3': 3, 'Stock_Price_MA_6': 6}
for new_col, window in moving_average_windows.items():
    df[new_col] = stock_price.rolling(window=window).mean()

# b. Volatility (rolling standard deviation of the stock price)
volatility_windows = {'Stock_Volatility_3M': 3, 'Stock_Volatility_6M': 6}
for new_col, window in volatility_windows.items():
    df[new_col] = stock_price.rolling(window=window).std()

# c. One-row lag of the macro indicators: new column name -> source column
lagged_sources = {'GDP_Lag_1': 'GDP', 'Inflation_Rate_Lag_1': 'Inflation_Rate'}
for new_col, source_col in lagged_sources.items():
    df[new_col] = df[source_col].shift(1)

# d. Sentiment score (optional): nothing is computed here. If it is wanted it
# has to arrive with the retrieved data or be derived separately with an NLP
# sentiment library.

# The rolling windows and the shift leave NaN in the leading rows; remove every
# row that contains a NaN in any column.
# NOTE: this mutates `df` in place, so re-running this cell without reloading
# the data recomputes the features on the shortened frame and drops further rows.
df.dropna(inplace=True)

# Show the frame again with the engineered columns
df.head()

In [None]:
# 4. Preprocessing
# Define target and features
target = 'Target_Label'  # Assumed to mark AMD as undervalued (1) or overvalued (0) — TODO confirm
drop_cols = ['DateTime', target]  # DateTime is excluded from the model inputs
X = df.drop(columns=drop_cols)
y = df[target]

# Scale the features
# NOTE(review): the scaler is fitted on every row here, before the train/test
# split in the next cell, so statistics of the held-out rows feed into the
# scaling. Confirm whether it should be fitted on the training rows only.
scaler = StandardScaler()
# Keep X's row index on the scaled frame. Without `index=X.index` the result
# gets a fresh 0..n-1 RangeIndex while `y` keeps the labels left over after
# dropna, so any label-based operation (concat, join, boolean masking) between
# X_scaled and y would silently misalign rows.
X_scaled = pd.DataFrame(
    scaler.fit_transform(X),
    columns=X.columns,
    index=X.index,
)

In [None]:
# 5. Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
# NOTE(review): train_test_split shuffles rows by default. The features include
# rolling and lagged values, so if the rows are chronological a shuffled split
# places later observations in the training set — confirm whether a
# time-ordered split (shuffle=False) is wanted instead.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 6. Model Training with Random Forest
# Library-default hyperparameters; only the seed is pinned so repeated runs
# build the same forest.
model = RandomForestClassifier(random_state=42)
# Fit on the training portion only; the fitted estimator is the cell's output.
model.fit(X=X_train, y=y_train)

In [None]:
# 7. Evaluation
# Score the held-out rows, then print the per-class metrics followed by the
# raw confusion matrix.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Model Evaluation:")
print(report)
print(matrix)

In [None]:
# 8. Feature Importance
# Label the fitted model's importances with the feature column names and plot
# the ten largest as a horizontal bar chart.
feature_importances = pd.Series(model.feature_importances_, index=X.columns)
top_features = feature_importances.nlargest(10)
top_features.plot.barh(title="Top 10 Feature Importances")
plt.show()

In [None]:
# 9. Predicting AMD Valuation
# Start from the most recent feature row so the input carries every column the
# scaler and model were fitted on, in the same order. Passing a frame with only
# a subset of the columns makes scaler.transform raise ValueError.
amd_df = X.iloc[[-1]].copy()

# Values to override in that row. Replace with actual latest AMD data; every
# key must be one of the columns in X.
amd_data = {
    'GDP': df['GDP'].iloc[-1],  # Use most recent GDP
    'Inflation_Rate': df['Inflation_Rate'].iloc[-1],  # Use most recent Inflation Rate
}

# Fail early with a clear message instead of silently appending a column the
# scaler has never seen.
unknown_cols = sorted(set(amd_data) - set(X.columns))
if unknown_cols:
    raise KeyError(f"amd_data has columns the model was not trained on: {unknown_cols}")

for column, value in amd_data.items():
    amd_df[column] = value

# Apply the already-fitted scaler (transform only, never re-fit here) and keep
# the column names, because the model was fitted on a named DataFrame.
amd_scaled = pd.DataFrame(
    scaler.transform(amd_df),
    columns=amd_df.columns,
    index=amd_df.index,
)

# Predict AMD's valuation (1 = undervalued, 0 = overvalued)
amd_prediction = model.predict(amd_scaled)
print("AMD Valuation Prediction:", "Undervalued" if amd_prediction[0] == 1 else "Overvalued")