# Feature Analysis for High-Frequency Trading
## Backend Work - Analyzing Predictive Power of Trading Features

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Banner marking the start of the notebook's output.
print("Feature Analysis Notebook")

In [None]:
# Load the MSFT bars from CSV into `df`.
# The first line of the file is skipped and the header row that follows is
# replaced with the fixed column names below (six columns expected).
df = pd.read_csv('MSFT_1m.csv', skiprows=1).set_axis(
    ['timestamp', 'close', 'high', 'low', 'open', 'volume'],
    axis=1,
)

# Parse timestamps, then put the rows in chronological order with a fresh
# 0..n-1 index.
df = (
    df.assign(timestamp=pd.to_datetime(df['timestamp']))
    .sort_values('timestamp')
    .reset_index(drop=True)
)

print(f"Loaded {len(df)} rows")
df.head()

In [None]:
# Calculate features
# Builds the per-bar feature columns and the prediction target
# (`next_return`) on a copy of `df`, then drops every row with a missing value.
#
# Returns and VWAP are computed within each calendar date of `timestamp`, so
# the jump between the last bar of one day and the first bar of the next is
# never counted as a one-minute move and VWAP restarts every day.
# NOTE(review): assumes one trading session per calendar date of `timestamp`
# as stored (no session spanning midnight) - confirm for this data's timezone.
data = df.copy()
close = data['close']
volume = data['volume']
session = data['timestamp'].dt.date
close_by_session = close.groupby(session)

# Return versus the previous bar of the same session. The first bar of each
# session has no predecessor, so it is NaN and gets dropped below. Missing
# closes are not forward-filled.
data['momentum_1min'] = close / close_by_session.shift(1) - 1
data['volatility_1min'] = data['momentum_1min'] ** 2

# 1 when the bar closed above its open, otherwise 0.
data['price_direction'] = (close > data['open']).astype(int)

# Running volume-weighted average of the close, restarted each session.
# While the session's cumulative volume is still zero the ratio is NaN and
# the row is dropped below.
traded_value = close * volume
data['vwap'] = (
    traded_value.groupby(session).cumsum() / volume.groupby(session).cumsum()
)
data['vwap_dev'] = (close - data['vwap']) / data['vwap']

data['hour'] = data['timestamp'].dt.hour
data['minute'] = data['timestamp'].dt.minute

# Target: return to the next bar of the same session. The last bar of each
# session has no successor, so it is NaN and gets dropped below.
data['next_return'] = close_by_session.shift(-1) / close - 1

data = data.dropna()
print(f"Features calculated. {len(data)} rows.")

In [None]:
# Correlation analysis
# Correlate each feature column with `next_return` and list the features
# from largest to smallest absolute correlation.
features = [
    'momentum_1min',
    'volatility_1min',
    'price_direction',
    'vwap_dev',
    'hour',
    'minute',
]
correlations = dict(
    zip(features, (data[col].corr(data['next_return']) for col in features))
)
corr_df = pd.DataFrame(
    {
        'Feature': list(correlations.keys()),
        'Correlation': list(correlations.values()),
    }
)
# Rank by magnitude so strong negative relationships sort next to strong
# positive ones; the sign is kept in the column itself.
corr_df = corr_df.sort_values(
    'Correlation', key=lambda col: col.abs(), ascending=False
)
print(corr_df)

In [None]:
# Visualization
# Horizontal bar chart of each feature's correlation with the target:
# green bars for positive correlations, red for everything else.
fig, ax = plt.subplots(figsize=(10, 6))
colors = (
    corr_df['Correlation']
    .gt(0)
    .map({True: 'green', False: 'red'})
    .tolist()
)
ax.barh(corr_df['Feature'], corr_df['Correlation'], color=colors, alpha=0.7)
ax.set_xlabel('Correlation with Next-Minute Return')
ax.set_title('Feature Predictive Power')
# Dashed zero line separates positive from negative correlations.
ax.axvline(x=0, color='black', linestyle='--')
ax.grid(axis='x', alpha=0.3)
plt.show()