IMPORTS 

In [1]:
import json
import math
import pickle

import pandas as pd

LOAD THE MODEL

In [15]:
# SECURITY: pickle.load can execute arbitrary code while deserialising, so only
# load model files that come from a trusted source.
with open("models/best_model.pkl", 'rb') as f:
    model = pickle.load(f)

# Pass the encoding explicitly: without it open() falls back to the platform
# default, which is not UTF-8 everywhere and can garble non-ASCII metadata.
with open("models/model_metadata.json", 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# metadata['features'] is the column list consumed later by predict().
print("Model expects these features:", metadata['features'])

Model expects these features: ['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'avg_sentiment', 'tweet_count', 'next_day_change', '5_day_ma', '10_day_volatility', 'daily_return', 'sentiment_lag1', 'sentiment_ma3']


PREDICTION FUNCTION

GET THE FEATURES FROM METADATA

In [20]:
def predict(features, estimator=None, feature_names=None):
    """
    Predict the next-day change for a single row of input features.

    Args:
        features (dict): Maps feature name to a value that ``float()`` accepts.
            Every expected feature must be present; extra keys are ignored.
        estimator: Optional object exposing ``predict(DataFrame)``. Defaults to
            the notebook-level ``model``.
        feature_names (list): Optional column names in the order the model
            expects. Defaults to ``metadata['features']``. The entries 'Date'
            and 'next_day_change' are dropped before use.

    Returns:
        dict: {
            'predicted_change': float,
            'direction': 'UP' if the prediction is > 0, otherwise 'DOWN',
            'confidence': abs(prediction) * 100, rounded to one decimal
                          and capped at 99
        }
        'confidence' is a magnitude heuristic derived from the prediction
        itself; it is not a probability produced by the model.

    Raises:
        ValueError: If a required feature is missing, a value cannot be
            converted to a finite float, or the model returns NaN/inf.
    """
    # Fall back to the notebook-level objects only when nothing was injected,
    # so existing ``predict(features)`` calls behave exactly as before.
    if estimator is None:
        estimator = model
    if feature_names is None:
        feature_names = metadata['features']

    # Drop the non-input columns (presumably the row label and the target).
    expected_features = [name for name in feature_names
                         if name not in ('Date', 'next_day_change')]

    # Report missing features in expected order so the message is deterministic.
    provided = features.keys()
    missing_features = [name for name in expected_features if name not in provided]
    if missing_features:
        raise ValueError(f"Missing {len(missing_features)} features: {missing_features}")

    # Convert every feature to a finite float.
    processed_features = {}
    for feature in expected_features:
        try:
            value = float(features[feature])
        except (ValueError, TypeError, OverflowError) as exc:
            raise ValueError(f"Feature '{feature}' must be numeric") from exc
        # float() happily accepts 'nan' and 'inf'; reject them explicitly.
        if not math.isfinite(value):
            raise ValueError(f"Feature '{feature}' must be a finite number, got {value}")
        processed_features[feature] = value

    # Create DataFrame in correct feature order
    input_df = pd.DataFrame([processed_features])[expected_features]

    # Convert to a plain float so the returned dict holds no numpy scalars.
    predicted_change = float(estimator.predict(input_df)[0])

    # Without this guard a NaN prediction would be reported as 'DOWN' with
    # confidence 99, because min(99, nan) evaluates to 99.
    if not math.isfinite(predicted_change):
        raise ValueError(f"Model returned a non-finite prediction: {predicted_change}")

    return {
        'predicted_change': predicted_change,
        'direction': 'UP' if predicted_change > 0 else 'DOWN',
        'confidence': min(99, round(abs(predicted_change) * 100, 1))
    }


In [21]:
# Hand-written sample row covering every input that predict() requires.
# All numbers are illustrative placeholders, not real market data.
test_features = {
    # Daily price bar and traded volume
    'High': 255.5,
    'Low': 248.2,
    'Open': 252.3,
    'Close': 250.0,
    'Volume': 5_000_000,
    'daily_return': 0.015,
    # Tweet sentiment for the day
    'avg_sentiment': 0.15,
    'tweet_count': 85,
    # Rolling price statistics
    '5_day_ma': 250.5,
    '10_day_volatility': 0.02,
    # Lagged / smoothed sentiment
    'sentiment_lag1': 0.12,
    'sentiment_ma3': 0.14,
}

In [26]:
# Ensure all feature values are floats
test_features = {k: float(v) for k, v in test_features.items()}

# Make prediction. Only the predict() call sits inside the try so that the
# handler reacts to validation errors and nothing else.
try:
    prediction = predict(test_features)
except ValueError as e:
    print(f"\nError: {e}")
    print("Please ensure your input contains all these features:")
    # predict() ignores 'Date' and 'next_day_change', so leave them out of the
    # hint instead of asking the user to supply them.
    print([name for name in metadata['features']
           if name not in ('Date', 'next_day_change')])
else:
    print("\nPrediction Result:")
    print(f"Predicted Change: {prediction['predicted_change']:.4f}")
    print(f"Direction: {prediction['direction']}")
    print(f"Confidence: {prediction['confidence']}%")


Prediction Result:
Predicted Change: 0.0066
Direction: UP
Confidence: 0.7%
