# AI-Based Market Trend Analysis – End-to-End System

This notebook presents a complete AI-driven pipeline for market trend analysis, from data loading and feature engineering to machine learning, sentiment analysis, and final decision support.


In [None]:
%pip install yfinance pandas numpy matplotlib scikit-learn nltk

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk

# Fetch the VADER sentiment lexicon into NLTK's default data directory.
# No machine-specific absolute path is registered here: NLTK's built-in
# search path already includes the per-user data directory, so the notebook
# stays portable across machines and user accounts.
nltk.download('vader_lexicon', quiet=True)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Build the VADER analyzer from a lexicon located through NLTK's data search
# path rather than an absolute, machine-specific file path.
try:
    # Default resource: the zipped lexicon installed by nltk.download('vader_lexicon').
    sia = SentimentIntensityAnalyzer()
except LookupError:
    # Fallback for environments where the lexicon was extracted by hand to
    # <nltk_data>/sentiment/vader_lexicon.txt instead of being downloaded.
    sia = SentimentIntensityAnalyzer(lexicon_file="sentiment/vader_lexicon.txt")

# Smoke test: score one headline to confirm the analyzer is usable.
sia.polarity_scores("Apple stock rises after strong earnings")


In [None]:
# Load the AAPL price history from CSV, parse the dates and order the rows
# chronologically.

raw_prices = pd.read_csv("AAPL.csv")
data = (
    raw_prices
    .assign(Date=pd.to_datetime(raw_prices['Date']))
    .sort_values('Date')
)

data.head()


In [None]:
# Structural overview of the loaded frame: column names, dtypes and non-null counts.
data.info()

In [None]:
# Summary statistics for the loaded frame's columns.
data.describe()

In [None]:
# Closing price over the whole loaded period.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['Date'], data['Close'], label='Close Price')
ax.set(
    title='Apple (AAPL) Stock Price Trend 2025-2026',
    xlabel='Date',
    ylabel='Price (USD)',
)
ax.legend()
plt.show()

In [None]:
# Feature engineering: derive the daily percentage return and the 10- and
# 50-day moving averages from the closing price, then discard every row that
# still contains a missing value (including the warm-up rows of the rolling
# windows).

close = data['Close']
data = data.assign(
    Daily_Return=close.pct_change(),
    MA_10=close.rolling(window=10).mean(),
    MA_50=close.rolling(window=50).mean(),
).dropna()

data.head()


In [None]:
# Creating target variable (1 = next day's close is higher, 0 = otherwise)

next_close = data['Close'].shift(-1)
data['Target'] = np.where(next_close > data['Close'], 1, 0)

# Drop the last row: it has no next-day close. A comparison against NaN is
# False, so np.where labels that row 0 instead of leaving it missing, and a
# plain dropna() would therefore keep it with a made-up label. Filter on the
# shifted series instead. After this step the final row of `data` is the most
# recent day whose outcome is known.
data = data[next_close.notna()].copy()

data[['Date', 'Close', 'Target']].head()


In [None]:
# Assemble the model inputs (X) and labels (y), then hold out the last 20% of
# rows as the test set. shuffle=False keeps the rows in their existing order,
# so the test set is the tail of the series.

features = ['Daily_Return', 'MA_10', 'MA_50']
X, y = data[features], data['Target']

split_options = {'test_size': 0.2, 'shuffle': False}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

(X_train.shape, X_test.shape)


In [None]:
# Fit a default-configured logistic regression on the training split.
# fit() returns the estimator itself, so construction and training chain.

model = LogisticRegression().fit(X_train, y_train)
model


In [None]:
# Score the held-out rows and print accuracy, confusion matrix and the
# per-class classification report.

y_pred = model.predict(X_test)

evaluation_report = (
    ("Accuracy:", accuracy_score(y_test, y_pred)),
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
    ("\nClassification Report:\n", classification_report(y_test, y_pred)),
)
for heading, result in evaluation_report:
    print(heading, result)


In [None]:
%pip install seaborn

In [None]:
# Heatmap of the test-set confusion matrix (rows: actual, columns: predicted).

import seaborn as sns

cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', ax=cm_ax)
cm_ax.set(title='Confusion Matrix', xlabel='Predicted', ylabel='Actual')
plt.show()


## Model Evaluation & Analysis

The machine learning model was evaluated using accuracy, confusion matrix, and classification metrics. The results indicate that historical price-based features such as daily returns and moving averages can provide useful signals for short-term trend prediction. However, due to the inherent randomness of financial markets, prediction accuracy is limited and should be interpreted cautiously.

In [None]:
# Backtest frame: the rows after the training split, annotated with the
# model's prediction and the true label for each day.

test_results = data.iloc[len(X_train):].assign(
    Predicted_Trend=y_pred,
    Actual_Trend=y_test.values,
)

test_results[['Date', 'Close', 'Actual_Trend', 'Predicted_Trend']].head()


In [None]:
# Overlay the model's up/down calls on the test-period closing price.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_results['Date'], test_results['Close'], label='Close Price', alpha=0.6)

buy_signals = test_results[test_results['Predicted_Trend'] == 1]
sell_signals = test_results[test_results['Predicted_Trend'] == 0]

# One scatter layer per predicted direction.
signal_layers = (
    (buy_signals, 'green', 'Predicted Up'),
    (sell_signals, 'red', 'Predicted Down'),
)
for signals, colour, caption in signal_layers:
    ax.scatter(signals['Date'], signals['Close'], color=colour, label=caption, s=10)

ax.set(
    title='Backtesting ML Predictions on Stock Price',
    xlabel='Date',
    ylabel='Price',
)
ax.legend()
plt.show()


In [None]:
# ROC curve and AUC for the positive class on the test split.

from sklearn.metrics import roc_curve, auc

# Second column of predict_proba: the probability used as the ROC score.
y_prob = model.predict_proba(X_test)[:, 1]
fpr, tpr = roc_curve(y_test, y_prob)[:2]  # thresholds are not needed
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(6, 5))
roc_ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
roc_ax.plot([0, 1], [0, 1], linestyle='--')  # diagonal reference line
roc_ax.set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='ROC Curve',
)
roc_ax.legend()
plt.show()


In [None]:
# Latest market trend prediction using ML model

# Keep the most recent feature row as a one-row DataFrame rather than a bare
# array (.values): the model was fitted on a DataFrame, and passing data
# without the matching column names makes scikit-learn emit a feature-name
# warning at predict time.
latest_features = X.iloc[-1:]
ml_prediction = model.predict(latest_features)[0]

if ml_prediction == 1:
    print("ML Prediction: Stock likely to go UP")
else:
    print("ML Prediction: Stock likely to go DOWN")


In [None]:
# Score a fixed set of example headlines with VADER and average their
# compound scores.

news_headlines = [
    "Apple stock rises as iPhone sales exceed expectations",
    "Concerns over supply chain affect Apple shares",
    "Apple reports strong quarterly earnings",
    "Market uncertainty impacts technology stocks"
]

sentiment_scores = []
for headline in news_headlines:
    sentiment_scores.append(sia.polarity_scores(headline)['compound'])

average_sentiment = np.mean(sentiment_scores)

average_sentiment


In [None]:
# Bar chart of the per-headline compound sentiment scores.

fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(news_headlines, sentiment_scores)
ax.axhline(0, linestyle='--')  # zero line between negative and positive scores
plt.xticks(rotation=20, ha='right')  # tilt the long headline labels
ax.set_title("News Sentiment Analysis (VADER)")
ax.set_ylabel("Sentiment Score (Compound)")
fig.tight_layout()
plt.show()


In [None]:
# Combine the ML direction with the average news sentiment: only when both
# point the same way is the trend called "strong"; anything else is mixed.

signals_agree_up = (ml_prediction == 1) and (average_sentiment > 0)
signals_agree_down = (ml_prediction == 0) and (average_sentiment < 0)

if signals_agree_up:
    final_insight = "Strong Positive Market Trend"
else:
    final_insight = (
        "Strong Negative Market Trend" if signals_agree_down
        else "Mixed Market Signals"
    )

final_insight


In [None]:
# Closing price together with its 10- and 50-day moving averages.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['Date'], data['Close'], label='Close Price', alpha=0.6)
for column, caption in (('MA_10', 'MA 10'), ('MA_50', 'MA 50')):
    ax.plot(data['Date'], data[column], label=caption, linestyle='--')
ax.set(
    title='Apple Stock Trend with Moving Average Crossover',
    xlabel='Date',
    ylabel='Price (USD)',
)
ax.legend()
plt.show()


In [None]:
# Label each day 'Bullish' when the 10-day average is above the 50-day
# average, otherwise 'Bearish'.

short_ma_above_long = data['MA_10'] > data['MA_50']
data['Trend_Regime'] = np.where(short_ma_above_long, 'Bullish', 'Bearish')

data.loc[:, ['Date', 'Close', 'Trend_Regime']].head()


In [None]:
# Closing price drawn as dots, coloured by the moving-average trend regime.
bullish = data[data['Trend_Regime'] == 'Bullish']
bearish = data[data['Trend_Regime'] == 'Bearish']

fig, ax = plt.subplots(figsize=(12, 6))
for regime_rows, marker, caption in (
    (bullish, 'g.', 'Bullish'),
    (bearish, 'r.', 'Bearish'),
):
    ax.plot(regime_rows['Date'], regime_rows['Close'], marker, label=caption)

ax.set(
    title='Bullish vs Bearish Market Regimes',
    xlabel='Date',
    ylabel='Price',
)
ax.legend()
plt.show()


In [None]:
# Histogram of the daily percentage returns (50 bins).

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(data['Daily_Return'], bins=50)
ax.set(
    title='Distribution of Daily Returns',
    xlabel='Daily Return',
    ylabel='Frequency',
)
plt.show()


## Volatility Analysis

Volatility measures the degree of variation in stock returns and is a key indicator of market risk.
Higher volatility indicates unstable market conditions, while lower volatility suggests steady price movements.


In [None]:
# Volatility is taken as the standard deviation of the daily returns.
daily_returns = data['Daily_Return']
volatility = daily_returns.std()
volatility


In [None]:
# Pair every input feature with its fitted logistic-regression coefficient.

coefficients = pd.DataFrame({'Feature': features}).assign(
    Coefficient=model.coef_[0]
)

coefficients


In [None]:
# Bar chart of the fitted coefficient for each feature.

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(coefficients['Feature'], coefficients['Coefficient'])
ax.set(
    title='Feature Importance (Logistic Regression)',
    xlabel='Feature',
    ylabel='Coefficient Value',
)
plt.show()


## Model Interpretation

The coefficients of the Logistic Regression model indicate the influence of each feature on trend prediction.
Positive coefficients suggest a higher likelihood of upward price movement, while negative values indicate downward pressure.
This interpretability is crucial in financial AI systems.


## System Pipeline Overview

1. Historical Apple (AAPL) stock prices are loaded from a CSV file (`AAPL.csv`).
2. Financial indicators such as daily returns and moving averages are engineered.
3. A machine learning model predicts short-term market trends.
4. News sentiment analysis provides contextual market signals.
5. Both outputs are combined to produce a final market insight.

This modular pipeline ensures interpretability, reproducibility, and responsible AI usage.

## Why Logistic Regression?

Logistic Regression was selected over complex deep learning models due to its interpretability, lower risk of overfitting, and suitability for binary classification with limited features.
This aligns with real-world financial systems where explainability is essential.


In [None]:
# Market Signals Summary

# Volatility regime: compare the volatility of the most recent trading days
# with the full-sample volatility. A standard deviation is not comparable to a
# mean return (it exceeds it for practically any return series), so the label
# is derived from a like-for-like comparison of two standard deviations.
VOLATILITY_WINDOW = 20  # number of most recent rows treated as "recent"
recent_volatility = data['Daily_Return'].tail(VOLATILITY_WINDOW).std()
volatility_label = "High" if recent_volatility > volatility else "Moderate"

market_summary = pd.DataFrame({
    "Signal": [
        "ML Price Trend Prediction",
        "News Sentiment",
        "Trend Regime (MA-based)",
        "Market Volatility"
    ],
    "Result": [
        "Up" if ml_prediction == 1 else "Down",
        "Positive" if average_sentiment > 0 else "Negative",
        data['Trend_Regime'].iloc[-1],  # regime of the most recent row
        volatility_label
    ]
})

market_summary


In [None]:
# Final decision engine: each of the three signals casts a +1 (bullish) or
# -1 (bearish) vote; a total of at least 1 yields BUY, otherwise SELL.

signal_votes = (
    ml_prediction == 1,                            # ML signal
    average_sentiment > 0,                         # sentiment signal
    data['Trend_Regime'].iloc[-1] == 'Bullish',    # trend regime signal
)
decision_score = sum(1 if vote else -1 for vote in signal_votes)

market_decision = "BUY" if decision_score >= 1 else "SELL"

print("Final Market Decision:", market_decision)
print("Decision Score:", decision_score)



The final market decision represents a consolidated view derived from multiple analytical layers.
Rather than relying on a single indicator, the system aggregates three equally weighted signals (ML prediction, news sentiment, and moving-average trend regime) into a single score to generate a BUY or SELL recommendation.

In [None]:
# Tabulate every signal alongside the final decision, one row per component.

summary_rows = [
    ("ML Trend Prediction", "Up" if ml_prediction == 1 else "Down"),
    ("News Sentiment", "Positive" if average_sentiment > 0 else "Negative"),
    ("Trend Regime", data['Trend_Regime'].iloc[-1]),
    ("Market Volatility", f"{volatility:.4f}"),
    ("Final AI Decision", market_decision),
]
decision_summary = pd.DataFrame(summary_rows, columns=["Component", "Result"])

decision_summary



In [None]:
# Render a text-only banner figure that introduces the final summary.
banner_fig, banner_ax = plt.subplots(figsize=(10, 1.5))
banner_ax.text(
    0.5, 0.5,
    "FINAL MARKET DECISION SUMMARY",
    ha='center', va='center',
    fontsize=14, fontweight='bold',
)
banner_ax.set_axis_off()
plt.show()


## Final Market Outlook

Based on the combined analysis of historical price trends, machine learning predictions, technical indicators, and news sentiment, the AI system provides a consolidated market outlook.

The model integrates quantitative signals with qualitative sentiment to reduce reliance on a single indicator. This hybrid approach helps balance statistical prediction with contextual market understanding, resulting in a more robust decision-support framework.


## Confidence Level & Limitations

- The analysis is based on a single historical price series loaded from `AAPL.csv` and a small, hand-picked set of news headlines.
- Financial markets are influenced by unpredictable macroeconomic and geopolitical factors.
- The model is designed for short-term trend analysis, not long-term investment decisions.
- Results should be interpreted as decision support rather than financial advice.

Despite these limitations, the structured AI pipeline demonstrates how multiple signals can be combined to improve market understanding.


In [None]:
# Confidence score: the number of signals (0 to 3) that are currently bullish.

bullish_checks = (
    ml_prediction == 1,
    average_sentiment > 0,
    data['Trend_Regime'].iloc[-1] == 'Bullish',
)
confidence_score = sum(1 for check in bullish_checks if check)

print("Confidence Scoring for Final Decision:-", confidence_score)


In [None]:
# Translate the bullish-signal count into a human-readable label; any count
# not listed below falls through to the bearish label.

interpretation_by_score = {
    3: "Strong Bullish Confidence",
    2: "Moderate Bullish Bias",
    1: "Weak / Uncertain Signal",
}
confidence_interpretation = interpretation_by_score.get(
    confidence_score, "Strong Bearish Confidence"
)

print("Confidence-based interpretation:-", confidence_interpretation)


In [None]:
# One-row table with the decision, its confidence level and interpretation.
final_record = {
    "Final Decision": market_decision,
    "Confidence Level (0–3)": confidence_score,
    "Interpretation": confidence_interpretation,
}
final_output = pd.DataFrame([final_record])

final_output

In [None]:
# Render the decision summary as "Component: Result" lines inside a rounded
# text box on an otherwise empty figure.
summary_lines = [
    f"{component}: {result}"
    for component, result in zip(
        decision_summary['Component'], decision_summary['Result']
    )
]
summary_text = "\n".join(summary_lines)

fig, ax = plt.subplots(figsize=(8, 3))
ax.text(
    0.5, 0.5,
    summary_text,
    ha='center', va='center',
    fontsize=11,
    bbox={"boxstyle": "round,pad=0.6"},
)
ax.set_axis_off()
ax.set_title("Final Market Decision Summary")
plt.show()
