# Data Preparation 

In [8]:
import warnings
from collections import defaultdict

import numpy as np
import pandas as pd

# The three market regimes that form the chain's state space
states = ['Bullish', 'Bearish', 'Stable']

# Read the labelled observations from disk.
# NOTE(review): 'Date' is read without parse_dates; the later sort on that
# column presumably relies on a lexicographically sortable format — confirm.
df = pd.read_csv('synthetic_stock_data.csv')

print(f"Available states: {states}")
print(f"Total records: {len(df)}")

Available states: ['Bullish', 'Bearish', 'Stable']
Total records: 1000


In [9]:
def count_transitions(df):
    """
    Count state-to-state transitions within each company's time series.

    Rows are grouped by the 'Company' column, each group is ordered by its
    'Date' column, and every consecutive pair of 'Trend' values in that order
    is tallied as one transition. Pairs are never formed across two different
    companies.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Company', 'Date' and 'Trend' columns.

    Returns
    -------
    defaultdict of defaultdict of int
        counts[from_state][to_state] is the number of observed transitions,
        keyed by the raw 'Trend' values. Looking up an unseen pair yields 0
        (and, because these are defaultdicts, inserts that key).

    Notes
    -----
    Ordering uses whatever dtype 'Date' has; if it is stored as text the order
    is lexicographic. NOTE(review): confirm the column is ISO-formatted or
    parsed to datetime before relying on the counts.
    """
    transition_counts = defaultdict(lambda: defaultdict(int))

    # A single groupby pass replaces re-filtering the whole frame once per
    # company. sort=False keeps companies in first-appearance order.
    for _, company_data in df.groupby('Company', sort=False):
        # mergesort is stable: rows sharing a Date keep their original
        # relative order, so the tallies are reproducible run to run.
        ordered = company_data.sort_values('Date', kind='mergesort')
        trends = ordered['Trend'].tolist()

        # Pair each observation with its successor inside this company only
        for current_state, next_state in zip(trends, trends[1:]):
            transition_counts[current_state][next_state] += 1

    return transition_counts

# Tally every observed transition, then list the count for each ordered pair
transition_counts = count_transitions(df)
print("Transition Counts:")
for from_state in states:
    outgoing = transition_counts[from_state]
    for to_state in states:
        count = outgoing[to_state]
        print(f"  {from_state} → {to_state}: {count}")

Transition Counts:
  Bullish → Bullish: 106
  Bullish → Bearish: 98
  Bullish → Stable: 107
  Bearish → Bullish: 106
  Bearish → Bearish: 97
  Bearish → Stable: 101
  Stable → Bullish: 100
  Stable → Bearish: 106
  Stable → Stable: 116


In [10]:
def build_transition_matrix(transition_counts, states):
    """
    Convert transition counts into a row-normalised probability matrix.

    Parameters
    ----------
    transition_counts : mapping
        Nested mapping where transition_counts[from_state][to_state] is a
        count. Plain dicts and defaultdicts are both accepted; missing
        entries are treated as 0.
    states : sequence
        State labels; their order defines the row and column order.

    Returns
    -------
    (numpy.ndarray, dict)
        A (len(states), len(states)) float matrix whose entry [i][j] is the
        share of transitions from states[i] that went to states[j], plus the
        state -> index mapping used to build it.

    Notes
    -----
    Each row is normalised over the listed `states` only, so counts towards
    labels that are not in `states` cannot make a row sum to less than 1.
    A state with no outgoing transitions keeps an all-zero row.
    """
    n_states = len(states)
    transition_matrix = np.zeros((n_states, n_states))

    # Create state to index mapping
    state_to_idx = {state: idx for idx, state in enumerate(states)}

    for i, from_state in enumerate(states):
        # .get() works on plain dicts and avoids inserting keys into a
        # defaultdict as a side effect of merely reading it.
        outgoing = transition_counts.get(from_state, {})
        row_counts = [outgoing.get(to_state, 0) for to_state in states]
        total_from = sum(row_counts)

        # Rows with no observations are left as zeros rather than dividing by 0
        if total_from > 0:
            transition_matrix[i] = np.array(row_counts) / total_from

    return transition_matrix, state_to_idx

# Turn the raw counts into the 3x3 probability matrix and its label lookup
transition_matrix, state_to_idx = build_transition_matrix(transition_counts, states)

print("\nTransition Probability Matrix (3x3):")
print("Rows: [Bullish, Bearish, Stable]")
print("Columns: [Bullish, Bearish, Stable]")
print("\n" + "="*50)
for i, from_state in enumerate(states):
    # Format each probability in the row to three decimals
    row = [f"{prob:.3f}" for prob in transition_matrix[i]]
    print(f"{from_state:>8} → [ {', '.join(row)} ]")


Transition Probability Matrix (3x3):
Rows: [Bullish, Bearish, Stable]
Columns: [Bullish, Bearish, Stable]

 Bullish → [ 0.341, 0.315, 0.344 ]
 Bearish → [ 0.349, 0.319, 0.332 ]
  Stable → [ 0.311, 0.329, 0.360 ]


In [11]:
def validate_transition_matrix(transition_matrix, state_labels=None, tolerance=0.0001):
    """
    Print whether each row sums to 1 (proper probability distribution).

    Parameters
    ----------
    transition_matrix : numpy.ndarray
        2-D matrix whose rows are checked.
    state_labels : sequence, optional
        Label for each row. When omitted, the notebook-level `states` list is
        used if it exists; rows without a label are shown as "State <index>".
    tolerance : float, optional
        A row passes when |row_sum - 1| is strictly below this value.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    if state_labels is None:
        try:
            # Backward-compatible fallback to the notebook's global list
            state_labels = states
        except NameError:
            state_labels = []

    print("\n" + "="*50)
    print("VALIDATION:")
    print("="*50)

    # Walk the matrix rows themselves so no row is skipped when the number
    # of labels differs from the number of rows.
    for i, row in enumerate(transition_matrix):
        state = state_labels[i] if i < len(state_labels) else f"State {i}"
        row_sum = np.sum(row)
        print(f"{state} row sum: {row_sum:.6f} {'✓' if abs(row_sum - 1.0) < tolerance else '✗'}")

    print(f"\nMatrix Shape: {transition_matrix.shape}")
    print(f"Dimension: {transition_matrix.shape[0]}x{transition_matrix.shape[1]}")

# Print the row-sum report for the estimated transition matrix
validate_transition_matrix(transition_matrix)


VALIDATION:
Bullish row sum: 1.000000 ✓
Bearish row sum: 1.000000 ✓
Stable row sum: 1.000000 ✓

Matrix Shape: (3, 3)
Dimension: 3x3


In [12]:
def predict_next_state(current_state, transition_matrix, state_to_idx):
    """
    Predict the single most likely next state given the current state.

    Parameters
    ----------
    current_state : hashable
        A key of `state_to_idx`.
    transition_matrix : numpy.ndarray
        Matrix whose row state_to_idx[s] holds the next-state probabilities
        for state s.
    state_to_idx : dict
        Mapping from state label to row/column index.

    Returns
    -------
    (state, float)
        The label with the highest probability in the current state's row
        and that probability. Ties resolve to the lowest index (np.argmax).

    Raises
    ------
    KeyError
        If `current_state` is not a key of `state_to_idx`.
    """
    current_idx = state_to_idx[current_state]
    probabilities = transition_matrix[current_idx]

    # Invert the mapping that was passed in, instead of reading a global list,
    # so the label always matches the index layout of this matrix.
    idx_to_state = {idx: state for state, idx in state_to_idx.items()}

    # Get the most likely next state
    next_idx = int(np.argmax(probabilities))
    next_state = idx_to_state[next_idx]
    confidence = probabilities[next_idx]

    return next_state, confidence

def simulate_markov_chain(initial_state, transition_matrix, state_to_idx, steps=5,
                          stochastic=False, seed=None):
    """
    Walk the Markov chain for a number of steps, printing each transition.

    Parameters
    ----------
    initial_state : hashable
        Starting state; must be a key of `state_to_idx`.
    transition_matrix : numpy.ndarray
        Matrix whose row state_to_idx[s] holds next-state probabilities for s.
    state_to_idx : dict
        Mapping from state label to row/column index.
    steps : int, optional
        Number of transitions to take.
    stochastic : bool, optional
        False (default) keeps the original behaviour: every step follows the
        single most likely transition via `predict_next_state`, so the path
        is deterministic and stays put once it reaches a state whose most
        likely successor is itself. True draws each next state at random
        according to the current row's probabilities.
    seed : int or None, optional
        Seed for the random generator; only used when `stochastic` is True.

    Returns
    -------
    list
        The visited states, starting with `initial_state` (length steps + 1).

    Notes
    -----
    In stochastic mode each visited row must be a valid probability vector;
    numpy raises ValueError for rows that do not sum to 1 (e.g. all zeros).
    """
    current_state = initial_state
    path = [current_state]

    print(f"\nMARKOV CHAIN SIMULATION (Starting from: {initial_state})")
    print("="*40)

    if stochastic:
        rng = np.random.default_rng(seed)
        # Reverse lookup so a sampled column index can be turned into a label
        idx_to_state = {idx: state for state, idx in state_to_idx.items()}

    for step in range(steps):
        if stochastic:
            probabilities = transition_matrix[state_to_idx[current_state]]
            next_idx = int(rng.choice(len(probabilities), p=probabilities))
            next_state = idx_to_state[next_idx]
            # Here the reported value is the probability of the sampled move
            confidence = probabilities[next_idx]
        else:
            next_state, confidence = predict_next_state(current_state, transition_matrix, state_to_idx)
        print(f"Step {step+1}: {current_state} → {next_state} (confidence: {confidence:.3f})")
        current_state = next_state
        path.append(current_state)

    return path

# Demo run: walk the chain from a chosen starting regime
current_market_state = "Bullish"  # You can change this
simulation_path = simulate_markov_chain(
    initial_state=current_market_state,
    transition_matrix=transition_matrix,
    state_to_idx=state_to_idx,
)


MARKOV CHAIN SIMULATION (Starting from: Bullish)
Step 1: Bullish → Stable (confidence: 0.344)
Step 2: Stable → Stable (confidence: 0.360)
Step 3: Stable → Stable (confidence: 0.360)
Step 4: Stable → Stable (confidence: 0.360)
Step 5: Stable → Stable (confidence: 0.360)


In [13]:
def find_steady_state(transition_matrix, tolerance=1e-8, max_iterations=1000):
    """
    Estimate the steady-state distribution (long-term probabilities).

    Starts from the uniform distribution and repeatedly multiplies it by the
    transition matrix until two successive iterates differ by less than
    `tolerance` in every component.

    Parameters
    ----------
    transition_matrix : numpy.ndarray
        Square matrix of next-state probabilities (rows = current state).
    tolerance : float, optional
        Convergence threshold on the largest absolute component change.
    max_iterations : int, optional
        Upper bound on the number of multiplications.

    Returns
    -------
    numpy.ndarray
        The most recent iterate. If the loop ran out of iterations without
        converging, a RuntimeWarning is issued and the last iterate is still
        returned.

    Notes
    -----
    Repeated multiplication is only guaranteed to settle on a unique
    distribution for chains that are irreducible and aperiodic.
    """
    n_states = transition_matrix.shape[0]
    # Start with uniform distribution
    steady = np.ones(n_states) / n_states

    for _ in range(max_iterations):
        new_steady = steady @ transition_matrix
        converged = np.max(np.abs(new_steady - steady)) < tolerance
        # Keep the newest iterate before deciding to stop, so the value
        # returned is the converged one rather than its predecessor.
        steady = new_steady
        if converged:
            return steady

    warnings.warn(
        f"find_steady_state did not converge within {max_iterations} iterations "
        f"(tolerance={tolerance}); returning the last iterate.",
        RuntimeWarning,
        stacklevel=2,
    )
    return steady

# Long-run share of time the chain spends in each state
steady_state = find_steady_state(transition_matrix)
print("\n" + "="*50)
print("STEADY-STATE DISTRIBUTION (Long-term probabilities):")
print("="*50)
for i, state in enumerate(states):
    share = steady_state[i]
    print(f"{state}: {share:.4f} ({share*100:.2f}%)")


STEADY-STATE DISTRIBUTION (Long-term probabilities):
Bullish: 0.3329 (33.29%)
Bearish: 0.3213 (32.13%)
Stable: 0.3459 (34.59%)
