In [None]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-v0_8')

# Time series forecasting
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from prophet import Prophet

# Financial data
import yfinance as yf

# Model evaluation
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)


In [None]:
# Generate sample traffic flow data
def generate_traffic_data(start='2022-01-01', end='2023-12-31', freq='h', seed=None):
    """
    Generate synthetic traffic flow data with daily, weekly and yearly cycles.

    The series is a constant level of 100 plus three sine waves whose periods
    are 24, 24*7 and 24*365 samples, plus Gaussian noise (std 10). The result
    is floored at 10 so it can never be negative.

    Parameters
    ----------
    start, end : str or datetime-like
        Inclusive bounds handed to ``pd.date_range``.
    freq : str
        Sampling frequency alias. The default is the lowercase hourly alias
        ``'h'``; the uppercase ``'H'`` spelling is deprecated in recent
        pandas releases. The cycle lengths above are counted in samples, so
        they only correspond to day/week/year when the data is hourly.
    seed : int or None
        When given, noise is drawn from ``np.random.default_rng(seed)`` so the
        output is reproducible regardless of global state. When ``None`` the
        legacy global ``np.random`` state is used (as seeded by the caller).

    Returns
    -------
    pd.DataFrame
        Columns ``'datetime'`` and ``'traffic_flow'``, one row per timestamp.
    """
    dates = pd.date_range(start=start, end=end, freq=freq)

    # Sample counter shared by all three cycles (computed once)
    sample_idx = np.arange(len(dates))

    # 24-sample cycle around a base level of 100
    base_pattern = 100 + 50 * np.sin(2 * np.pi * sample_idx / 24)

    # 7 * 24-sample cycle
    weekly_pattern = 20 * np.sin(2 * np.pi * sample_idx / (24 * 7))

    # 365 * 24-sample cycle
    seasonal_pattern = 30 * np.sin(2 * np.pi * sample_idx / (24 * 365))

    # Gaussian noise: global RNG by default, private generator when seeded
    if seed is None:
        noise = np.random.normal(0, 10, len(dates))
    else:
        noise = np.random.default_rng(seed).normal(0, 10, len(dates))

    # Combine all components and clamp to a minimum of 10
    traffic_flow = np.maximum(
        base_pattern + weekly_pattern + seasonal_pattern + noise, 10
    )

    return pd.DataFrame({
        'datetime': dates,
        'traffic_flow': traffic_flow
    })

# Build the notebook-wide traffic dataset (columns 'datetime' and
# 'traffic_flow'); the cells below read it as `traffic_data`.
traffic_data = generate_traffic_data()

In [None]:
# Load stock data for a major company (Apple as example)
def load_stock_data(symbol='AAPL', period='2y'):
    """
    Load daily historical stock data using yfinance.

    Parameters
    ----------
    symbol : str
        Ticker symbol passed to ``yf.download``.
    period : str
        Look-back period passed to ``yf.download`` (e.g. ``'2y'``).

    Returns
    -------
    pd.DataFrame or None
        A frame with columns ``'date'``, ``'price'`` (the close) and
        ``'volume'``, or ``None`` when the download raised, returned no rows,
        or lacked the expected columns. Callers check for ``None``.
    """
    try:
        # Download stock data at daily resolution
        stock = yf.download(symbol, period=period, interval='1d')

        # A download can come back empty instead of raising; treat that as a
        # failure so downstream cells are skipped rather than fed zero rows.
        if stock is None or stock.empty:
            print(f"Error loading stock data: no rows returned for {symbol}")
            return None

        # Reset index to make date a column
        stock_df = stock.reset_index()

        # Keep only the columns used later and give them stable names
        stock_df = stock_df[['Date', 'Close', 'Volume']].copy()
        stock_df.columns = ['date', 'price', 'volume']

        print(f"Successfully loaded {symbol} stock data")
        return stock_df

    except Exception as e:
        # Deliberately best-effort: the notebook continues without stock data
        print(f"Error loading stock data: {e}")
        return None

# Load two years of Apple stock data. `stock_data` is None when loading
# failed, which is why later cells guard on `stock_data is not None`.
stock_data = load_stock_data('AAPL', '2y')

# Quick preview, only when the load succeeded
if stock_data is not None:
    print(f"Stock data shape: {stock_data.shape}")
    print("\nFirst few rows:")
    print(stock_data.head())

In [None]:
# Four-panel overview of the traffic series (2 x 2 grid)
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top-left: the raw series against its timestamps
axes[0, 0].plot(traffic_data['datetime'], traffic_data['traffic_flow'], alpha=0.7)
axes[0, 0].set(title='Traffic Flow Over Time', xlabel='Date', ylabel='Traffic Flow')
axes[0, 0].tick_params(axis='x', rotation=45)

# Top-right: mean flow per hour of day.
# Note: this writes an 'hour' column into traffic_data itself.
traffic_data['hour'] = traffic_data['datetime'].dt.hour
hourly_avg = traffic_data.groupby('hour')['traffic_flow'].mean()
axes[0, 1].plot(hourly_avg.index, hourly_avg.values, marker='o')
axes[0, 1].set(
    title='Average Traffic Flow by Hour of Day',
    xlabel='Hour',
    ylabel='Average Traffic Flow',
)

# Bottom-left: mean flow per weekday, ordered Monday..Sunday.
# Note: this writes a string 'day_of_week' column into traffic_data itself.
traffic_data['day_of_week'] = traffic_data['datetime'].dt.day_name()
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
daily_avg = (
    traffic_data.groupby('day_of_week')['traffic_flow']
    .mean()
    .reindex(day_order)
)
axes[1, 0].bar(range(len(daily_avg)), daily_avg.values)
axes[1, 0].set(
    title='Average Traffic Flow by Day of Week',
    xlabel='Day of Week',
    ylabel='Average Traffic Flow',
)
axes[1, 0].set_xticks(range(len(daily_avg)))
axes[1, 0].set_xticklabels(daily_avg.index, rotation=45)

# Bottom-right: histogram of all flow values
axes[1, 1].hist(traffic_data['traffic_flow'], bins=50, alpha=0.7, edgecolor='black')
axes[1, 1].set(title='Traffic Flow Distribution', xlabel='Traffic Flow', ylabel='Frequency')

plt.tight_layout()
plt.show()

In [None]:
# Four-panel overview of the stock data; skipped when the load failed
if stock_data is not None:
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Top-left: closing price against date
    axes[0, 0].plot(stock_data['date'], stock_data['price'], linewidth=2)
    axes[0, 0].set(title='Stock Price Over Time', xlabel='Date', ylabel='Price ($)')
    axes[0, 0].tick_params(axis='x', rotation=45)

    # Top-right: traded volume against date
    axes[0, 1].plot(stock_data['date'], stock_data['volume'], color='orange', alpha=0.7)
    axes[0, 1].set(title='Trading Volume Over Time', xlabel='Date', ylabel='Volume')
    axes[0, 1].tick_params(axis='x', rotation=45)

    # Bottom-left: histogram of prices
    axes[1, 0].hist(stock_data['price'], bins=30, alpha=0.7, edgecolor='black')
    axes[1, 0].set(title='Stock Price Distribution', xlabel='Price ($)', ylabel='Frequency')

    # Bottom-right: price plotted against volume
    axes[1, 1].scatter(stock_data['volume'], stock_data['price'], alpha=0.5)
    axes[1, 1].set(title='Price vs Trading Volume', xlabel='Volume', ylabel='Price ($)')

    plt.tight_layout()
    plt.show()

In [None]:
# Prepare traffic data for SARIMA modeling
def prepare_sarima_data(df, target_col='traffic_flow', date_col='datetime'):
    """
    Turn a frame into a daily-mean series suitable for SARIMA.

    `date_col` becomes the index, `target_col` is extracted (as a copy, so
    `df` is left untouched) and the values are averaged per calendar day.

    Returns the resampled pd.Series.
    """
    indexed = df.set_index(date_col)
    series = indexed[target_col].copy()

    # Daily means keep the series short enough for SARIMA to fit quickly
    return series.resample('D').mean()

# Collapse the traffic data to one mean value per day; `daily_traffic`
# is the series the stationarity test and the SARIMA split operate on.
daily_traffic = prepare_sarima_data(traffic_data)
print(f"Daily traffic data shape: {daily_traffic.shape}")
print("\nFirst few values:")
print(daily_traffic.head())

# Check for stationarity using Augmented Dickey-Fuller test
def check_stationarity(timeseries):
    """
    Run the Augmented Dickey-Fuller test and report the outcome.

    Missing values are dropped before testing. The statistic, p-value and
    critical values are printed; the series is called stationary when the
    p-value is at most 0.05.

    Returns True for stationary, False otherwise.
    """
    adf_stat, p_value, _, _, critical_values, *_ = adfuller(timeseries.dropna())

    print('Augmented Dickey-Fuller Test Results:')
    print(f'ADF Statistic: {adf_stat:.6f}')
    print(f'p-value: {p_value:.6f}')
    print('Critical Values:')
    for level in critical_values:
        print(f'\t{level}: {critical_values[level]:.3f}')

    # Guard clause for the non-stationary case (also taken for a NaN p-value)
    if not (p_value <= 0.05):
        print("Series is not stationary")
        return False

    print("Series is stationary")
    return True

# Run the ADF check on the daily series; the True/False verdict is kept
# in `is_stationary`.
is_stationary = check_stationarity(daily_traffic)

In [None]:
# Split data into train and test sets
def split_time_series(data, test_size=0.2):
    """
    Cut a sequence into a leading train part and a trailing test part.

    The cut position is ``int(len(data) * (1 - test_size))``; order is
    preserved and nothing is shuffled.

    Returns a ``(train, test)`` tuple of slices of `data`.
    """
    cutoff = int(len(data) * (1 - test_size))
    return data[:cutoff], data[cutoff:]

# Chronological split of the daily series using the default test_size
# (0.2): the leading part trains the model, the trailing part is held out.
train_traffic, test_traffic = split_time_series(daily_traffic)
print(f"Training data size: {len(train_traffic)}")
print(f"Testing data size: {len(test_traffic)}")

# Implement SARIMA model
def fit_sarima_model(train_data, order=(1,1,1), seasonal_order=(1,1,1,7)):
    """
    Fit a SARIMAX model on `train_data` and print its summary.

    `order` and `seasonal_order` are forwarded to SARIMAX; stationarity and
    invertibility enforcement are both switched off.

    Returns the fitted results object, or None when anything in the fit or
    the summary printout raised (the error message is printed instead).
    """
    sarimax_options = dict(
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )

    try:
        results = SARIMAX(train_data, **sarimax_options).fit(disp=False)

        print("SARIMA model fitted successfully!")
        print(results.summary())

        return results

    except Exception as e:
        print(f"Error fitting SARIMA model: {e}")
        return None

# Fit SARIMA on the training split with the default orders; `sarima_model`
# is None when fitting failed, so later cells guard on that.
sarima_model = fit_sarima_model(train_traffic)

In [None]:
if sarima_model is not None:
    def generate_sarima_forecast(model, steps):
        """
        Forecast `steps` periods ahead with a fitted SARIMA results object.

        Returns a ``(forecast, conf_int)`` tuple: the point forecasts and the
        confidence-interval frame (lower bound in the first column, upper
        bound in the second, as consumed below).
        """
        # A single get_forecast() call provides both the mean and its
        # interval; calling forecast() as well would compute the same
        # prediction a second time.
        prediction = model.get_forecast(steps=steps)

        return prediction.predicted_mean, prediction.conf_int()

    # Forecast exactly as many steps as there are held-out observations
    forecast_steps = len(test_traffic)
    sarima_forecast, conf_intervals = generate_sarima_forecast(sarima_model, forecast_steps)

    # Line up actuals, forecasts and interval bounds by test date
    forecast_df = pd.DataFrame({
        'date': test_traffic.index,
        'actual': test_traffic.values,
        'forecast': sarima_forecast.values,
        'lower_ci': conf_intervals.iloc[:, 0].values,
        'upper_ci': conf_intervals.iloc[:, 1].values
    })

    print("SARIMA Forecast Results:")
    print(forecast_df.head())

    # Visualize SARIMA results
    plt.figure(figsize=(15, 8))

    # Training history
    plt.plot(train_traffic.index, train_traffic.values, label='Training Data', color='blue')

    # Held-out actuals
    plt.plot(test_traffic.index, test_traffic.values, label='Actual', color='green', linewidth=2)

    # Point forecasts over the test period
    plt.plot(forecast_df['date'], forecast_df['forecast'], label='SARIMA Forecast', color='red', linewidth=2)

    # Shaded band between the lower and upper interval bounds
    plt.fill_between(forecast_df['date'],
                     forecast_df['lower_ci'],
                     forecast_df['upper_ci'],
                     alpha=0.3, color='red', label='Confidence Interval')

    plt.title('Traffic Flow Forecasting with SARIMA')
    plt.xlabel('Date')
    plt.ylabel('Traffic Flow')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Prepare data for Prophet (traffic data)
def prepare_prophet_data(df, date_col, value_col):
    """
    Build the two-column frame Prophet expects.

    `date_col` is exposed as 'ds' and `value_col` as 'y'; the row index of
    `df` is carried over and `df` itself is not modified.
    """
    selected = df.loc[:, [date_col, value_col]]
    return selected.set_axis(['ds', 'y'], axis=1)

# Prepare traffic data for Prophet.
# Only the 'traffic_flow' column is averaged per day: traffic_data can carry
# non-numeric helper columns (the visualization cell adds a string
# 'day_of_week' column), and taking the mean of those raises TypeError on
# pandas 2.x.
prophet_traffic_data = prepare_prophet_data(
    traffic_data.resample('D', on='datetime')['traffic_flow'].mean().reset_index(),
    'datetime',
    'traffic_flow'
)

print("Prophet traffic data prepared:")
print(prophet_traffic_data.head())

# Chronological 80/20 split for Prophet (first 80% of rows train the model)
train_size = int(len(prophet_traffic_data) * 0.8)
prophet_train = prophet_traffic_data[:train_size]
prophet_test = prophet_traffic_data[train_size:]

print(f"Prophet training size: {len(prophet_train)}")
print(f"Prophet testing size: {len(prophet_test)}")

In [None]:
# Implement Prophet model for traffic forecasting
def fit_prophet_model(train_data, daily_seasonality=True, weekly_seasonality=True,
                      yearly_seasonality=True, changepoint_prior_scale=0.05):
    """
    Fit a Prophet model to training data.

    Parameters
    ----------
    train_data : pd.DataFrame
        Frame with the 'ds' and 'y' columns Prophet expects.
    daily_seasonality, weekly_seasonality, yearly_seasonality
        Forwarded to ``Prophet``. The defaults (all True) reproduce the
        previous hard-coded configuration.
        NOTE(review): the callers in this notebook pass daily-resolution
        data; a daily seasonal term may add nothing there - confirm and
        consider passing ``daily_seasonality=False``.
    changepoint_prior_scale : float
        Forwarded to ``Prophet``; default 0.05 as before.

    Returns
    -------
    The fitted Prophet model, or None when construction or fitting raised
    (the error message is printed instead).
    """
    try:
        # Initialize Prophet model with the requested configuration
        model = Prophet(
            daily_seasonality=daily_seasonality,
            weekly_seasonality=weekly_seasonality,
            yearly_seasonality=yearly_seasonality,
            changepoint_prior_scale=changepoint_prior_scale
        )

        # Fit the model
        model.fit(train_data)

        print("Prophet model fitted successfully!")
        return model

    except Exception as e:
        # Best-effort: callers check for None and skip dependent steps
        print(f"Error fitting Prophet model: {e}")
        return None

# Train Prophet on the traffic training split (None signals a failed fit)
prophet_traffic_model = fit_prophet_model(prophet_train)

if prophet_traffic_model is not None:
    # Extend the training dates by one period per held-out row and predict
    future_dates = prophet_traffic_model.make_future_dataframe(periods=len(prophet_test))
    prophet_forecast = prophet_traffic_model.predict(future_dates)

    # The trailing rows of the forecast cover the test period
    test_forecast = prophet_forecast.tail(len(prophet_test))

    fig, axes = plt.subplots(2, 1, figsize=(15, 12))

    # Upper panel: Prophet's own plot over history plus horizon
    prophet_traffic_model.plot(prophet_forecast, ax=axes[0])
    axes[0].set(
        title='Traffic Flow Forecasting with Prophet - Full View',
        xlabel='Date',
        ylabel='Traffic Flow',
    )

    # Lower panel: held-out actuals against forecast and its interval
    axes[1].plot(prophet_test['ds'], prophet_test['y'], label='Actual', color='blue', linewidth=2)
    axes[1].plot(test_forecast['ds'], test_forecast['yhat'], label='Prophet Forecast', color='red', linewidth=2)
    axes[1].fill_between(
        test_forecast['ds'],
        test_forecast['yhat_lower'],
        test_forecast['yhat_upper'],
        alpha=0.3, color='red', label='Confidence Interval',
    )
    axes[1].set(
        title='Traffic Flow Forecasting - Test Period Comparison',
        xlabel='Date',
        ylabel='Traffic Flow',
    )
    axes[1].legend()
    axes[1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Separate figure with the fitted trend and seasonal components
    fig_components = prophet_traffic_model.plot_components(prophet_forecast)
    plt.show()

In [None]:
 # Implement Prophet Model for Stock Prices
 if stock_data is not None:
    # Prepare stock data for Prophet ('date' -> ds, 'price' -> y)
    prophet_stock_data = prepare_prophet_data(stock_data, 'date', 'price')

    # Chronological 80/20 split of the stock series
    stock_train_size = int(len(prophet_stock_data) * 0.8)
    prophet_stock_train = prophet_stock_data[:stock_train_size]
    prophet_stock_test = prophet_stock_data[stock_train_size:]

    # Fit Prophet model for stock prices (None signals a failed fit)
    prophet_stock_model = fit_prophet_model(prophet_stock_train)

    if prophet_stock_model is not None:
        # Predict on the timestamps that actually occur in the data rather
        # than on make_future_dataframe(periods=N): that helper appends N
        # consecutive calendar days, while the test set holds N trading days
        # (weekends/holidays absent), so forecasts and actuals would be
        # paired with different dates in the plots and in the metrics.
        stock_future_dates = prophet_stock_data[['ds']].copy()
        stock_prophet_forecast = prophet_stock_model.predict(stock_future_dates)

        # The trailing rows now share their dates with prophet_stock_test
        stock_test_forecast = stock_prophet_forecast.tail(len(prophet_stock_test))

        # Visualize stock price Prophet results
        fig, axes = plt.subplots(2, 1, figsize=(15, 12))

        # Plot 1: Overall forecast
        prophet_stock_model.plot(stock_prophet_forecast, ax=axes[0])
        axes[0].set_title('Stock Price Forecasting with Prophet - Full View')
        axes[0].set_xlabel('Date')
        axes[0].set_ylabel('Stock Price ($)')

        # Plot 2: Test period comparison
        axes[1].plot(prophet_stock_test['ds'], prophet_stock_test['y'],
                    label='Actual', color='blue', linewidth=2)
        axes[1].plot(stock_test_forecast['ds'], stock_test_forecast['yhat'],
                    label='Prophet Forecast', color='red', linewidth=2)
        axes[1].fill_between(stock_test_forecast['ds'],
                            stock_test_forecast['yhat_lower'],
                            stock_test_forecast['yhat_upper'],
                            alpha=0.3, color='red', label='Confidence Interval')
        axes[1].set_title('Stock Price Forecasting - Test Period Comparison')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Stock Price ($)')
        axes[1].legend()
        axes[1].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()

In [None]:
# Implement evaluation metrics
def calculate_mape(actual, predicted):
    """
    Mean Absolute Percentage Error, expressed in percent.

    Positions where `actual` equals zero are left out of the average so the
    ratio never divides by zero.
    """
    actual_arr = np.array(actual)
    predicted_arr = np.array(predicted)

    # Keep only the positions with a non-zero denominator
    nonzero = actual_arr != 0
    relative_error = (actual_arr[nonzero] - predicted_arr[nonzero]) / actual_arr[nonzero]

    return np.mean(np.abs(relative_error)) * 100

def calculate_rmse(actual, predicted):
    """
    Root Mean Square Error: the square root of sklearn's mean squared error.
    """
    mse = mean_squared_error(actual, predicted)
    return np.sqrt(mse)

def calculate_mae(actual, predicted):
    """
    Mean Absolute Error, delegated to sklearn's mean_absolute_error.
    """
    mae = mean_absolute_error(actual, predicted)
    return mae

def evaluate_model(actual, predicted, model_name):
    """
    Compute MAPE, RMSE and MAE for one model and print a short report.

    Returns a dict with the keys 'MAPE', 'RMSE' and 'MAE'.
    """
    # Evaluated in the order MAPE, RMSE, MAE
    mape, rmse, mae = (
        metric(actual, predicted)
        for metric in (calculate_mape, calculate_rmse, calculate_mae)
    )

    report_lines = (
        f"\n{model_name} Model Evaluation:",
        f"MAPE: {mape:.2f}%",
        f"RMSE: {rmse:.2f}",
        f"MAE: {mae:.2f}",
    )
    for line in report_lines:
        print(line)

    return {'MAPE': mape, 'RMSE': rmse, 'MAE': mae}

In [None]:
# Evaluate SARIMA model for traffic.
# sarima_forecast is only bound when sarima_model is not None, hence the guard.
if sarima_model is not None:
    sarima_metrics = evaluate_model(
        test_traffic.values, 
        sarima_forecast.values, 
        "SARIMA (Traffic)"
    )

# Evaluate Prophet model for traffic.
# test_forecast is only bound when prophet_traffic_model is not None.
if prophet_traffic_model is not None:
    prophet_traffic_metrics = evaluate_model(
        prophet_test['y'].values, 
        test_forecast['yhat'].values, 
        "Prophet (Traffic)"
    )

# Create comparison visualization for traffic models.
# Requires both fits to have succeeded, since every panel below uses the
# forecasts and/or metric dicts of both models.
if sarima_model is not None and prophet_traffic_model is not None:
    plt.figure(figsize=(15, 10))
    
    # Subplot 1: Actual vs Predicted comparison.
    # The dashed diagonal spans the min..max of the SARIMA test actuals and
    # marks where predicted == actual.
    plt.subplot(2, 2, 1)
    plt.scatter(test_traffic.values, sarima_forecast.values, alpha=0.6, label='SARIMA')
    plt.scatter(prophet_test['y'].values, test_forecast['yhat'].values, alpha=0.6, label='Prophet')
    plt.plot([min(test_traffic.values), max(test_traffic.values)], 
             [min(test_traffic.values), max(test_traffic.values)], 'r--', label='Perfect Prediction')
    plt.xlabel('Actual Traffic Flow')
    plt.ylabel('Predicted Traffic Flow')
    plt.title('Actual vs Predicted - Traffic Models')
    plt.legend()
    
    # Subplot 2: Residuals for SARIMA (actual minus forecast, in test order)
    plt.subplot(2, 2, 2)
    sarima_residuals = test_traffic.values - sarima_forecast.values
    plt.plot(sarima_residuals, marker='o', linestyle='-', alpha=0.7)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('SARIMA Residuals - Traffic')
    plt.xlabel('Time')
    plt.ylabel('Residuals')
    
    # Subplot 3: Residuals for Prophet (actual minus forecast, in test order)
    plt.subplot(2, 2, 3)
    prophet_residuals = prophet_test['y'].values - test_forecast['yhat'].values
    plt.plot(prophet_residuals, marker='o', linestyle='-', alpha=0.7, color='orange')
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Prophet Residuals - Traffic')
    plt.xlabel('Time')
    plt.ylabel('Residuals')
    
    # Subplot 4: Model comparison metrics (grouped bars, one group per model)
    plt.subplot(2, 2, 4)
    models = ['SARIMA', 'Prophet']
    mape_values = [sarima_metrics['MAPE'], prophet_traffic_metrics['MAPE']]
    rmse_values = [sarima_metrics['RMSE'], prophet_traffic_metrics['RMSE']]
    
    x = np.arange(len(models))
    width = 0.35
    
    # RMSE is drawn divided by 10 (see the legend label) - presumably so both
    # metrics fit on one axis; the bar heights are therefore not raw RMSE.
    plt.bar(x - width/2, mape_values, width, label='MAPE (%)', alpha=0.7)
    plt.bar(x + width/2, [r/10 for r in rmse_values], width, label='RMSE/10', alpha=0.7)
    
    plt.xlabel('Models')
    plt.ylabel('Error Values')
    plt.title('Model Performance Comparison - Traffic')
    plt.xticks(x, models)
    plt.legend()
    
    plt.tight_layout()
    plt.show()

In [None]:
# Evaluate Prophet model for stock prices.
# stock_data must be tested first: prophet_stock_model is only assigned inside
# the `if stock_data is not None` cell, so when the download failed the name
# does not exist and referencing it would raise NameError. Short-circuit
# evaluation of `and` keeps it from being touched in that case.
if stock_data is not None and prophet_stock_model is not None:
    stock_prophet_metrics = evaluate_model(
        prophet_stock_test['y'].values,
        stock_test_forecast['yhat'].values,
        "Prophet (Stock Price)"
    )

    # Create detailed stock price evaluation
    plt.figure(figsize=(15, 8))

    # Subplot 1: Actual vs Predicted; the dashed diagonal spans the min..max
    # of the actual prices and marks where predicted == actual
    plt.subplot(1, 2, 1)
    plt.scatter(prophet_stock_test['y'].values, stock_test_forecast['yhat'].values, alpha=0.6)
    plt.plot([min(prophet_stock_test['y'].values), max(prophet_stock_test['y'].values)],
             [min(prophet_stock_test['y'].values), max(prophet_stock_test['y'].values)], 'r--')
    plt.xlabel('Actual Stock Price ($)')
    plt.ylabel('Predicted Stock Price ($)')
    plt.title('Actual vs Predicted - Stock Price')

    # Subplot 2: Residuals (actual minus forecast, in test order)
    plt.subplot(1, 2, 2)
    stock_residuals = prophet_stock_test['y'].values - stock_test_forecast['yhat'].values
    plt.plot(stock_residuals, marker='o', linestyle='-', alpha=0.7)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Prophet Residuals - Stock Price')
    plt.xlabel('Time')
    plt.ylabel('Residuals ($)')

    plt.tight_layout()
    plt.show()

In [None]:
# Create a comprehensive results dashboard
def create_results_dashboard():
    """
    Create a comprehensive dashboard showing all results
    """
    fig = plt.figure(figsize=(20, 15))
    
    # Traffic Flow Results
    if sarima_model is not None and prophet_traffic_model is not None:
        # Traffic time series comparison
        ax1 = plt.subplot(3, 2, 1)
        plt.plot(test_traffic.index, test_traffic.values, label='Actual', linewidth=2)
        plt.plot(test_traffic.index, sarima_forecast.values, label='SARIMA', linewidth=2)
        plt.plot(prophet_test['ds'], test_forecast['yhat'].values, label='Prophet', linewidth=2)
        plt.title('Traffic Flow Forecasting Comparison')
        plt.xlabel('Date')
        plt.ylabel('Traffic Flow')
        plt.legend()
        plt.xticks(rotation=45)
        
        # Traffic metrics comparison
        ax2 = plt.subplot(3, 2, 2)
        metrics = ['MAPE', 'RMSE', 'MAE']
        sarima_vals = [sarima_metrics['MAPE'], sarima_metrics['RMSE'], sarima_metrics['MAE']]
        prophet_vals = [prophet_traffic_metrics['MAPE'], prophet_traffic_metrics['RMSE'], prophet_traffic_metrics['MAE']]
        
        x = np.arange(len(metrics))
        width = 0.35
        
        plt.bar(x - width/2, sarima_vals, width, label='SARIMA', alpha=0.7)
        plt.bar(x + width/2, prophet_vals, width, label='Prophet', alpha=0.7)
        plt.xlabel('Metrics')
        plt.ylabel('Values')
        plt.title('Traffic Model Performance Metrics')
        plt.xticks(x, metrics)
        plt.legend()
    
    # Stock Price Results
    if prophet_stock_model is not None and stock_data is not None:
        # Stock price forecast
        ax3 = plt.subplot(3, 2, 3)
        plt.plot(prophet_stock_test['ds'], prophet_stock_test['y'], label='Actual', linewidth=2)
        plt.plot(stock_test_forecast['ds'], stock_test_forecast['yhat'], label='Prophet Forecast', linewidth=2)
        plt.fill_between(stock_test_forecast['ds'], 
                        stock_test_forecast['yhat_lower'], 
                        stock_test_forecast['yhat_upper'], 
                        alpha=0.3, label='Confidence Interval')
        plt.title('Stock Price Forecasting')
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.xticks(rotation=45)
        
        # Stock price error distribution
        ax4 = plt.subplot(3, 2, 4)
        stock_errors = prophet_stock_test['y'].values - stock_test_forecast['yhat'].values
        