In [None]:
pip install yfinance pandas numpy pmdarima scikit-learn

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime, timedelta

def get_historical_revenue(ticker, start_date, end_date):
    """Fetch a company's reported total revenue, expressed in billions.

    Args:
        ticker: Ticker symbol handed to ``yf.Ticker``.
        start_date: Inclusive lower bound on the statement date; anything
            ``pd.Timestamp`` accepts (e.g. ``'2014-08-28'``).
        end_date: Inclusive upper bound on the statement date, same format.

    Returns:
        pandas.Series of the 'Total Revenue' row divided by 1e9, indexed by
        statement date in ascending order, with missing values removed.

    Raises:
        ValueError: If the statements contain no 'Total Revenue' row, or if
            no non-missing value falls inside the requested date range.
    """
    company = yf.Ticker(ticker)
    statements = company.financials
    if statements is None or 'Total Revenue' not in statements.index:
        # Surface a readable error instead of a bare KeyError('Total Revenue').
        raise ValueError(f"No 'Total Revenue' data available for {ticker}.")

    revenue = statements.T['Total Revenue']  # transpose: dates become the index
    revenue.index = pd.to_datetime(revenue.index)

    # Compare timestamps with timestamps rather than relying on implicit
    # string-to-date coercion in the index comparison.
    lower, upper = pd.Timestamp(start_date), pd.Timestamp(end_date)
    in_range = (revenue.index >= lower) & (revenue.index <= upper)

    # Drop missing entries before converting: NaN values would otherwise be
    # passed on to the model fit downstream.
    revenue = revenue[in_range].dropna().sort_index().astype(float) / 1e9  # convert to billions

    if revenue.empty:
        raise ValueError("No revenue data for that date range.")

    print(f"Retrieved revenue data (billions USD):\n{revenue}")
    return revenue

def predict_revenue(ticker, years=5):
    """Forecast yearly revenue for ``ticker`` with an auto-selected ARIMA model.

    Revenue from the last ten years (as returned by
    ``get_historical_revenue``) is aligned to calendar year ends, a
    non-seasonal ARIMA model is chosen by ``auto_arima``, and ``years``
    future values are predicted.

    Args:
        ticker: Ticker symbol to forecast.
        years: Number of yearly periods to forecast; must be at least 1.

    Returns:
        pandas.Series of predicted revenue (billions), indexed by the
        December year-end dates following the last historical observation.

    Raises:
        ValueError: If ``years`` is less than 1, or fewer than 4 yearly
            observations are available after preparation.
    """
    if years < 1:
        raise ValueError(f"years must be at least 1. Got {years}.")

    now = datetime.now()  # read the clock once so both bounds are consistent
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=365*10)).strftime('%Y-%m-%d')

    revenue = get_historical_revenue(ticker, start_date, end_date)
    revenue.index = pd.to_datetime(revenue.index)  # make sure index is datetime

    # Use an offset object instead of the 'A-DEC' string alias: the alias was
    # renamed to 'YE-DEC' in newer pandas, while the offset works in both.
    year_end = pd.offsets.YearEnd(month=12)

    # inferred_freq reports 'A-DEC' or 'YE-DEC' depending on pandas version.
    if revenue.index.inferred_freq not in ('A-DEC', 'YE-DEC'):
        # Resample to yearly; years without a report come back as NaN and
        # are dropped so they never reach the model.
        revenue = revenue.resample(year_end).last().dropna()

    # Validate after preparation, since that is what the model is fitted on.
    if len(revenue) < 4:
        raise ValueError(f"Not enough historical data. Got {len(revenue)} points.")

    print(f"Prepared revenue data for prediction:\n{revenue}")

    model = auto_arima(revenue.values, seasonal=False, stepwise=True, suppress_warnings=True, error_action="ignore")
    forecast = model.predict(n_periods=years)  # forecast revenue

    future_dates = pd.date_range(start=revenue.index[-1] + pd.DateOffset(years=1), periods=years, freq=year_end)
    # np.asarray guards against index alignment if predict() returns a Series.
    predicted_revenue = pd.Series(np.asarray(forecast), index=future_dates)

    return predicted_revenue

def test_model(ticker, test_size=1, as_of=None):
    """Back-test the ARIMA forecast against the most recent reported revenue.

    The last ``test_size`` observations are held out, a model is fitted on
    the remainder, and the held-out values are compared with the model's
    predictions. Results are printed; nothing is returned.

    Args:
        ticker: Ticker symbol to evaluate.
        test_size: Number of trailing observations to hold out. At least 3
            observations must remain for training.
        as_of: Evaluation date (anything ``pd.Timestamp`` accepts). Data
            from the ten years up to this date is used. Defaults to the
            current time, so the back-test does not go stale.

    Raises:
        ValueError: If fewer than 4 observations are available, or
            ``test_size`` is below 1 or leaves fewer than 3 training points.
    """
    current_date = datetime.now() if as_of is None else pd.Timestamp(as_of)
    past_date = current_date - timedelta(days=365*10)  # get 10 years of data

    actual_revenue = get_historical_revenue(ticker, past_date.strftime('%Y-%m-%d'), current_date.strftime('%Y-%m-%d'))
    actual_revenue = actual_revenue.dropna()  # missing values cannot be fitted or scored
    print(f"Actual revenue data (billions USD):\n{actual_revenue}")

    if len(actual_revenue) < 4:
        raise ValueError(f"Not enough historical data for testing. Got {len(actual_revenue)} points.")

    if test_size < 1 or len(actual_revenue) - test_size < 3:
        raise ValueError(
            f"test_size must be >= 1 and leave at least 3 training points. "
            f"Got test_size={test_size} with {len(actual_revenue)} points."
        )

    train_data = actual_revenue.iloc[:-test_size]
    test_data = actual_revenue.iloc[-test_size:]

    model = auto_arima(train_data.values, seasonal=False, stepwise=True, suppress_warnings=True, error_action="ignore")
    prediction = model.predict(n_periods=test_size)
    # np.asarray guards against index alignment if predict() returns a Series.
    prediction = pd.Series(np.asarray(prediction), index=test_data.index)

    print(f"prediction for testing:\n{prediction}")

    comparison = pd.concat([test_data, prediction], axis=1)
    comparison.columns = ['Actual', 'Predicted']

    mse = mean_squared_error(comparison['Actual'], comparison['Predicted'])
    print(f"MSE: {mse}")

    # R² compares against the variance of the actual values, which does not
    # exist for a single observation; only report it when it is defined.
    if len(comparison) >= 2:
        r2 = r2_score(comparison['Actual'], comparison['Predicted'])
        print(f"R² Score: {r2}")
    else:
        print("R² Score: undefined (needs at least 2 test points)")

    print("\nComparison (billions USD):")
    print(comparison)

ticker = 'AAPL'  # can be any ticker
forecast_years = 5  # forecast horizon; drives both the model call and the heading
try:
    future_revenue = predict_revenue(ticker, years=forecast_years)
    print(f"{forecast_years}-Year Revenue Prediction for {ticker} (billions USD):")
    print(future_revenue)

    print("\nTesting model with historical data:")
    test_model(ticker)
except Exception as e:
    # Deliberate best-effort: report the failure and keep the notebook usable
    # rather than ending the cell with a traceback.
    print(f"Error: {e}")
    print("Try a different company or time range.")


Retrieved revenue data (in billions USD):
2020-09-30    274.515
2021-09-30    365.817
2022-09-30    394.328
2023-09-30    383.285
Name: Total Revenue, dtype: float64
Prepared revenue data for prediction (in billions USD):
2020-12-31    274.515
2021-12-31    365.817
2022-12-31    394.328
2023-12-31    383.285
Freq: A-DEC, Name: Total Revenue, dtype: float64
5-Year Revenue Prediction for AAPL (in billions USD):
2024-12-31    354.48625
2025-12-31    354.48625
2026-12-31    354.48625
2027-12-31    354.48625
2028-12-31    354.48625
Freq: A-DEC, dtype: float64

Testing the model with historical data:
Retrieved revenue data (in billions USD):
2020-09-30    274.515
2021-09-30    365.817
2022-09-30    394.328
2023-09-30    383.285
Name: Total Revenue, dtype: float64
Retrieved actual revenue data for testing (in billions USD):
2020-09-30    274.515
2021-09-30    365.817
2022-09-30    394.328
2023-09-30    383.285
Name: Total Revenue, dtype: float64
Prediction for testing (in billions USD):
2023-

