# ARIMA + ESG Portfolio Optimizer

This notebook demonstrates how to forecast monthly stock returns with ARIMA models and construct an ESG-aware portfolio using mean-variance optimization with a soft ESG tilt.

## 1️⃣ Import Libraries

We begin by loading the lightweight libraries required for data handling, forecasting, and optimization.

In [None]:
# Import core numerical and data manipulation libraries
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt

# Data acquisition for historical prices and ESG information
import yfinance as yf

# ARIMA modeling for time series forecasting
from pmdarima import auto_arima

# Optimization tools for portfolio construction
from scipy.optimize import minimize

# Configure matplotlib aesthetics for clearer plots.
# The bundled seaborn styles carry a "seaborn-v0_8-" prefix in newer matplotlib
# releases and the plain "seaborn-" prefix in older ones, so use whichever name
# this installation provides. If neither exists the default style is kept
# rather than failing the whole imports cell.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

## 2️⃣ Data Setup

Download five years of monthly adjusted close prices for a diversified list of large-cap stocks and compute monthly percentage returns.

In [None]:
# Define the investment universe and data parameters
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM', 'UNH', 'PG']
start_period = '5y'      # passed to yf.download as `period`
price_interval = '1mo'   # passed to yf.download as `interval`

# Download monthly prices from Yahoo Finance and keep only the 'Adj Close' columns
data = yf.download(tickers, period=start_period, interval=price_interval, auto_adjust=False)['Adj Close']

# Clean the data: drop rows with all missing values and ensure chronological order
data = data.dropna(how='all').sort_index()

# Compute monthly percentage returns.
# fill_method=None stops pandas from forward-filling missing prices before
# differencing. The implicit forward fill would turn a gap in a ticker's price
# history into a made-up 0% return, and relying on that default is deprecated
# in recent pandas releases. Gaps now stay NaN and are dropped per ticker later.
returns = data.pct_change(fill_method=None).dropna(how='all')

# Display the first few rows for inspection
returns.head()

## 3️⃣ ESG Scores

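Retrieve ESG data from Yahoo Finance where available. Yahoo reports an ESG *risk* rating (lower is better), so it is converted to a score via `100 − risk`, clamped to 0–100, so that a higher score means a better ESG profile. If the data cannot be retrieved (e.g., due to missing coverage), fall back to predefined mock scores on a 0–100 scale.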

In [None]:
# Attempt to fetch sustainability scores from Yahoo Finance; fall back to manual values when necessary
def fetch_esg_scores(ticker_list):
    """Fetch ESG scores for the given tickers on a best-effort basis.

    For each ticker, the ``totalEsg`` row of
    ``yf.Ticker(ticker).sustainability`` is read and converted to
    ``100 - value``, clamped to the range 0-100. Tickers with no usable value
    are left out of the result so the caller can substitute its own defaults.

    Args:
        ticker_list: Iterable of ticker symbols to look up.

    Returns:
        dict mapping each successfully resolved ticker to its converted score.
        The dict may be empty.

    Warns:
        RuntimeWarning: once per ticker whose lookup raised an exception.
    """
    import warnings  # local import: the notebook's import cell does not load it

    esg_values = {}
    for ticker in ticker_list:
        try:
            sustainability = yf.Ticker(ticker).sustainability
            if sustainability is None or 'totalEsg' not in sustainability.index:
                continue
            score = sustainability.loc['totalEsg'].iloc[0]
            # Yahoo Finance returns ESG risk (lower is better). Convert to a 0-100 score.
            # We map risk scores (roughly 0-50) to an ESG score where lower risk => higher ESG score.
            if pd.notnull(score):
                esg_values[ticker] = max(0, min(100, 100 - score))
        except Exception as exc:
            # The lookup stays best-effort (one bad ticker must not abort the
            # run), but the failure is reported instead of silently discarded
            # so a systematic problem is not mistaken for missing coverage.
            warnings.warn(
                f"ESG lookup failed for {ticker}: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )
    return esg_values

# Live ESG lookups; tickers that could not be resolved are simply absent
fetched_esg = fetch_esg_scores(tickers)

# Hand-picked stand-in scores (0-100) used when no live value is available
fallback_esg = dict(
    AAPL=72,
    MSFT=83,
    GOOGL=78,
    AMZN=65,
    META=60,
    TSLA=55,
    NVDA=68,
    JPM=58,
    UNH=70,
    PG=82,
)

# Build the final score table in `tickers` order: a fetched value wins,
# otherwise the fallback entry for that ticker is used
esg_scores = {}
for symbol in tickers:
    default_score = fallback_esg[symbol]
    esg_scores[symbol] = fetched_esg.get(symbol, default_score)

esg_df = pd.DataFrame.from_dict(
    esg_scores,
    orient='index',
    columns=['ESG_Score'],
)
esg_df

## 4️⃣ ARIMA Forecasts

Fit automatic ARIMA models to each stock's monthly return series to forecast next month's expected return.

In [None]:
# Fit an ARIMA model per ticker to forecast the next period return
forecast_means = {}   # ticker -> one-step-ahead forecasted return (float)
arima_models = {}     # ticker -> fitted model, or None when no model was fitted

for ticker in tickers:
    series = returns[ticker].dropna()
    if len(series) < 12:
        # Not enough data to fit ARIMA; assume zero expected return
        forecast_means[ticker] = 0.0
        arima_models[ticker] = None
        continue

    # Non-seasonal stepwise search; candidate models that fail to fit are skipped
    model = auto_arima(
        series,
        seasonal=False,
        stepwise=True,
        suppress_warnings=True,
        error_action='ignore',
        trace=False
    )

    # predict() can return either a NumPy array or a pandas Series, depending on
    # the pmdarima version and the input type. On a Series, `[0]` is a label
    # lookup rather than "first element", so it can fail or warn. Converting to
    # an array first makes the extraction positional and yields a plain float.
    forecast = float(np.asarray(model.predict(n_periods=1), dtype=float)[0])
    forecast_means[ticker] = forecast
    arima_models[ticker] = model

forecast_series = pd.Series(forecast_means)
forecast_series

## 5️⃣ Mean-Variance Optimization with ESG Tilt

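Construct a portfolio that balances expected return, risk, and ESG exposure. The objective maximizes expected return while penalizing variance and rewarding higher ESG scores: $U(w) = w^\top \mu - \lambda\, w^\top \Sigma\, w + \alpha\, w^\top s$, where $\mu$ is the vector of forecasted returns, $\Sigma$ the historical covariance matrix of returns, $s$ the ESG scores rescaled to 0–1, $\lambda$ the risk-aversion parameter, and $\alpha$ the strength of the ESG tilt. Weights are constrained to lie between 0 and 1 (no short selling) and to sum to 1.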

In [None]:
# Compute the historical covariance matrix of returns.
# Rows and columns are put into `tickers` order explicitly. The optimizer works
# on plain positional weight vectors, so the covariance matrix has to share the
# ordering of `mu` and `esg_array` below. The column order of `returns` comes
# from the download and may differ from `tickers`. A ticker missing from
# `returns` raises a KeyError here instead of silently misaligning the math.
cov_matrix = returns.cov().loc[tickers, tickers]

# Define optimization hyperparameters
risk_aversion = 0.5  # λ parameter balancing return vs. risk
esg_penalty = 0.3    # α parameter controlling ESG tilt strength

# Convert data to aligned numpy arrays (both in `tickers` order)
mu = forecast_series.loc[tickers].values
esg_array = esg_df.loc[tickers, 'ESG_Score'].values / 100  # rescale 0-100 scores to 0-1

# Objective function for minimization (negative of the utility expression)
def portfolio_objective(weights):
    """Return the negated portfolio utility, suitable for a minimizer.

    The utility is::

        weights·mu - risk_aversion * weights'·cov_matrix·weights
                   + esg_penalty * weights·esg_array

    It reads the module-level ``mu``, ``cov_matrix``, ``esg_array``,
    ``risk_aversion`` and ``esg_penalty``.

    Args:
        weights: Portfolio weights, one per asset, in the same order as
            ``mu`` and ``esg_array``.

    Returns:
        The negative of the utility, so that minimizing it maximizes utility.
    """
    weights = np.asarray(weights)
    portfolio_return = np.dot(weights, mu)
    portfolio_variance = np.dot(weights, cov_matrix @ weights)
    esg_bonus = np.dot(weights, esg_array)
    # The ESG term is added: a higher weighted ESG score must raise utility.
    # Subtracting it would tilt the portfolio away from high-ESG names. Despite
    # its name, `esg_penalty` is the strength of this reward.
    utility = portfolio_return - risk_aversion * portfolio_variance + esg_penalty * esg_bonus
    return -utility

# Equality constraint: the portfolio is fully invested (weights add up to 1)
def _fully_invested(w):
    return np.sum(w) - 1


constraints = {'type': 'eq', 'fun': _fully_invested}

# Long-only box bounds: every weight stays within [0, 1]
n_assets = len(tickers)
bounds = [(0.0, 1.0)] * n_assets

# Start the search from the equally weighted portfolio
initial_weights = np.full(n_assets, 1 / n_assets)

# Solve the constrained problem with SLSQP
optimization_result = minimize(
    fun=portfolio_objective,
    x0=initial_weights,
    method='SLSQP',
    bounds=bounds,
    constraints=constraints,
)

# Stop here if the solver did not report success
if not optimization_result.success:
    raise RuntimeError(f"Optimization failed: {optimization_result.message}")

optimal_weights = optimization_result.x
optimal_weights

## 6️⃣ Results

Summarize the optimized portfolio and visualize the allocation. Forecasted returns are shown alongside ESG scores and the resulting weights.

In [None]:
# Assemble one row per ticker: ESG score, forecasted return and optimized weight
esg_column = esg_df.loc[tickers, 'ESG_Score'].values
forecast_column = forecast_series.loc[tickers].values

summary_df = pd.DataFrame(
    dict(
        Ticker=tickers,
        ESG_Score=esg_column,
        Forecasted_Return=forecast_column,
        Optimal_Weight=optimal_weights,
    )
)

summary_df

In [None]:
# Bar chart of the optimized weight per ticker
bar_weights = summary_df['Optimal_Weight']

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(summary_df['Ticker'], bar_weights, color='steelblue')
ax.set(
    title='Optimal Portfolio Weights',
    xlabel='Ticker',
    ylabel='Weight',
    # leave 20% headroom above the tallest bar
    ylim=(0, bar_weights.max() * 1.2),
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Plot pie chart of weight allocation.
# Pie wedge sizes must be non-negative, so weights are clipped at zero in case
# the optimizer returned a tiny negative value within its tolerance. Positions
# below 0.01% are dropped. They would print as "0.0%" anyway, and their labels
# would pile up on top of each other. The shares shown are relative to the
# remaining positions.
pie_weights = summary_df.set_index('Ticker')['Optimal_Weight'].clip(lower=0)
pie_weights = pie_weights[pie_weights > 1e-4]

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(pie_weights, labels=pie_weights.index, autopct='%1.1f%%', startangle=140)
ax.set_title('Portfolio Allocation by Weight')
plt.show()

In [None]:
# Optionally visualize historical returns and ARIMA forecast for a subset of tickers
example_tickers = tickers[:3]
months_to_plot = 36  # number of most recent monthly observations to draw

fig, axes = plt.subplots(len(example_tickers), 1, figsize=(10, 8), sharex=True)
# plt.subplots returns a bare Axes rather than an array when there is a single
# row. Normalising to a 1-D array lets the loop also work for one example ticker.
axes = np.atleast_1d(axes)

for ax, ticker in zip(axes, example_tickers):
    series = returns[ticker].dropna()
    plot_series = series.tail(months_to_plot)
    ax.plot(plot_series.index, plot_series.values, label='Historical Returns', marker='o')
    # Draw the forecast line only where a model was actually fitted
    if arima_models[ticker] is not None:
        forecast_value = forecast_series.loc[ticker]
        ax.axhline(forecast_value, color='red', linestyle='--', label='Forecasted Return')
    ax.set_title(f"{ticker} Monthly Returns")
    ax.set_ylabel('Return')
    ax.legend()

plt.xlabel('Date')
plt.tight_layout()
plt.show()

## 7️⃣ Conclusion

The optimizer combines ARIMA-based return forecasts with mean-variance optimization and an ESG tilt. High-ESG companies receive a larger weight allocation when expected returns and risk are comparable, resulting in a portfolio that balances financial performance with sustainability considerations.