## üìö Import Libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Core libraries loaded!")

# Try to import advanced forecasting libraries
try:
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.seasonal import seasonal_decompose
    print("‚úÖ ARIMA available")
    ARIMA_AVAILABLE = True
except ImportError:
    print("‚ö†Ô∏è  ARIMA not available (install: pip install statsmodels)")
    ARIMA_AVAILABLE = False

try:
    from prophet import Prophet
    print("‚úÖ Prophet available")
    PROPHET_AVAILABLE = True
except ImportError:
    print("‚ö†Ô∏è  Prophet not available (install: pip install prophet)")
    PROPHET_AVAILABLE = False

print("\nüìä Ready for forecasting!")

‚úÖ Core libraries loaded!
‚úÖ ARIMA available
‚úÖ ARIMA available
‚úÖ Prophet available

üìä Ready for forecasting!
‚úÖ Prophet available

üìä Ready for forecasting!


## üìÇ Load Economic Data

In [2]:
# Load economic data
data_path = r"D:\project\dragon-fly-data\processdataset\economic_consolidated.csv"
df = pd.read_csv(data_path)

# Convert to numeric
numeric_cols = ['GDPTotalBillion', 'GDPGrowthRate', 'InflationRate', 'GDPPerCapita', 
                'UnemploymentRate', 'FDINetInflowsMillion', 'ExportsPercentGDP']

for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Focus on post-Doi Moi period (1986+) for more stable patterns
df_model = df[df['Year'] >= 1986].copy()

print(f"üìä Data loaded: {len(df_model)} years ({df_model['Year'].min():.0f}-{df_model['Year'].max():.0f})")
print(f"\nAvailable indicators:")
for col in numeric_cols:
    valid_count = df_model[col].notna().sum()
    print(f"  ‚Ä¢ {col}: {valid_count}/{len(df_model)} values ({valid_count/len(df_model)*100:.1f}%)")

# Display recent data
print("\nüìà Recent 5 years:")
print(df_model[['Year', 'GDPTotalBillion', 'GDPGrowthRate', 'InflationRate']].tail().to_string(index=False))

üìä Data loaded: 39 years (1986-2024)

Available indicators:
  ‚Ä¢ GDPTotalBillion: 39/39 values (100.0%)
  ‚Ä¢ GDPGrowthRate: 39/39 values (100.0%)
  ‚Ä¢ InflationRate: 22/39 values (56.4%)
  ‚Ä¢ GDPPerCapita: 39/39 values (100.0%)
  ‚Ä¢ UnemploymentRate: 34/39 values (87.2%)
  ‚Ä¢ FDINetInflowsMillion: 39/39 values (100.0%)
  ‚Ä¢ ExportsPercentGDP: 38/39 values (97.4%)

üìà Recent 5 years:
 Year  GDPTotalBillion  GDPGrowthRate  InflationRate
 2020          346.616           2.87           3.22
 2021          366.475           2.55           1.83
 2022          413.445           8.54           3.16
 2023          433.858           5.07           3.25
 2024          476.388           7.09           3.62


## üéØ Ph·∫ßn 1: D·ª± B√°o GDP Total (2025-2030)

### 1.1 Linear & Polynomial Regression

In [3]:
# Prepare GDP data
gdp_data = df_model[['Year', 'GDPTotalBillion']].dropna()

# Split train/test (last 5 years for testing)
train_data = gdp_data[gdp_data['Year'] <= 2019]
test_data = gdp_data[gdp_data['Year'] > 2019]

X_train = train_data[['Year']].values
y_train = train_data['GDPTotalBillion'].values
X_test = test_data[['Year']].values
y_test = test_data['GDPTotalBillion'].values

# Future years to predict
future_years = np.array([[y] for y in range(2025, 2031)])

# Model 1: Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred_test = lr_model.predict(X_test)
lr_pred_future = lr_model.predict(future_years)

# Model 2: Polynomial Regression (degree 2)
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
X_future_poly = poly.transform(future_years)

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
poly_pred_test = poly_model.predict(X_test_poly)
poly_pred_future = poly_model.predict(X_future_poly)

# Model 3: Polynomial degree 3 (for comparison)
poly3 = PolynomialFeatures(degree=3)
X_train_poly3 = poly3.fit_transform(X_train)
X_future_poly3 = poly3.transform(future_years)

poly3_model = LinearRegression()
poly3_model.fit(X_train_poly3, y_train)
poly3_pred_future = poly3_model.predict(X_future_poly3)

# Calculate metrics
print("üìä Model Performance on Test Data (2020-2024):\n")
print(f"Linear Regression:")
print(f"  MAE: ${mean_absolute_error(y_test, lr_pred_test):.2f}B")
print(f"  RMSE: ${np.sqrt(mean_squared_error(y_test, lr_pred_test)):.2f}B")
print(f"  R¬≤: {r2_score(y_test, lr_pred_test):.4f}")

print(f"\nPolynomial (degree 2):")
print(f"  MAE: ${mean_absolute_error(y_test, poly_pred_test):.2f}B")
print(f"  RMSE: ${np.sqrt(mean_squared_error(y_test, poly_pred_test)):.2f}B")
print(f"  R¬≤: {r2_score(y_test, poly_pred_test):.4f}")

# Display predictions
print("\nüîÆ GDP Forecasts (2025-2030):\n")
forecast_df = pd.DataFrame({
    'Year': range(2025, 2031),
    'Linear': lr_pred_future,
    'Poly2': poly_pred_future,
    'Poly3': poly3_pred_future
})
print(forecast_df.to_string(index=False, float_format=lambda x: f'${x:.1f}B'))

üìä Model Performance on Test Data (2020-2024):

Linear Regression:
  MAE: $136.34B
  RMSE: $140.52B
  R¬≤: -8.0870

Polynomial (degree 2):
  MAE: $13.14B
  RMSE: $15.07B
  R¬≤: 0.8955

üîÆ GDP Forecasts (2025-2030):

 Year  Linear   Poly2   Poly3
 2025 $298.0B $512.3B $546.8B
 2026 $307.0B $545.3B $586.3B
 2027 $316.0B $579.4B $627.6B
 2028 $325.0B $614.5B $670.5B
 2029 $334.0B $650.7B $715.3B
 2030 $342.9B $687.9B $761.7B


### 1.2 ARIMA Model (if available)

In [4]:
if ARIMA_AVAILABLE:
    # Prepare time series data
    gdp_ts = gdp_data.set_index('Year')['GDPTotalBillion']
    
    # Fit ARIMA model (p,d,q) = (1,1,1) - common starting point
    try:
        arima_model = ARIMA(gdp_ts, order=(1, 1, 1))
        arima_fitted = arima_model.fit()
        
        # Forecast
        arima_forecast = arima_fitted.forecast(steps=6)
        arima_pred_future = arima_forecast.values
        
        print("‚úÖ ARIMA(1,1,1) Model Fitted\n")
        print("üîÆ ARIMA Forecasts (2025-2030):\n")
        for i, year in enumerate(range(2025, 2031)):
            print(f"  {year}: ${arima_pred_future[i]:.1f}B")
        
        # Add to forecast dataframe
        forecast_df['ARIMA'] = arima_pred_future
        
    except Exception as e:
        print(f"‚ö†Ô∏è  ARIMA fitting failed: {e}")
        arima_pred_future = None
else:
    print("‚ö†Ô∏è  ARIMA not available - install statsmodels for time series analysis")
    arima_pred_future = None

‚úÖ ARIMA(1,1,1) Model Fitted

üîÆ ARIMA Forecasts (2025-2030):

  2025: $506.8B
  2026: $536.7B
  2027: $566.2B
  2028: $595.3B
  2029: $624.0B
  2030: $652.3B


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(


### 1.3 Facebook Prophet (if available)

In [5]:
if PROPHET_AVAILABLE:
    # Prepare data for Prophet (needs 'ds' and 'y' columns)
    prophet_df = gdp_data.copy()
    prophet_df['ds'] = pd.to_datetime(prophet_df['Year'], format='%Y')
    prophet_df['y'] = prophet_df['GDPTotalBillion']
    prophet_df = prophet_df[['ds', 'y']]
    
    # Fit model
    try:
        prophet_model = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=False,
            daily_seasonality=False,
            changepoint_prior_scale=0.05  # More flexible trend
        )
        prophet_model.fit(prophet_df)
        
        # Create future dataframe
        future = prophet_model.make_future_dataframe(periods=6, freq='Y')
        prophet_forecast = prophet_model.predict(future)
        
        # Extract predictions for 2025-2030
        prophet_pred_future = prophet_forecast[prophet_forecast['ds'].dt.year >= 2025]['yhat'].values[:6]
        
        print("‚úÖ Prophet Model Fitted\n")
        print("üîÆ Prophet Forecasts (2025-2030):\n")
        for i, year in enumerate(range(2025, 2031)):
            print(f"  {year}: ${prophet_pred_future[i]:.1f}B")
        
        # Add to forecast dataframe
        forecast_df['Prophet'] = prophet_pred_future
        
    except Exception as e:
        print(f"‚ö†Ô∏è  Prophet fitting failed: {e}")
        prophet_pred_future = None
else:
    print("‚ö†Ô∏è  Prophet not available - install: pip install prophet")
    prophet_pred_future = None

07:03:25 - cmdstanpy - INFO - Chain [1] start processing
07:03:25 - cmdstanpy - INFO - Chain [1] done processing
07:03:25 - cmdstanpy - INFO - Chain [1] done processing


‚úÖ Prophet Model Fitted

üîÆ Prophet Forecasts (2025-2030):

  2025: $497.7B
  2026: $520.8B
  2027: $543.9B
  2028: $567.0B
  2029: $590.1B
‚ö†Ô∏è  Prophet fitting failed: index 5 is out of bounds for axis 0 with size 5


### 1.4 Ensemble Forecast & Visualization

In [6]:
# Calculate ensemble (average of all models)
model_cols = [col for col in ['Linear', 'Poly2', 'Poly3', 'ARIMA', 'Prophet'] if col in forecast_df.columns]
forecast_df['Ensemble'] = forecast_df[model_cols].mean(axis=1)
forecast_df['Min'] = forecast_df[model_cols].min(axis=1)
forecast_df['Max'] = forecast_df[model_cols].max(axis=1)

print("\nüìä Final GDP Forecasts with Confidence Range:\n")
for idx, row in forecast_df.iterrows():
    print(f"{row['Year']:.0f}: ${row['Ensemble']:.1f}B (Range: ${row['Min']:.1f}B - ${row['Max']:.1f}B)")

# Create visualization
fig = go.Figure()

# Historical data
fig.add_trace(go.Scatter(
    x=gdp_data['Year'],
    y=gdp_data['GDPTotalBillion'],
    mode='lines+markers',
    name='L·ªãch s·ª≠ (1986-2024)',
    line=dict(color='#2c3e50', width=3),
    marker=dict(size=6)
))

# Add each model's forecast
colors = {'Linear': '#e74c3c', 'Poly2': '#3498db', 'Poly3': '#9b59b6', 
          'ARIMA': '#27ae60', 'Prophet': '#f39c12'}

for col in model_cols:
    fig.add_trace(go.Scatter(
        x=forecast_df['Year'],
        y=forecast_df[col],
        mode='lines+markers',
        name=f'{col} (2025-30)',
        line=dict(color=colors.get(col, '#95a5a6'), width=2, dash='dash'),
        marker=dict(size=8)
    ))

# Add ensemble forecast
fig.add_trace(go.Scatter(
    x=forecast_df['Year'],
    y=forecast_df['Ensemble'],
    mode='lines+markers',
    name='Ensemble (TB c√°c m√¥ h√¨nh)',
    line=dict(color='#16a085', width=4),
    marker=dict(size=10, symbol='diamond')
))

# Add confidence interval
fig.add_trace(go.Scatter(
    x=forecast_df['Year'].tolist() + forecast_df['Year'].tolist()[::-1],
    y=forecast_df['Max'].tolist() + forecast_df['Min'].tolist()[::-1],
    fill='toself',
    fillcolor='rgba(22, 160, 133, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Kho·∫£ng tin c·∫≠y',
    showlegend=True
))

# Add vertical line at 2024
fig.add_vline(
    x=2024.5,
    line_dash="solid",
    line_color="gray",
    line_width=2,
    annotation_text="D·ª± b√°o ‚Üí",
    annotation_position="top"
)

fig.update_layout(
    title={
        'text': 'üîÆ D·ª± B√°o GDP Vi·ªát Nam (2025-2030)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20}
    },
    xaxis_title='NƒÉm',
    yaxis_title='GDP (t·ª∑ USD)',
    height=600,
    template='plotly_white',
    hovermode='x unified',
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01,
        bgcolor="rgba(255,255,255,0.8)"
    )
)

fig.show()


üìä Final GDP Forecasts with Confidence Range:

2025: $466.0B (Range: $298.0B - $546.8B)
2026: $493.8B (Range: $307.0B - $586.3B)
2027: $522.3B (Range: $316.0B - $627.6B)
2028: $551.3B (Range: $325.0B - $670.5B)
2029: $581.0B (Range: $334.0B - $715.3B)
2030: $611.2B (Range: $342.9B - $761.7B)


## üìà Ph·∫ßn 2: D·ª± B√°o T·ªëc ƒê·ªô TƒÉng Tr∆∞·ªüng GDP

In [7]:
# Prepare growth rate data
growth_data = df_model[['Year', 'GDPGrowthRate']].dropna()

# Recent trend (2010-2024) - more relevant for forecasting
recent_growth = growth_data[growth_data['Year'] >= 2010]

X_growth = recent_growth[['Year']].values
y_growth = recent_growth['GDPGrowthRate'].values

# Linear model for growth rate
growth_model = LinearRegression()
growth_model.fit(X_growth, y_growth)

# Predict future growth rates
growth_pred_future = growth_model.predict(future_years)

# Calculate historical statistics
mean_growth = recent_growth['GDPGrowthRate'].mean()
std_growth = recent_growth['GDPGrowthRate'].std()

# Create scenarios
optimistic = growth_pred_future + std_growth
baseline = growth_pred_future
pessimistic = growth_pred_future - std_growth

print("üìä Historical Growth Statistics (2010-2024):\n")
print(f"  Average: {mean_growth:.2f}%")
print(f"  Std Dev: {std_growth:.2f}%")
print(f"  Min: {recent_growth['GDPGrowthRate'].min():.2f}% (COVID-19)")
print(f"  Max: {recent_growth['GDPGrowthRate'].max():.2f}%")

print("\nüîÆ Growth Rate Forecasts (2025-2030):\n")
scenario_df = pd.DataFrame({
    'Year': range(2025, 2031),
    'Optimistic': optimistic,
    'Baseline': baseline,
    'Pessimistic': pessimistic
})
print(scenario_df.to_string(index=False, float_format=lambda x: f'{x:.2f}%'))

# Visualization
fig = go.Figure()

# Historical data
fig.add_trace(go.Scatter(
    x=growth_data['Year'],
    y=growth_data['GDPGrowthRate'],
    mode='lines+markers',
    name='L·ªãch s·ª≠',
    line=dict(color='#2c3e50', width=3),
    marker=dict(size=6)
))

# Scenarios
scenarios = [
    ('Optimistic', optimistic, '#27ae60', 'L·∫°c quan'),
    ('Baseline', baseline, '#3498db', 'C∆° b·∫£n'),
    ('Pessimistic', pessimistic, '#e74c3c', 'Bi quan')
]

for name, values, color, label in scenarios:
    fig.add_trace(go.Scatter(
        x=scenario_df['Year'],
        y=values,
        mode='lines+markers',
        name=f'{label}',
        line=dict(color=color, width=3, dash='dash'),
        marker=dict(size=8)
    ))

# Add target line (6.5% - government target)
fig.add_hline(
    y=6.5,
    line_dash="dot",
    line_color="green",
    annotation_text="M·ª•c ti√™u Ch√≠nh ph·ªß (6.5%)",
    annotation_position="right"
)

fig.add_vline(x=2024.5, line_dash="solid", line_color="gray", line_width=2)

fig.update_layout(
    title={
        'text': 'üìà D·ª± B√°o T·ªëc ƒê·ªô TƒÉng Tr∆∞·ªüng GDP (2025-2030)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20}
    },
    xaxis_title='NƒÉm',
    yaxis_title='TƒÉng tr∆∞·ªüng GDP (%)',
    height=500,
    template='plotly_white',
    hovermode='x unified'
)

fig.show()

üìä Historical Growth Statistics (2010-2024):

  Average: 6.12%
  Std Dev: 1.63%
  Min: 2.55% (COVID-19)
  Max: 8.54%

üîÆ Growth Rate Forecasts (2025-2030):

 Year  Optimistic  Baseline  Pessimistic
 2025       7.49%     5.86%        4.23%
 2026       7.46%     5.83%        4.19%
 2027       7.43%     5.79%        4.16%
 2028       7.39%     5.76%        4.13%
 2029       7.36%     5.73%        4.09%
 2030       7.33%     5.69%        4.06%


## üí∞ Ph·∫ßn 3: D·ª± B√°o L·∫°m Ph√°t & Th·∫•t Nghi·ªáp

In [8]:
# Inflation forecast
inflation_data = df_model[['Year', 'InflationRate']].dropna()
recent_inflation = inflation_data[inflation_data['Year'] >= 2010]

X_inf = recent_inflation[['Year']].values
y_inf = recent_inflation['InflationRate'].values

inf_model = LinearRegression()
inf_model.fit(X_inf, y_inf)
inf_pred_future = inf_model.predict(future_years)

# Unemployment forecast
unemp_data = df_model[['Year', 'UnemploymentRate']].dropna()
if len(unemp_data) >= 10:
    recent_unemp = unemp_data[unemp_data['Year'] >= 2010]
    
    X_unemp = recent_unemp[['Year']].values
    y_unemp = recent_unemp['UnemploymentRate'].values
    
    unemp_model = LinearRegression()
    unemp_model.fit(X_unemp, y_unemp)
    unemp_pred_future = unemp_model.predict(future_years)
    
    unemp_available = True
else:
    unemp_available = False
    print("‚ö†Ô∏è  Insufficient unemployment data for forecasting")

# Create dual-axis chart
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('L·∫°m Ph√°t (%)', 'Th·∫•t Nghi·ªáp (%)' if unemp_available else 'Th·∫•t Nghi·ªáp (N/A)'),
    horizontal_spacing=0.12
)

# Inflation
fig.add_trace(
    go.Scatter(
        x=inflation_data['Year'],
        y=inflation_data['InflationRate'],
        mode='lines',
        name='L·ªãch s·ª≠',
        line=dict(color='#2c3e50', width=2)
    ),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=future_years.flatten(),
        y=inf_pred_future,
        mode='lines+markers',
        name='D·ª± b√°o',
        line=dict(color='#e74c3c', width=3, dash='dash'),
        marker=dict(size=8)
    ),
    row=1, col=1
)

# Unemployment
if unemp_available:
    fig.add_trace(
        go.Scatter(
            x=unemp_data['Year'],
            y=unemp_data['UnemploymentRate'],
            mode='lines',
            name='L·ªãch s·ª≠',
            line=dict(color='#2c3e50', width=2),
            showlegend=False
        ),
        row=1, col=2
    )
    
    fig.add_trace(
        go.Scatter(
            x=future_years.flatten(),
            y=unemp_pred_future,
            mode='lines+markers',
            name='D·ª± b√°o',
            line=dict(color='#f39c12', width=3, dash='dash'),
            marker=dict(size=8),
            showlegend=False
        ),
        row=1, col=2
    )

fig.update_xaxes(title_text="NƒÉm", row=1, col=1)
fig.update_xaxes(title_text="NƒÉm", row=1, col=2)
fig.update_yaxes(title_text="%", row=1, col=1)
fig.update_yaxes(title_text="%", row=1, col=2)

fig.update_layout(
    title={
        'text': 'üí∞ D·ª± B√°o L·∫°m Ph√°t & Th·∫•t Nghi·ªáp',
        'x': 0.5,
        'xanchor': 'center'
    },
    height=400,
    template='plotly_white'
)

fig.show()

print("\nüí∞ Inflation Forecasts (2025-2030):\n")
for i, year in enumerate(range(2025, 2031)):
    print(f"  {year}: {inf_pred_future[i]:.2f}%")

if unemp_available:
    print("\nüë• Unemployment Forecasts (2025-2030):\n")
    for i, year in enumerate(range(2025, 2031)):
        print(f"  {year}: {unemp_pred_future[i]:.2f}%")


üí∞ Inflation Forecasts (2025-2030):

  2025: -0.02%
  2026: -0.65%
  2027: -1.29%
  2028: -1.92%
  2029: -2.56%
  2030: -3.19%

üë• Unemployment Forecasts (2025-2030):

  2025: 1.95%
  2026: 2.01%
  2027: 2.06%
  2028: 2.11%
  2029: 2.16%
  2030: 2.21%


## üåè Ph·∫ßn 4: D·ª± B√°o GDP Per Capita

In [9]:
# GDP per capita forecast
percap_data = df_model[['Year', 'GDPPerCapita']].dropna()

X_percap = percap_data[['Year']].values
y_percap = percap_data['GDPPerCapita'].values

# Use polynomial for per capita (captures accelerating growth)
poly_percap = PolynomialFeatures(degree=2)
X_percap_poly = poly_percap.fit_transform(X_percap)
X_future_percap = poly_percap.transform(future_years)

percap_model = LinearRegression()
percap_model.fit(X_percap_poly, y_percap)
percap_pred_future = percap_model.predict(X_future_percap)

# Calculate milestones
print("üìä GDP Per Capita Milestones:\n")
current_percap = percap_data.iloc[-1]['GDPPerCapita']
print(f"  2024: ${current_percap:.0f}")

for i, year in enumerate(range(2025, 2031)):
    value = percap_pred_future[i]
    milestone = ""
    if value >= 5000 and current_percap < 5000:
        milestone = " ‚≠ê Reach $5,000"
    elif value >= 6000 and percap_pred_future[i-1] < 6000 if i > 0 else False:
        milestone = " ‚≠ê Reach $6,000"
    elif value >= 7000 and percap_pred_future[i-1] < 7000 if i > 0 else False:
        milestone = " ‚≠ê Reach $7,000"
    
    print(f"  {year}: ${value:.0f}{milestone}")

# Visualization
fig = go.Figure()

# Historical
fig.add_trace(go.Bar(
    x=percap_data['Year'],
    y=percap_data['GDPPerCapita'],
    name='L·ªãch s·ª≠',
    marker_color='#3498db',
    opacity=0.7
))

# Forecast
fig.add_trace(go.Bar(
    x=future_years.flatten(),
    y=percap_pred_future,
    name='D·ª± b√°o',
    marker_color='#27ae60',
    opacity=0.7
))

# Add milestone lines
milestones = [5000, 6000, 7000]
for milestone in milestones:
    fig.add_hline(
        y=milestone,
        line_dash="dot",
        line_color="red",
        annotation_text=f"${milestone:,}",
        annotation_position="right"
    )

fig.update_layout(
    title={
        'text': 'üåè D·ª± B√°o GDP B√¨nh Qu√¢n ƒê·∫ßu Ng∆∞·ªùi (2025-2030)',
        'x': 0.5,
        'xanchor': 'center'
    },
    xaxis_title='NƒÉm',
    yaxis_title='GDP Per Capita (USD)',
    height=500,
    template='plotly_white',
    barmode='group'
)

fig.show()

üìä GDP Per Capita Milestones:

  2024: $4717
  2025: $5051 ‚≠ê Reach $5,000
  2026: $5361 ‚≠ê Reach $5,000
  2027: $5680 ‚≠ê Reach $5,000
  2028: $6009 ‚≠ê Reach $5,000
  2029: $6347 ‚≠ê Reach $5,000
  2030: $6695 ‚≠ê Reach $5,000


## üìä Ph·∫ßn 5: K·ªãch B·∫£n T·ªïng H·ª£p (2025-2030)

In [10]:
# Create comprehensive scenario analysis
scenarios_full = pd.DataFrame({
    'Year': range(2025, 2031)
})

# Baseline scenario (ensemble forecast)
scenarios_full['GDP_Baseline'] = forecast_df['Ensemble'].values
scenarios_full['Growth_Baseline'] = scenario_df['Baseline'].values
scenarios_full['Inflation_Baseline'] = inf_pred_future
scenarios_full['PerCapita_Baseline'] = percap_pred_future

# Optimistic scenario (+1 std dev on growth)
# Calculate optimistic GDP based on higher growth rates
gdp_2024 = gdp_data.iloc[-1]['GDPTotalBillion']
gdp_optimistic = [gdp_2024]
for growth in scenario_df['Optimistic'].values:
    gdp_optimistic.append(gdp_optimistic[-1] * (1 + growth/100))
scenarios_full['GDP_Optimistic'] = gdp_optimistic[1:]
scenarios_full['Growth_Optimistic'] = scenario_df['Optimistic'].values

# Pessimistic scenario (-1 std dev on growth)
gdp_pessimistic = [gdp_2024]
for growth in scenario_df['Pessimistic'].values:
    gdp_pessimistic.append(gdp_pessimistic[-1] * (1 + growth/100))
scenarios_full['GDP_Pessimistic'] = gdp_pessimistic[1:]
scenarios_full['Growth_Pessimistic'] = scenario_df['Pessimistic'].values

print("="*80)
print("  üéØ K·ªäCH B·∫¢N D·ª∞ B√ÅO KINH T·∫æ VI·ªÜT NAM (2025-2030)")
print("="*80)

print("\nüìä K·ªäCH B·∫¢N C∆† B·∫¢N (Baseline):")
print("\nNƒÉm | GDP (t·ª∑$) | TƒÉng tr∆∞·ªüng (%) | L·∫°m ph√°t (%) | GDP/ng∆∞·ªùi ($)")
print("-" * 70)
for idx, row in scenarios_full.iterrows():
    print(f"{row['Year']:.0f}  | ${row['GDP_Baseline']:>7.1f}    | "
          f"{row['Growth_Baseline']:>6.2f}          | "
          f"{row['Inflation_Baseline']:>5.2f}        | "
          f"${row['PerCapita_Baseline']:>6.0f}")

print("\nüìà K·ªäCH B·∫¢N L·∫†C QUAN (Optimistic):")
print("\nNƒÉm | GDP (t·ª∑$) | TƒÉng tr∆∞·ªüng (%)")
print("-" * 40)
for idx, row in scenarios_full.iterrows():
    print(f"{row['Year']:.0f}  | ${row['GDP_Optimistic']:>7.1f}    | {row['Growth_Optimistic']:>6.2f}")

print("\nüìâ K·ªäCH B·∫¢N BI QUAN (Pessimistic):")
print("\nNƒÉm | GDP (t·ª∑$) | TƒÉng tr∆∞·ªüng (%)")
print("-" * 40)
for idx, row in scenarios_full.iterrows():
    print(f"{row['Year']:.0f}  | ${row['GDP_Pessimistic']:>7.1f}    | {row['Growth_Pessimistic']:>6.2f}")

print("\n" + "="*80)
print("\nüéØ C√ÅC M·ªêC QUAN TR·ªåNG:\n")
gdp_2030_baseline = scenarios_full.iloc[-1]['GDP_Baseline']
gdp_2030_optimistic = scenarios_full.iloc[-1]['GDP_Optimistic']
gdp_2030_pessimistic = scenarios_full.iloc[-1]['GDP_Pessimistic']

print(f"  GDP 2030 (Baseline): ${gdp_2030_baseline:.1f}B")
print(f"  GDP 2030 (Optimistic): ${gdp_2030_optimistic:.1f}B")
print(f"  GDP 2030 (Pessimistic): ${gdp_2030_pessimistic:.1f}B")
print(f"\n  TƒÉng tr∆∞·ªüng TB 2025-2030: {scenarios_full['Growth_Baseline'].mean():.2f}%")
print(f"  L·∫°m ph√°t TB 2025-2030: {scenarios_full['Inflation_Baseline'].mean():.2f}%")
print(f"  GDP/ng∆∞·ªùi 2030: ${scenarios_full.iloc[-1]['PerCapita_Baseline']:.0f}")

# Calculate CAGR
years = 6
cagr_baseline = ((gdp_2030_baseline / gdp_2024) ** (1/years) - 1) * 100
cagr_optimistic = ((gdp_2030_optimistic / gdp_2024) ** (1/years) - 1) * 100
cagr_pessimistic = ((gdp_2030_pessimistic / gdp_2024) ** (1/years) - 1) * 100

print(f"\n  CAGR 2025-2030:")
print(f"    Baseline: {cagr_baseline:.2f}%")
print(f"    Optimistic: {cagr_optimistic:.2f}%")
print(f"    Pessimistic: {cagr_pessimistic:.2f}%")
print("\n" + "="*80)

  üéØ K·ªäCH B·∫¢N D·ª∞ B√ÅO KINH T·∫æ VI·ªÜT NAM (2025-2030)

üìä K·ªäCH B·∫¢N C∆† B·∫¢N (Baseline):

NƒÉm | GDP (t·ª∑$) | TƒÉng tr∆∞·ªüng (%) | L·∫°m ph√°t (%) | GDP/ng∆∞·ªùi ($)
----------------------------------------------------------------------
2025  | $  466.0    |   5.86          | -0.02        | $  5051
2026  | $  493.8    |   5.83          | -0.65        | $  5361
2027  | $  522.3    |   5.79          | -1.29        | $  5680
2028  | $  551.3    |   5.76          | -1.92        | $  6009
2029  | $  581.0    |   5.73          | -2.56        | $  6347
2030  | $  611.2    |   5.69          | -3.19        | $  6695

üìà K·ªäCH B·∫¢N L·∫†C QUAN (Optimistic):

NƒÉm | GDP (t·ª∑$) | TƒÉng tr∆∞·ªüng (%)
----------------------------------------
2025  | $  512.1    |   7.49
2026  | $  550.3    |   7.46
2027  | $  591.1    |   7.43
2028  | $  634.8    |   7.39
2029  | $  681.5    |   7.36
2030  | $  731.5    |   7.33

üìâ K·ªäCH B·∫¢N BI QUAN (Pessimistic):

NƒÉm | GDP (t·ª∑$) | TƒÉ

## üé® Ph·∫ßn 6: Dashboard T·ªïng Quan

In [11]:
# Create comprehensive dashboard
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'GDP Total (t·ª∑ USD)',
        'TƒÉng Tr∆∞·ªüng (%)',
        'GDP/Ng∆∞·ªùi (USD)',
        'L·∫°m Ph√°t (%)'
    ),
    vertical_spacing=0.12,
    horizontal_spacing=0.12
)

# 1. GDP Total - 3 scenarios
for scenario, color, name in [('GDP_Baseline', '#3498db', 'C∆° b·∫£n'),
                               ('GDP_Optimistic', '#27ae60', 'L·∫°c quan'),
                               ('GDP_Pessimistic', '#e74c3c', 'Bi quan')]:
    fig.add_trace(
        go.Scatter(
            x=scenarios_full['Year'],
            y=scenarios_full[scenario],
            mode='lines+markers',
            name=name,
            line=dict(color=color, width=2),
            showlegend=True
        ),
        row=1, col=1
    )

# 2. Growth Rate - 3 scenarios
for scenario, color in [('Growth_Baseline', '#3498db'),
                        ('Growth_Optimistic', '#27ae60'),
                        ('Growth_Pessimistic', '#e74c3c')]:
    fig.add_trace(
        go.Scatter(
            x=scenarios_full['Year'],
            y=scenarios_full[scenario],
            mode='lines+markers',
            line=dict(color=color, width=2),
            showlegend=False
        ),
        row=1, col=2
    )

# 3. Per Capita
fig.add_trace(
    go.Bar(
        x=scenarios_full['Year'],
        y=scenarios_full['PerCapita_Baseline'],
        marker_color='#9b59b6',
        showlegend=False
    ),
    row=2, col=1
)

# 4. Inflation
fig.add_trace(
    go.Scatter(
        x=scenarios_full['Year'],
        y=scenarios_full['Inflation_Baseline'],
        mode='lines+markers',
        line=dict(color='#f39c12', width=3),
        fill='tozeroy',
        fillcolor='rgba(243, 156, 18, 0.2)',
        showlegend=False
    ),
    row=2, col=2
)

# Update axes
for i in range(1, 3):
    for j in range(1, 3):
        fig.update_xaxes(title_text="NƒÉm", row=i, col=j)

fig.update_layout(
    title={
        'text': 'üé® Dashboard D·ª± B√°o Kinh T·∫ø Vi·ªát Nam (2025-2030)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22}
    },
    height=700,
    template='plotly_white',
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

fig.show()

## üìù Ph·∫ßn 7: K·∫øt Lu·∫≠n & Khuy·∫øn Ngh·ªã

In [12]:
print("="*80)
print("  üìù K·∫æT LU·∫¨N & KHUY·∫æN NGH·ªä")
print("="*80)

print("\nüéØ D·ª∞ B√ÅO CH√çNH:\n")
print(f"  1. GDP 2030 d·ª± ki·∫øn ƒë·∫°t ${gdp_2030_baseline:.1f}B (baseline)")
print(f"     ‚Üí TƒÉng {((gdp_2030_baseline/gdp_2024 - 1) * 100):.1f}% so v·ªõi 2024")
print(f"\n  2. TƒÉng tr∆∞·ªüng TB 2025-2030: {scenarios_full['Growth_Baseline'].mean():.2f}%/nƒÉm")
print(f"     ‚Üí Cao h∆°n m·ª•c ti√™u Ch√≠nh ph·ªß (6.5%)" if scenarios_full['Growth_Baseline'].mean() > 6.5 else "     ‚Üí C·∫ßn n·ªó l·ª±c ƒë·ªÉ ƒë·∫°t m·ª•c ti√™u 6.5%")
print(f"\n  3. GDP/ng∆∞·ªùi 2030: ${scenarios_full.iloc[-1]['PerCapita_Baseline']:.0f}")
print(f"     ‚Üí TƒÉng {((scenarios_full.iloc[-1]['PerCapita_Baseline']/current_percap - 1) * 100):.1f}% so v·ªõi 2024")
print(f"\n  4. L·∫°m ph√°t d·ª± ki·∫øn: {scenarios_full['Inflation_Baseline'].mean():.2f}% (TB)")
print(f"     ‚Üí ·ªîn ƒë·ªãnh trong kho·∫£ng ki·ªÉm so√°t c·ªßa NHNN")

print("\n‚ö†Ô∏è  R·ª¶I RO C·∫¶N L∆ØU √ù:\n")
print("  ‚Ä¢ B·∫•t ·ªïn ƒë·ªãa ch√≠nh tr·ªã to√†n c·∫ßu")
print("  ‚Ä¢ Thay ƒë·ªïi ch√≠nh s√°ch th∆∞∆°ng m·∫°i qu·ªëc t·∫ø")
print("  ‚Ä¢ Bi·∫øn ƒë·ªông gi√° nguy√™n li·ªáu, nƒÉng l∆∞·ª£ng")
print("  ‚Ä¢ T√°c ƒë·ªông bi·∫øn ƒë·ªïi kh√≠ h·∫≠u")
print("  ‚Ä¢ C·∫°nh tranh khu v·ª±c gia tƒÉng")

print("\n‚úÖ KHUY·∫æN NGH·ªä:\n")
print("  1. ƒêa d·∫°ng h√≥a th·ªã tr∆∞·ªùng xu·∫•t kh·∫©u")
print("  2. ƒê·∫ßu t∆∞ m·∫°nh v√†o R&D, ƒë·ªïi m·ªõi s√°ng t·∫°o")
print("  3. N√¢ng cao ch·∫•t l∆∞·ª£ng ngu·ªìn nh√¢n l·ª±c")
print("  4. Ph√°t tri·ªÉn c√¥ng nghi·ªáp c√¥ng ngh·ªá cao")
print("  5. C·∫£i thi·ªán m√¥i tr∆∞·ªùng kinh doanh")
print("  6. Th√∫c ƒë·∫©y chuy·ªÉn ƒë·ªïi s·ªë, xanh h√≥a kinh t·∫ø")

print("\nüéì L∆ØU √ù V·ªÄ D·ª∞ B√ÅO:\n")
print("  ‚Ä¢ D·ª± b√°o d·ª±a tr√™n xu h∆∞·ªõng l·ªãch s·ª≠ (1986-2024)")
print("  ‚Ä¢ Gi·∫£ ƒë·ªãnh ƒëi·ªÅu ki·ªán kinh t·∫ø ·ªïn ƒë·ªãnh")
print("  ‚Ä¢ Kh√¥ng d·ª± ƒëo√°n ƒë∆∞·ª£c s·ª± ki·ªán b·∫•t th∆∞·ªùng (Black Swan)")
print("  ‚Ä¢ N√™n c·∫≠p nh·∫≠t m√¥ h√¨nh ƒë·ªãnh k·ª≥ v·ªõi d·ªØ li·ªáu m·ªõi")
print("  ‚Ä¢ S·ª≠ d·ª•ng k·∫øt h·ª£p nhi·ªÅu m√¥ h√¨nh ƒë·ªÉ tƒÉng ƒë·ªô tin c·∫≠y")

print("\n" + "="*80)
print("  ‚ú® Vi·ªát Nam c√≥ ti·ªÅm nƒÉng tƒÉng tr∆∞·ªüng m·∫°nh m·∫Ω ƒë·∫øn 2030")
print("  üöÄ C·∫ßn ch√≠nh s√°ch ph√π h·ª£p ƒë·ªÉ t·ªëi ∆∞u h√≥a c∆° h·ªôi ph√°t tri·ªÉn")
print("="*80)

  üìù K·∫æT LU·∫¨N & KHUY·∫æN NGH·ªä

üéØ D·ª∞ B√ÅO CH√çNH:

  1. GDP 2030 d·ª± ki·∫øn ƒë·∫°t $611.2B (baseline)
     ‚Üí TƒÉng 28.3% so v·ªõi 2024

  2. TƒÉng tr∆∞·ªüng TB 2025-2030: 5.78%/nƒÉm
     ‚Üí C·∫ßn n·ªó l·ª±c ƒë·ªÉ ƒë·∫°t m·ª•c ti√™u 6.5%

  3. GDP/ng∆∞·ªùi 2030: $6695
     ‚Üí TƒÉng 41.9% so v·ªõi 2024

  4. L·∫°m ph√°t d·ª± ki·∫øn: -1.61% (TB)
     ‚Üí ·ªîn ƒë·ªãnh trong kho·∫£ng ki·ªÉm so√°t c·ªßa NHNN

‚ö†Ô∏è  R·ª¶I RO C·∫¶N L∆ØU √ù:

  ‚Ä¢ B·∫•t ·ªïn ƒë·ªãa ch√≠nh tr·ªã to√†n c·∫ßu
  ‚Ä¢ Thay ƒë·ªïi ch√≠nh s√°ch th∆∞∆°ng m·∫°i qu·ªëc t·∫ø
  ‚Ä¢ Bi·∫øn ƒë·ªông gi√° nguy√™n li·ªáu, nƒÉng l∆∞·ª£ng
  ‚Ä¢ T√°c ƒë·ªông bi·∫øn ƒë·ªïi kh√≠ h·∫≠u
  ‚Ä¢ C·∫°nh tranh khu v·ª±c gia tƒÉng

‚úÖ KHUY·∫æN NGH·ªä:

  1. ƒêa d·∫°ng h√≥a th·ªã tr∆∞·ªùng xu·∫•t kh·∫©u
  2. ƒê·∫ßu t∆∞ m·∫°nh v√†o R&D, ƒë·ªïi m·ªõi s√°ng t·∫°o
  3. N√¢ng cao ch·∫•t l∆∞·ª£ng ngu·ªìn nh√¢n l·ª±c
  4. Ph√°t tri·ªÉn c√¥ng nghi·ªáp c√¥ng ngh·ªá cao
  5. C·∫£i thi·ªán m√¥i tr∆∞·ªùng kinh doanh
  6. Th√∫c ƒë·∫©y

## üìö Ph·ª• L·ª•c: Model Comparison

In [13]:
# Compare model predictions visually
print("üìä SO S√ÅNH C√ÅC M√î H√åNH D·ª∞ B√ÅO GDP:\n")

comparison = forecast_df[['Year'] + model_cols + ['Ensemble']].copy()
print(comparison.to_string(index=False, float_format=lambda x: f'${x:.1f}B'))

print("\nüìà ƒê·ªô ch√™nh l·ªách gi·ªØa c√°c m√¥ h√¨nh:\n")
for year in range(2025, 2031):
    row = comparison[comparison['Year'] == year]
    values = row[model_cols].values[0]
    spread = values.max() - values.min()
    spread_pct = (spread / values.mean()) * 100
    print(f"{year}: ${spread:.1f}B ({spread_pct:.1f}%)")

print("\n‚úÖ M√¥ h√¨nh Ensemble (trung b√¨nh) th∆∞·ªùng cho k·∫øt qu·∫£ ·ªïn ƒë·ªãnh nh·∫•t!")

üìä SO S√ÅNH C√ÅC M√î H√åNH D·ª∞ B√ÅO GDP:

 Year  Linear   Poly2   Poly3   ARIMA  Ensemble
 2025 $298.0B $512.3B $546.8B $506.8B   $466.0B
 2026 $307.0B $545.3B $586.3B $536.7B   $493.8B
 2027 $316.0B $579.4B $627.6B $566.2B   $522.3B
 2028 $325.0B $614.5B $670.5B $595.3B   $551.3B
 2029 $334.0B $650.7B $715.3B $624.0B   $581.0B
 2030 $342.9B $687.9B $761.7B $652.3B   $611.2B

üìà ƒê·ªô ch√™nh l·ªách gi·ªØa c√°c m√¥ h√¨nh:

2025: $248.8B (53.4%)
2026: $279.4B (56.6%)
2027: $311.6B (59.7%)
2028: $345.6B (62.7%)
2029: $381.3B (65.6%)
2030: $418.8B (68.5%)

‚úÖ M√¥ h√¨nh Ensemble (trung b√¨nh) th∆∞·ªùng cho k·∫øt qu·∫£ ·ªïn ƒë·ªãnh nh·∫•t!
