In [9]:
import joblib
import numpy as np
import pandas as pd

In [10]:
# Load the serialized estimator from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts that come from a trusted source.
model_path = '../../src/models/rf_regressor.joblib'
loaded_model = joblib.load(model_path)

# Pull the two named steps out of the loaded object for inspection.
prep, model = (
    loaded_model.named_steps[step_name]
    for step_name in ('preprocessor', 'model')
)

# Show the input feature names recorded on the preprocessor.
prep.feature_names_in_

array(['startup_stage', 'industry', 'region', 'requested_amount',
       'pre_money_valuation', 'team_size', 'founders_experience_years',
       'annual_revenue', 'investment_amount', 'requested_amount_log',
       'pre_money_valuation_log', 'team_size_log',
       'founders_experience_years_log', 'annual_revenue_log'],
      dtype=object)

In [11]:
# Two hand-written example startups to score, written one record per startup.
# Key order of the first record fixes the column order of the frame.
new_startups = pd.DataFrame([
    {
        'startup_stage': 'Series A',
        'industry': 'HealthTech',
        'region': 'US',
        'requested_amount': 2_500_000,
        'pre_money_valuation': 12_000_000,
        'team_size': 8,
        'founders_experience_years': 7,
        'annual_revenue': 120_000,
    },
    {
        'startup_stage': 'Pre-Seed',
        'industry': 'E-commerce',
        'region': 'LATAM',
        'requested_amount': 1_000_000,
        'pre_money_valuation': 8_000_000,
        'team_size': 5,
        'founders_experience_years': 3,
        'annual_revenue': 0,
    },
])

new_startups

Unnamed: 0,startup_stage,industry,region,requested_amount,pre_money_valuation,team_size,founders_experience_years,annual_revenue
0,Series A,HealthTech,US,2500000,12000000,8,7,120000
1,Pre-Seed,E-commerce,LATAM,1000000,8000000,5,3,0


In [12]:
# Log transform
# Every numeric input gets a `<name>_log` companion column computed with
# log1p, so the frame carries the same `*_log` features used at training time.
log_source_columns = (
    'requested_amount',
    'pre_money_valuation',
    'team_size',
    'founders_experience_years',
    'annual_revenue',
)

for feature in log_source_columns:
    # A column that is absent from the frame falls back to 0 (log1p(0) == 0).
    source_values = new_startups.get(feature, 0)
    new_startups[f'{feature}_log'] = np.log1p(source_values)

In [13]:
# Split the batch into one single-row frame per startup.
# A list indexer (`iloc[[i]]`) returns a DataFrame directly and keeps each
# column's dtype. Building the frame from a row Series
# (`pd.DataFrame(row).T`) would turn every column into `object`, because a
# row that mixes strings and numbers can only be held as an object Series.
startup_1 = new_startups.iloc[[0]]
display(startup_1)

startup_2 = new_startups.iloc[[1]]
display(startup_2)

Unnamed: 0,startup_stage,industry,region,requested_amount,pre_money_valuation,team_size,founders_experience_years,annual_revenue,requested_amount_log,pre_money_valuation_log,team_size_log,founders_experience_years_log,annual_revenue_log
0,Series A,HealthTech,US,2500000,12000000,8,7,120000,14.731802,16.300417,2.197225,2.079442,11.695255


Unnamed: 0,startup_stage,industry,region,requested_amount,pre_money_valuation,team_size,founders_experience_years,annual_revenue,requested_amount_log,pre_money_valuation_log,team_size_log,founders_experience_years_log,annual_revenue_log
1,Pre-Seed,E-commerce,LATAM,1000000,8000000,5,3,0,13.815512,15.894952,1.791759,1.386294,0.0


In [14]:
# Mean absolute error in dollars; the prediction cells add/subtract it from
# the point estimate to form the reported investment range.
# NOTE(review): hard-coded value - presumably taken from the model's
# evaluation run; confirm it still matches the loaded artifact.
mae = 363_268

In [15]:
# Prediction for the first startup; the output is treated as log1p-scaled
# (it is inverted with expm1 below).
log_pred = loaded_model.predict(startup_1)

# Convert back to a real dollar amount, rounded to whole dollars.
real_pred = np.expm1(log_pred)
real_pred = round(real_pred[0])

# Range of +/- one MAE (in dollars) around the point estimate.
# An investment amount cannot be negative, so the lower bound is floored at 0
# for predictions that are smaller than the MAE.
lower_border = max(real_pred - mae, 0)
upper_border = real_pred + mae

print(f'Оптимальный объём инвестиций: от ${lower_border:,} до ${upper_border:,} (лучший: ${real_pred:,})')

Оптимальный объём инвестиций: от $1,658,146 до $2,384,682 (лучший: $2,021,414)


In [16]:
# Prediction for the second startup; the output is treated as log1p-scaled
# (it is inverted with expm1 below).
log_pred = loaded_model.predict(startup_2)

# Convert back to a real dollar amount.
real_pred = np.expm1(log_pred)

# Round to the nearest whole dollar (same rounding as the startup_1 cell)
# instead of truncating with int(), so both reports are computed alike.
real_pred = round(real_pred[0])

# Range of +/- one MAE (in dollars) around the point estimate.
# An investment amount cannot be negative, so the lower bound is floored at 0
# for predictions that are smaller than the MAE.
lower_border = max(real_pred - mae, 0)
upper_border = real_pred + mae

print(f'Оптимальный объём инвестиций: от ${lower_border:,} до ${upper_border:,} (лучший: ${real_pred:,})')

Оптимальный объём инвестиций: от $363,452 до $1,089,988 (лучший: $726,720)
