In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection, datasets, linear_model, metrics
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.linear_model import LinearRegression #Линейная регрессия
from sklearn.ensemble import RandomForestRegressor #Случайный лес
from sklearn.linear_model import Ridge #Ридж регрессия
from sklearn.linear_model import Lasso #Лассо регрессия
from sklearn.svm import SVR #метод опорных векторов
from sklearn.linear_model import BayesianRidge #байесовская регрессия
from sklearn.neighbors import KNeighborsRegressor as KNNr #K-ближайших соседей
from sklearn.ensemble import GradientBoostingRegressor #Градиентный бустинг

from scipy.optimize import linprog

# Первичный взгляд на данные

In [4]:
train = pd.read_csv('train.csv')
train.head()

Unnamed: 0,galactic year,galaxy,existence expectancy index,existence expectancy at birth,Gross income per capita,Income Index,Expected years of education (galactic years),Mean years of education (galactic years),Intergalactic Development Index (IDI),Education Index,...,"Intergalactic Development Index (IDI), female","Intergalactic Development Index (IDI), male",Gender Development Index (GDI),"Intergalactic Development Index (IDI), female, Rank","Intergalactic Development Index (IDI), male, Rank",Adjusted net savings,"Creature Immunodeficiency Disease prevalence, adult (% ages 15-49), total",Private galaxy capital flows (% of GGP),Gender Inequality Index (GII),y
0,990025,Large Magellanic Cloud (LMC),0.628657,63.1252,27109.23431,0.646039,8.240543,,,,...,,,,,,,,,,0.05259
1,990025,Camelopardalis B,0.818082,81.004994,30166.793958,0.852246,10.671823,4.74247,0.833624,0.467873,...,,,,,,19.177926,,22.785018,,0.059868
2,990025,Virgo I,0.659443,59.570534,8441.707353,0.499762,8.840316,5.583973,0.46911,0.363837,...,,,,,,21.151265,6.53402,,,0.050449
3,990025,UGC 8651 (DDO 181),0.555862,52.333293,,,,,,,...,,,,,,,5.912194,,,0.049394
4,990025,Tucana Dwarf,0.991196,81.802464,81033.956906,1.131163,13.800672,13.188907,0.910341,0.918353,...,,,,,,,5.611753,,,0.154247


In [5]:
test = pd.read_csv('test.csv')
test.head()

Unnamed: 0,galactic year,galaxy,existence expectancy index,existence expectancy at birth,Gross income per capita,Income Index,Expected years of education (galactic years),Mean years of education (galactic years),Intergalactic Development Index (IDI),Education Index,...,Current health expenditure (% of GGP),"Intergalactic Development Index (IDI), female","Intergalactic Development Index (IDI), male",Gender Development Index (GDI),"Intergalactic Development Index (IDI), female, Rank","Intergalactic Development Index (IDI), male, Rank",Adjusted net savings,"Creature Immunodeficiency Disease prevalence, adult (% ages 15-49), total",Private galaxy capital flows (% of GGP),Gender Inequality Index (GII)
0,1007012,KK98 77,0.456086,51.562543,12236.576447,0.593325,10.414164,10.699072,0.547114,0.556267,...,,,,,,,,,,
1,1007012,Reticulum III,0.529835,57.228262,3431.883825,0.675407,7.239485,5.311122,0.497688,0.409969,...,,,,,,,,,,
2,1008016,Reticulum III,0.560976,59.379539,27562.914252,0.594624,11.77489,5.937797,0.544744,0.486167,...,,,,,,,,,,
3,1007012,Segue 1,0.56591,59.95239,20352.232905,0.8377,11.613621,10.067882,0.691641,0.523441,...,,,,,,,,,,
4,1013042,Virgo I,0.588274,55.42832,23959.704016,0.520579,10.392416,6.374637,0.530676,0.580418,...,7.357729,0.583373,0.600445,0.856158,206.674424,224.104054,,7.687626,,


In [6]:
print('количество строк и столбцов в train: ' + str(train.shape) + '; всего значений: ' + str(train.shape[0]*train.shape[1]))
print('количество строк и столбцов в test: ' + str(test.shape) + '; всего значений: ' + str(test.shape[0]*test.shape[1])) 

количество строк и столбцов в train: (3865, 80); всего значений: 309200
количество строк и столбцов в test: (890, 79); всего значений: 70310


In [7]:
print('Всего в train '+str(len(train.galaxy.unique()))+' галактика (страна); ' + str(len(train['galactic year'].unique())) + ' галактических лет')
print('Всего в test '+str(len(test.galaxy.unique()))+' галактика (страна); ' + str(len(test['galactic year'].unique())) + ' галактических лет')

Всего в train 181 галактика (страна); 26 галактических лет
Всего в test 172 галактика (страна); 10 галактических лет


In [8]:
a = set(train['galactic year'].unique())
b = set(test['galactic year'].unique())

print('Галактика, не входящая в train, но входящая в test')
print(b.difference(a))

Галактика, не входящая в train, но входящая в test
{1016064}


In [9]:
test_c = set(test.columns)
train_c = set(train.columns)

print('В тест не входит столбец ')
print(train_c.difference(test_c))

В тест не входит столбец 
{'y'}


In [10]:
#удаление ненужных галактик из train
filt = list(test.galaxy.unique())
train = train.query('galaxy == @filt')

len(train['galaxy'].unique())

172

# Заполнение пропусков

In [11]:
train["galaxy"] = train["galaxy"].astype('category')
train["galaxy"] = train["galaxy"].cat.codes


test["galaxy"] = test["galaxy"].astype('category')
test["galaxy"] = test["galaxy"].cat.codes

In [12]:
imp = IterativeImputer(max_iter=10, random_state=0)

imp.fit(train.values)
tr_new = imp.transform(train.values)

imp.fit(train.iloc[:,:-1].values)
tr_new1 = imp.transform(test.values)



In [13]:
tr_n = pd.DataFrame(tr_new)
tr_n1 = pd.DataFrame(tr_new1)

tr_n.columns = train.columns
tr_n1.columns = test.columns
tr_n1.head()

Unnamed: 0,galactic year,galaxy,existence expectancy index,existence expectancy at birth,Gross income per capita,Income Index,Expected years of education (galactic years),Mean years of education (galactic years),Intergalactic Development Index (IDI),Education Index,...,Current health expenditure (% of GGP),"Intergalactic Development Index (IDI), female","Intergalactic Development Index (IDI), male",Gender Development Index (GDI),"Intergalactic Development Index (IDI), female, Rank","Intergalactic Development Index (IDI), male, Rank",Adjusted net savings,"Creature Immunodeficiency Disease prevalence, adult (% ages 15-49), total",Private galaxy capital flows (% of GGP),Gender Inequality Index (GII)
0,1007012.0,78.0,0.456086,51.562543,12236.576447,0.593325,10.414164,10.699072,0.547114,0.556267,...,8.176749,0.602555,0.637036,0.969703,180.082187,189.685435,10.429761,11.493536,21.706622,0.88064
1,1007012.0,135.0,0.529835,57.228262,3431.883825,0.675407,7.239485,5.311122,0.497688,0.409969,...,7.608054,0.4543,0.533988,0.864666,207.186534,207.892979,8.571801,8.691854,20.785533,0.939801
2,1008016.0,135.0,0.560976,59.379539,27562.914252,0.594624,11.77489,5.937797,0.544744,0.486167,...,7.421059,0.516924,0.581656,0.889696,195.68382,197.982401,12.694055,9.725756,23.424221,0.921074
3,1007012.0,140.0,0.56591,59.95239,20352.232905,0.8377,11.613621,10.067882,0.691641,0.523441,...,8.023184,0.588343,0.636598,0.925058,175.761753,179.126604,14.080313,9.15785,22.814842,0.829622
4,1013042.0,169.0,0.588274,55.42832,23959.704016,0.520579,10.392416,6.374637,0.530676,0.580418,...,7.357729,0.583373,0.600445,0.856158,206.674424,224.104054,5.828995,7.687626,22.220501,0.937344


In [14]:
tr_n1.to_csv('test_full.csv')

# Взгляд на данные

In [15]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3664 entries, 0 to 3864
Data columns (total 80 columns):
galactic year                                                                              3664 non-null int64
galaxy                                                                                     3664 non-null int16
existence expectancy index                                                                 3664 non-null float64
existence expectancy at birth                                                              3664 non-null float64
Gross income per capita                                                                    3636 non-null float64
Income Index                                                                               3636 non-null float64
Expected years of education (galactic years)                                               3547 non-null float64
Mean years of education (galactic years)                                                   3312 non-null f

In [16]:
train.head()

Unnamed: 0,galactic year,galaxy,existence expectancy index,existence expectancy at birth,Gross income per capita,Income Index,Expected years of education (galactic years),Mean years of education (galactic years),Intergalactic Development Index (IDI),Education Index,...,"Intergalactic Development Index (IDI), female","Intergalactic Development Index (IDI), male",Gender Development Index (GDI),"Intergalactic Development Index (IDI), female, Rank","Intergalactic Development Index (IDI), male, Rank",Adjusted net savings,"Creature Immunodeficiency Disease prevalence, adult (% ages 15-49), total",Private galaxy capital flows (% of GGP),Gender Inequality Index (GII),y
0,990025,90,0.628657,63.1252,27109.23431,0.646039,8.240543,,,,...,,,,,,,,,,0.05259
1,990025,28,0.818082,81.004994,30166.793958,0.852246,10.671823,4.74247,0.833624,0.467873,...,,,,,,19.177926,,22.785018,,0.059868
2,990025,169,0.659443,59.570534,8441.707353,0.499762,8.840316,5.583973,0.46911,0.363837,...,,,,,,21.151265,6.53402,,,0.050449
3,990025,154,0.555862,52.333293,,,,,,,...,,,,,,,5.912194,,,0.049394
5,990025,84,0.824692,63.887135,28409.062695,0.671697,14.062458,9.978597,0.815264,0.796807,...,,,,,,40.118699,3.981105,21.012897,,0.052871


In [17]:
test.info() # количество ненулевых строк в столбце и тип строки

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 890 entries, 0 to 889
Data columns (total 79 columns):
galactic year                                                                              890 non-null int64
galaxy                                                                                     890 non-null int16
existence expectancy index                                                                 885 non-null float64
existence expectancy at birth                                                              885 non-null float64
Gross income per capita                                                                    885 non-null float64
Income Index                                                                               885 non-null float64
Expected years of education (galactic years)                                               885 non-null float64
Mean years of education (galactic years)                                                   882 non-null float64
Int

In [18]:
desc = train.describe().T # основные сттистические показатели 
desc

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
galactic year,3664.0,1.000638e+06,6917.134609,990025.000000,995006.000000,1000000.000000,1.005006e+06,1.015056e+06
galaxy,3664.0,8.615857e+01,49.591509,0.000000,43.000000,86.000000,1.290000e+02,1.710000e+02
existence expectancy index,3664.0,8.697104e-01,0.162028,0.227890,0.759411,0.904492,9.899363e-01,1.246908e+00
existence expectancy at birth,3664.0,7.661038e+01,10.462702,34.244062,69.524887,78.789404,8.439609e+01,1.002101e+02
Gross income per capita,3636.0,3.135404e+04,18462.818417,-126.906522,20013.636620,26423.983119,3.673058e+04,1.510727e+05
Income Index,3636.0,8.221158e-01,0.192918,0.292001,0.674836,0.824272,9.666525e-01,1.326911e+00
Expected years of education (galactic years),3547.0,1.465829e+01,3.653047,3.799663,12.472253,14.881222,1.709354e+01,2.695594e+01
Mean years of education (galactic years),3312.0,1.028593e+01,3.338815,1.928166,7.650666,10.405407,1.289633e+01,1.905765e+01
Intergalactic Development Index (IDI),3289.0,8.020458e-01,0.176713,0.273684,0.669714,0.821732,9.383288e-01,1.232814e+00
Education Index,3289.0,7.440325e-01,0.201851,0.189874,0.591228,0.759541,8.931124e-01,1.269625e+00


# Preprocessing

In [19]:
factors = list(train.columns.drop(['galaxy','galactic year', 'y']).values)
X = tr_n.loc[:, factors]
y = tr_n['y'].values

X, X_t, Y, Y_t = train_test_split(X, y, test_size=0.33, random_state=1)

In [20]:
def training(model):
    model.fit(X, Y)

def qual(model):
    
    R2 = model.score(X, Y)
    Yb_ = model.predict(X)
    MSE_b = metrics.mean_squared_error(Y, Yb_)
    RMSE_b = MSE_b**0.5

    Ya_ = model.predict(X_t)
    MSE_a = metrics.mean_squared_error(Y_t, Ya_)
    RMSE_a = MSE_a**0.5

    print('R2: ' + str(R2.round(4)) + ';\nRMSE на ОБУЧАЮЩЕЙ выборке ' + str(RMSE_b.round(4)) + 
          ';\nRMSE на ТЕСТОВОЙ выборке ' + str(RMSE_a.round(4)))

    return RMSE_a

In [21]:
results = {}
results_gr = {}

models = ['lin reg', 'rand forest', 'reg ridge', 'reg lasso', 'svm', 'bayes', 'knn', 'grad boost']

for mod in models:
    results[mod] = 0
    results_gr[mod] = 0
    
results

{'lin reg': 0,
 'rand forest': 0,
 'reg ridge': 0,
 'reg lasso': 0,
 'svm': 0,
 'bayes': 0,
 'knn': 0,
 'grad boost': 0}

# Обучение на заполненных данных

## Линейная регрессия

In [None]:
#Поиск по сетке

#параметры,варианты которых надо задать
parameters = {
    'C'      : [0.001,0.01,0.1,1,10,100,1000],
    'penalty': ['l1','l2']}

    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_lr = GridSearchCV(LinearRegression(), parameters)
gr_lr.fit(X_train, y_train)

# Параметры, давшие наилучший результат. Их потом вставить в модель. Результаты
gr_rf.best_params_

In [22]:
# Сюда вставить полученные лучшие параметры
lin = LinearRegression()
training(lin)

In [23]:
results_gr['lin reg'] = qual(lin)

R2: 0.6574;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0362;
RMSE на ТЕСТОВОЙ выборке 0.0414


In [24]:
lin = LinearRegression()
training(lin)

In [25]:
results['lin reg'] = qual(lin)

R2: 0.6574;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0362;
RMSE на ТЕСТОВОЙ выборке 0.0414


## Cлучайный лес

In [None]:
parameters = {
    'n_estimators'      : [100,150,200],
    'max_depth'         : [7, 10, 13]}
    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_rf = GridSearchCV(RandomForestRegressor(), parameters)
gr_rf.fit(X, Y)

gr_rf.best_params_

In [26]:
# Сюда вставить полученные лучшие параметры
forest_gr = RandomForestRegressor(max_depth = 13, n_estimators = 100)
training(forest_gr)

In [27]:
results_gr['rand forest'] = qual(forest_gr)

R2: 0.9717;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0104;
RMSE на ТЕСТОВОЙ выборке 0.0282


In [28]:
forest = RandomForestRegressor(n_estimators=100)
training(forest)

In [29]:
results['rand forest'] = qual(forest)

R2: 0.975;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0098;
RMSE на ТЕСТОВОЙ выборке 0.0287


## Линейная регрессия ридж

In [None]:
parameters = {
    'alpha'      : [0.01,0.1,1, 10, 100]}
    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_lin_R = GridSearchCV(Ridge(), parameters)
gr_lin_R.fit(X, Y)

gr_lin_R.best_params_

In [30]:
# Сюда вставить полученные лучшие параметры
lin_R_gr = Ridge()
training(lin_R_gr)

In [31]:
results_gr['reg ridge'] = qual(lin_R_gr)

R2: 0.6571;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0362;
RMSE на ТЕСТОВОЙ выборке 0.0413


In [32]:
lin_R = Ridge(alpha=1.0)
training(lin_R)

In [33]:
results['reg ridge'] = qual(lin_R)

R2: 0.6571;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0362;
RMSE на ТЕСТОВОЙ выборке 0.0413


## Линейная регрессия лассо

In [None]:
parameters = {
    'alpha'      : [0.01,0.1,1, 10, 100]}

gr_lin_L = GridSearchCV(Lasso(), parameters)
gr_lin_L.fit(X, Y)

gr_lin_L.best_params_

In [34]:
# Сюда вставить полученные лучшие параметры
lin_L_gr = Lasso(alpha = 0.01)
training(lin_L_gr)

In [35]:
results_gr['reg lasso'] = qual(lin_L_gr)

R2: 0.6283;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0377;
RMSE на ТЕСТОВОЙ выборке 0.0427


In [36]:

lin_L = Lasso(alpha=0.7)
training(lin_L)

In [37]:
results['reg lasso'] = qual(lin_L)

R2: 0.4456;
RMSE на ОБУЧАЮЩЕЙ выборке 0.046;
RMSE на ТЕСТОВОЙ выборке 0.05


## SVM

In [None]:
parameters = {
    'C'      : [0.01, 1,100],
    'gamma'         : ['scale', 'auto']}
    #'max_features': ['auto'],
    #'criterion' :['gini']}


gr_svm = GridSearchCV(SVR(), parameters)
    
    
#gr_svm = GridSearchCV(SVR(), parameters)
gr_svm.fit(X, Y)

gr_svm.best_params_

In [38]:
# Сюда вставить полученные лучшие параметры
vect_gr = SVR(C = 100, gamma = 'scale')
training(vect_gr)

In [39]:
results_gr['svm'] = qual(vect_gr)

R2: 0.3863;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0484;
RMSE на ТЕСТОВОЙ выборке 0.0529


In [40]:
vect = SVR()
training(vect)

In [41]:
results['svm'] = qual(vect)

R2: 0.3799;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0487;
RMSE на ТЕСТОВОЙ выборке 0.0514


## Байсовская ридж регрессия

In [None]:
parameters = {
    'alpha_1'      : [0.01,0.1,1,10,100],
    'alpha_2'         : [0.01,0.1,1,10,100],
    'lambda_1'       :   [0.01,0.1,1,10,100],
    'lambda_2'        :  [0.01,0.1,1,10,100]}
    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_bay = GridSearchCV(BayesianRidge(), parameters)
gr_bay.fit(X, Y)

gr_bay.best_params_

In [42]:
# Сюда вставить полученные лучшие параметры
bay_gr = BayesianRidge(alpha_1=10,alpha_2=1,lambda_1=0.01,lambda_2=0.1)
training(bay_gr)

In [43]:
results_gr['bayes'] = qual(bay_gr)

R2: 0.6571;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0362;
RMSE на ТЕСТОВОЙ выборке 0.0413


In [44]:
bay = BayesianRidge()
training(bay)

In [45]:
results['bayes'] = qual(bay)

R2: 0.6471;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0367;
RMSE на ТЕСТОВОЙ выборке 0.0417


## KNN

In [None]:
parameters = {
    'n_neighbors'      : [3,5,11,19],
    'weights'         : ['uniform', 'distance'],
    'metric'         : ['euclidean', 'manhattan']
    }
    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_knn = GridSearchCV(KNNr(), parameters)
gr_knn.fit(X, Y)

gr_knn.best_params_

In [46]:
# Сюда вставить полученные лучшие параметры
knn_gr = KNNr(metric = 'manhattan', n_neighbors = 19, weights = 'distance' )
training(knn_gr)

In [47]:
results_gr['knn'] = qual(knn_gr)

R2: 1.0;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0;
RMSE на ТЕСТОВОЙ выборке 0.0413


In [48]:
knn = KNNr()
training(knn)

In [49]:
results['knn'] = qual(knn)

R2: 0.7272;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0323;
RMSE на ТЕСТОВОЙ выборке 0.0433


## Gradient Boosting

In [None]:
parameters = {
    'n_estimators'      : [50,100],
    'max_depth'         : [3, 5, 7]}
    #'max_features': ['auto'],
    #'criterion' :['gini']}

gr_gr_bu = GridSearchCV(GradientBoostingRegressor(), parameters)
gr_gr_bu.fit(X, Y)

gr_gr_bu.best_params_

In [50]:
# Сюда вставить полученные лучшие параметры
gr_bu_gr = GradientBoostingRegressor(max_depth = 5)
training(gr_bu_gr)

In [51]:
results_gr['grad boost'] = qual(gr_bu_gr)

R2: 0.9843;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0078;
RMSE на ТЕСТОВОЙ выборке 0.0294


In [52]:
gr_bu = GradientBoostingRegressor()
training(gr_bu)

In [54]:
results['grad boost'] = qual(gr_bu)

R2: 0.9355;
RMSE на ОБУЧАЮЩЕЙ выборке 0.0157;
RMSE на ТЕСТОВОЙ выборке 0.0289


## Result

In [55]:
result = pd.DataFrame()
result['Model'] = results.keys()
result['RMSE_test'] = results.values()

result_gr = pd.DataFrame()
result_gr['Model'] = results_gr.keys()
result_gr['RMSE_grid'] = results_gr.values()

result = pd.merge(result,result_gr, on='Model',how='left')
result = result.sort_values('RMSE_test')

result

Unnamed: 0,Model,RMSE_test,RMSE_grid
1,rand forest,0.028697,0.028224
7,grad boost,0.028863,0.029383
2,reg ridge,0.041298,0.041298
0,lin reg,0.04135,0.04135
5,bayes,0.0417,0.041305
6,knn,0.04332,0.041335
3,reg lasso,0.049999,0.042703
4,svm,0.051385,0.052912


## Предсказание методом случайного леса

In [78]:
tr_n1 = pd.read_csv('test_full.csv')
tr_n1 = tr_n1.iloc[:,1:]
tr_n1.head()

factors = list(tr_n1.columns.drop(['galaxy','galactic year']).values)
X = tr_n1.loc[:, factors]

In [79]:
Y_test = forest_gr.predict(X)

tr_n1['y'] = Y_test

test_to_opt = tr_n1.loc[:, ['y', 'existence expectancy index']]
test_to_opt.head()

Unnamed: 0,y,existence expectancy index
0,0.039174,0.456086
1,0.041312,0.529835
2,0.038873,0.560976
3,0.041214,0.56591
4,0.030489,0.588274


In [80]:
test_to_opt.to_csv('test_to_opt.csv')

# Оптимизация

## Использование оптимизационной функции

In [97]:
test_to_opt = pd.read_csv('test_to_opt.csv')
test_to_opt = test_to_opt.iloc[:, 1:]

index = test_to_opt['y']
pot_inc = (-np.log(index+0.01)+3)**2 / 1000


a = []
for i in range(test_to_opt.shape[0]):
    if test_to_opt['existence expectancy index'][i] < 0.7 :
        a.append(1)
    else:
        a.append(0)
a = np.array(a)
a = a * -1

In [98]:
A_les0 = np.array([1] * test.shape[0]).tolist()
A = [A_les0, a]
b= [50000, -0.1*50000]
c = pot_inc * -1
c = c.tolist()


res = linprog(c, A_ub=A, b_ub=b, bounds=(0,100), method='interior-point')

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwarg

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwarg

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwarg

In [99]:
opti = res.x
test_to_opt['opt_pred'] = opti
#test_to_opt = test_to_opt.iloc[:,1:]
del test_to_opt['existence expectancy index']


test_to_opt1 = test_to_opt.reset_index()
test_to_opt1.columns = ['Index','pred','opt_pred']
#test_to_opt1.to_csv('test_all.csv', index = False)
test_to_opt1.head()

Unnamed: 0,Index,pred,opt_pred
0,0,0.039174,99.999995
1,1,0.041312,99.999994
2,2,0.038873,99.999995
3,3,0.041214,99.999994
4,4,0.030489,99.999997


## Использование случайного заполнения

In [101]:
test_to_opt2 = test_to_opt

test_to_opt2.loc[pot_inc.nlargest(400).index, 'opt_pred']=100
test_to_opt2=test_to_opt.sort_values('y')
test_to_opt2.loc[400:600].opt_pred = 50
test_to_opt2.loc[600:].opt_pred = 0

test_to_opt2=test_to_opt2.sort_index()
test_to_opt2.to_csv('test_all.csv', index = False)
test_to_opt2.head()

Unnamed: 0,y,opt_pred
0,0.039174,100.0
1,0.041312,100.0
2,0.038873,100.0
3,0.041214,100.0
4,0.030489,100.0
