In [None]:
######
# features from IC and FNC using a modified RAPIDS SVM notebook
#####


import numpy as np
# import pandas as pd
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import KFold

# weighted (normalized) absolute error used for scoring
def metric(y_true, y_pred):
    """Return the normalized absolute error of ``y_pred`` against ``y_true``.

    Absolute errors are summed along axis 0, divided by the sum of ``y_true``
    along axis 0, and the resulting ratio(s) are averaged.
    """
    total_abs_error = np.sum(np.abs(y_true - y_pred), axis=0)
    total_true = np.sum(y_true, axis=0)
    return np.mean(total_abs_error / total_true)


# Read the two feature tables; they are joined on their shared "Id" column below.
fnc_df = pd.read_csv("/kaggle/input/trends-assessment-prediction/fnc.csv")
loading_df = pd.read_csv("/kaggle/input/trends-assessment-prediction/loading.csv")

# Every column after the first one is used as a feature.
fnc_features = list(fnc_df.columns[1:])
loading_features = list(loading_df.columns[1:])
df = fnc_df.merge(loading_df, on="Id")

# Rows that have scores are flagged so they can be separated after the left merge.
labels_df = pd.read_csv("/kaggle/input/trends-assessment-prediction/train_scores.csv")
labels_df["is_train"] = True

df = df.merge(labels_df, on="Id", how="left")

# Rows without a matching score row have NaN in "is_train" and form the test set.
is_train_mask = df["is_train"] == True
test_df = df[~is_train_mask].copy()
df = df[is_train_mask].copy()

df.shape, test_df.shape

# Giving less importance to FNC features since they are easier to overfit due to high dimensionality.
FNC_SCALE = 1/500

for frame in (df, test_df):
    frame[fnc_features] *= FNC_SCALE

%%time

NUM_FOLDS = 7
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

target_pred = {'age':'age_pred', 'domain1_var1':'domain1_var1_pred',
               'domain1_var2':'domain1_var2_pred','domain2_var1': 'domain2_var1_pred',
               'domain2_var2': 'domain2_var2_pred'}

features = loading_features + fnc_features

overal_score = 0
# Each entry is (target column, SVR regularization parameter C, weight of the target in the overall score).
for target, c, w in [("age", 100, 0.3), ("domain1_var1", 10, 0.175), ("domain1_var2", 10, 0.175), ("domain2_var1", 10, 0.175), ("domain2_var2", 10, 0.175)]:
    # Out-of-fold predictions for the training rows, and one column of test predictions per fold.
    # (The former `y_train` buffer was removed: it was never used and it read `train_df`
    # before the fold loop had defined it, which raised NameError on a fresh kernel.)
    y_oof = np.zeros(df.shape[0])
    y_test = np.zeros((test_df.shape[0], NUM_FOLDS))

    for f, (train_ind, val_ind) in enumerate(kf.split(df, df)):
        train_df, val_df = df.iloc[train_ind], df.iloc[val_ind]
        # Rows with a missing target cannot be fitted on; they are still predicted below.
        train_df = train_df[train_df[target].notnull()]

        # fitting the model
        model = SVR(C=c, cache_size=3000.0, verbose=True)
        model.fit(train_df[features], train_df[target])

        # out-of-fold predictions for the held-out training rows
        y_oof[val_ind] = model.predict(val_df[features])
        # test set predictions of this fold's model
        y_test[:, f] = model.predict(test_df[features])

    df["pred_{}".format(target)] = y_oof
    # the test prediction is the mean over the fold models
    test_df[target] = y_test.mean(axis=1)

    # Score only the rows whose true target is known.
    has_target = df[target].notnull()
    score = metric(df[has_target][target].values, df[has_target]["pred_{}".format(target)].values)
    overal_score += w*score
    print(target, np.round(score, 4))
    print()

print("Overal score:", np.round(overal_score, 4))


# Build the long-format submission: one row per "<Id>_<target>" pair.
target_cols = ["age", "domain1_var1", "domain1_var2", "domain2_var1", "domain2_var2"]
test_pred_df = test_df[["Id"] + target_cols]

sub_df = pd.melt(test_pred_df, id_vars=["Id"], value_name="Predicted")
sub_df["Id"] = sub_df["Id"].astype("str") + "_" +  sub_df["variable"].astype("str")
sub_df = sub_df.drop(columns="variable").sort_values("Id")

# Every test row must contribute exactly one row per target.
assert sub_df.shape[0] == test_df.shape[0]*5
sub_df.head(10)

sub_df.to_csv("fnc_ic_test_sub.csv", index=False)

# Save the wide test predictions and the out-of-fold train predictions for the
# next-level SVM ensemble (the DataFrame index is written as the first column).
test_pred_df.to_csv('fnc_ic_test_pred')

oof_cols = ["Id"] + ["pred_" + name for name in target_cols]
df[oof_cols].to_csv('fnc_ic_train_pred')

In [None]:
# import all necessary libraries
!pip install -q colored
!pip install -q torchviz

import os
import gc
import cv2
import time
import h5py
import colored
from colored import fg, bg, attr

from skimage import measure
from plotly.offline import iplot
from plotly import figure_factory as FF
from IPython.display import Markdown, display

import numpy as np
import pandas as pd
from random import randint
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import torch
from torchviz import make_dot
torch.backends.cudnn.benchmark = True

import torch.nn as nn
from torch.optim import Adam, RMSprop, SparseAdam, Adamax
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision.models import resnet18, densenet121, mobilenet_v2

## Set hyperparameters and paths <a id="1.2"></a> <font color="#fa7703" size=4>(adjust these to improve LB scores :D)</font>

1. Choose hyperparameters such as epochs, split percentage, learning rate, etc
2. Set important paths and directories to load data and train the model

# --- Optimisation schedule ---
EPOCHS = 5
# LR[0] is applied to the ResNet backbone and LR[1] to the dense layers in train_resnet18.
LR = (1e-4, 1e-3)
# Fraction of the training rows used for fitting; the remainder is the validation set.
SPLIT = 0.8
BATCH_SIZE = VAL_BATCH_SIZE = 32

# --- Image geometry: ResNetModel.forward reshapes its input to (-1, 1, H, W) ---
H, W = 64, 192

# NOTE(review): loading_scale is not referenced in the visible code — confirm whether it is still needed.
loading_scale = 500

# --- Paths ---
MODEL_SAVE_PATH = "resnet_model"
DATA_PATH = '../input/trends-assessment-prediction/'

## Load .csv data <a id="1.3"></a> <font color="#fa7703" size=4>(to access 3D fMRI maps for training and validation)</font>

1. Load dataframes with IDs, targets, and tabular features
2. Extract relevant IDs for training and testing from the dataframes

# Directories whose file names are used below to derive the test / train IDs.
TEST_MAP_PATH = DATA_PATH + 'fMRI_test/'
TRAIN_MAP_PATH = DATA_PATH + 'fMRI_train/'

# CSV inputs: tabular features, training targets and the sample submission.
FEAT_PATH = DATA_PATH + 'fnc.csv'
TARG_PATH = DATA_PATH + 'train_scores.csv'
SAMPLE_SUB_PATH = DATA_PATH + 'sample_submission.csv'

# IDs are the file names with their last four characters removed
# (presumably a 4-character extension such as ".mat" — TODO confirm). They are strings.
TEST_IDS = [map_id[:-4] for map_id in sorted(os.listdir(TEST_MAP_PATH))]
TRAIN_IDS = [map_id[:-4] for map_id in sorted(os.listdir(TRAIN_MAP_PATH))]

# Missing target values are replaced by the mean of their column.
targets = pd.read_csv(TARG_PATH)
targets = targets.fillna(targets.mean())
sample_submission = pd.read_csv(SAMPLE_SUB_PATH)

# Keep only the feature rows whose Id appears in the corresponding map directory.
# NOTE(review): the ID lists hold strings while the CSV "Id" column is presumably numeric;
# whether `Id in [...]` matches then depends on pandas' coercion rules — verify that
# test_df / train_df are non-empty on the pandas version in use.
features = pd.read_csv(FEAT_PATH)
test_df = features.query('Id in {}'.format(TEST_IDS)).reset_index(drop=True)
train_df = features.query('Id in {}'.format(TRAIN_IDS)).reset_index(drop=True)

# Modeling <a id="2"></a>

## Build PyTorch Dataset <a id="2.1"></a> <font color="#fa7703" size=4>(with map slicing and resizing)</font>

1. Retrieve all 53 fMRI maps for a given ID
2. Randomly slice each map along the *x, y,* and *z* axes to get 159 2D slices
3. Resize each 2D map to the shape (64, 64) using OpenCV-2 and concatenate all slices
4. Get targets for given ID and stack 159 times (each slice has the same target)
5. Return the calculated image tensors and target tensors for the given patient ID

loading = pd.read_csv('/kaggle/input/trends-assessment-prediction/loading.csv').set_index('Id')



# The slicing code that produced the preprocessed arrays is the same as in the
# "pytorch data" notebooks; it took too long to run inside this script.

# Collect the paths of the preprocessed .npy files.
# NOTE(review): the slice files were reportedly made with nilearn (find_xyz_cut_coords) — confirm.
import os
import fnmatch

BASEPATH = '/kaggle/input/'

# Dataset folders holding the preprocessed arrays: 7 train parts, 7 test parts, 4 "missing files" parts.
dir_list = (
    ['pytorch-data-train-{}'.format(part) for part in range(1, 8)]
    + ['pytorch-data-test-{}'.format(part) for part in range(1, 8)]
    + ['missing-files-{}'.format(part) for part in range(1, 5)]
)

# Maps the file name without its ".npy" suffix to the full path of the file.
files = {}
for directory in dir_list:
    folder = BASEPATH + directory + '/'
    for file in os.listdir(folder):
        if not fnmatch.fnmatch(file, '*.npy'):
            continue
        files[file[:-4]] = folder + file

class TReNDSDataset(Dataset):
    """Dataset that serves preprocessed .npy arrays looked up by subject Id.

    Each item is loaded from the path stored in the module-level ``files``
    mapping under ``str(Id)``. In training mode the matching target row is
    returned as well, repeated 53 times.
    """

    def __init__(self, data, targets, map_path, is_train):
        """Store the Id frame and, in training mode only, the targets frame.

        data: frame with an ``Id`` column; it is indexed by label in ``__getitem__``.
        targets: frame queried by ``Id``; only stored when ``is_train`` is true.
        map_path: stored on the instance but not used by this class.
        is_train: whether ``__getitem__`` also returns targets.
        """
        self.data = data
        self.is_train = is_train
        self.map_path = map_path
        self.map_id = self.data.Id
        if is_train:
            self.targets = targets

    def __len__(self):
        """Number of rows in the Id frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the image tensor for ``idx``, plus the tiled targets when training."""
        subject_id = self.map_id[idx]

        # Load the preprocessed array for this subject.
        volume = np.load(files[str(subject_id)])
        # Take every third entry along axis 0 at offsets 0, 1 and 2 and join the three
        # groups along axis 1 (only matters when experimenting with the slice size).
        thirds = [volume[offset::3, :, :] for offset in range(3)]
        features = np.concatenate(thirds, axis=1)

        if self.is_train:
            rows = self.targets.query('Id == {}'.format(subject_id)).values
            # Drop the first column (Id) and repeat the target row 53 times.
            tiled = np.repeat(rows[:, 1:], 53, 0).reshape(-1, 5)
            return torch.FloatTensor(features), torch.FloatTensor(tiled)

        # Prediction mode: no targets are returned.
        return torch.FloatTensor(features)

# npy = np.load(files['14696'])
# np.concatenate([npy[::3, :, :], npy[1::3, :, :], npy[2::3, :, :]], axis=1).shape

## Build ResNet model <a id="2.2"></a> <font color="#fa7703" size=4>(with a double dense head)</font>

1. Get ResNet-18 head (till AvgPool)
2. Add two Dense layers (16 and 5 neurons) on top
3. Reshape and tile image to match ResNet input dimensions
4. Pass reshaped image tensor through neural network and get output


class ResNetModel(nn.Module):
    """ResNet-18 backbone (final layer removed) followed by two dense layers (512 -> 16 -> 5)."""

    def __init__(self):
        super(ResNetModel, self).__init__()

        self.identity = lambda x: x
        self.dense_out = nn.Linear(16, 5)
        self.dense_in = nn.Linear(512, 16)
        # load the pretrained ResNet-18 from torchvision and drop its last child module
        self.resnet = resnet18(pretrained=True, progress=False)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])

    def forward(self, img):
        """Return a (num_images, 5) prediction tensor for ``img``.

        ``img`` is reshaped to (-1, 1, H, W) and its single channel is repeated
        three times to match the backbone's 3-channel input.
        """
        # reshape image for compatibility with model
        img = img.reshape(-1, 1, H, W)
        feat = self.resnet(img.repeat(1, 3, 1, 1))

        # Flatten everything except the batch dimension. The previous `feat.squeeze()`
        # also removed the batch dimension when only one image was passed, which made
        # the output (5,) instead of (1, 5).
        conc = self.dense_in(feat.flatten(1))
        return self.identity(self.dense_out(conc))

## Define custom weighted absolute error loss<a id="2.4"></a> <font color="#fa7703" size=4>(for backpropagation)</font>

1. Define weightage for each target
2. Calculate weighted absolute errors and take the average

def weighted_nae(inp, targ):
    """Weighted normalized absolute error between predictions and targets.

    inp:  tensor of predictions with 5 columns (one per target).
    targ: tensor of true values with the same shape as ``inp``.

    Each column's absolute error is multiplied by its target weight
    (0.3 for the first column, 0.175 for the others) and divided by the mean
    of that target column; the per-row sums are then averaged.

    The weights are created on the device and dtype of the inputs, so the loss
    works on CPU as well as on GPU (the previous version always called ``.cuda()``).
    """
    abs_err = torch.abs(inp - targ)
    weights = torch.tensor([0.3, 0.175, 0.175, 0.175, 0.175],
                           dtype=abs_err.dtype, device=abs_err.device)
    return torch.mean(torch.matmul(abs_err, weights / torch.mean(targ, dim=0)))

## Define helper function for training logs <a id="2.5"></a> <font color="#fa7703" size=4>(to check training status)</font>

1. Retrieve training and validation metrics
2. Format metrics and display them during training

def print_metric(data, batch, epoch, start, end, metric, typ):
    """Print one coloured log line for a training batch or a validation epoch.

    data:   object whose ``.item()`` gives the metric value (rounded to 3 decimals).
    batch:  batch counter; ``batch - 1`` is shown when ``typ == "Train"``.
    epoch:  zero-based epoch index; ``epoch + 1`` is shown when ``typ == "Val"``.
    start, end: timestamps; their difference is shown as the elapsed time.
    metric: name of the metric shown in the line.
    typ:    either "Train" or "Val".

    Raises ValueError for any other ``typ`` (this previously failed later with
    an UnboundLocalError on the line prefix).
    """
    if typ == "Train":
        pre = "BATCH %s" + str(batch-1) + "%s  "
    elif typ == "Val":
        pre = "EPOCH %s" + str(epoch+1) + "%s  "
    else:
        raise ValueError("typ must be 'Train' or 'Val', got {!r}".format(typ))

    # Named `elapsed` rather than `time` so the `time` module is not shadowed.
    elapsed = np.round(end - start, 1)
    elapsed_txt = "Time: %s{}%s s".format(elapsed)

    # The two "%s" placeholders in each template receive the colour-on / reset codes.
    fonts = (fg(216), attr('reset'))
    value = np.round(data.item(), 3)
    t = typ, metric, "%s", value, "%s"

    print(pre % fonts , end='')
    print("{} {}: {}{}{}".format(*t) % fonts + "  " + elapsed_txt % fonts)

## Split data into training and validation sets <a id="2.6"></a> <font color="#fa7703" size=4>(to validate performance properly)</font>

1. Split the data into training and validation sets
2. Define the test data loader to run inference after training

# Targets are reshaped to (-1, 5) before the loss is computed, for both splits.
val_out_shape = train_out_shape = (-1, 5)

# Sequential split: the first SPLIT fraction of rows trains, the rest validates.
split = int(SPLIT*len(train_df))
train = train_df.iloc[:split].reset_index(drop=True)
val = train_df.iloc[split:].reset_index(drop=True)

# Target-free loader over ALL training rows, used to predict on the training set after fitting.
test_set_p = TReNDSDataset(data=train_df, targets=None, map_path=TRAIN_MAP_PATH, is_train=False)
test_loader_p = DataLoader(test_set_p, batch_size=VAL_BATCH_SIZE)

# Target-free loader over the test rows.
test_set = TReNDSDataset(data=test_df, targets=None, map_path=TEST_MAP_PATH, is_train=False)
test_loader = DataLoader(test_set, batch_size=VAL_BATCH_SIZE)

## Train model on GPU <a id="2.7"></a> <font color="#fa7703" size=4>(NVIDIA Tesla P100)</font>

1. Define dataloaders, model, optimizer, and learning rate scheduler
2. Train the model in batches and check validation performance at the end of each epoch
3. Save the model architecture and weights and generate testing predictions after training

def _predict_subject_means(network, loader, slices_per_subject=53):
    """Run ``network`` over ``loader`` on the GPU and average predictions per subject.

    Predictions are concatenated in loader order and averaged over consecutive
    groups of ``slices_per_subject`` rows. Returns a NumPy array with one row per group.
    """
    batch_preds = []
    with torch.no_grad():
        for imgs in loader:
            batch_preds.append(network.forward(imgs.cuda()))

    all_preds = torch.cat(batch_preds, axis=0)
    subject_means = [all_preds[idx:idx+slices_per_subject].mean(axis=0)
                     for idx in range(0, len(all_preds), slices_per_subject)]
    return torch.stack(subject_means, axis=0).detach().cpu().numpy()


def train_resnet18():
    """Train ``ResNetModel`` on the GPU and predict on the training and test sets.

    Uses the module-level ``train`` / ``val`` frames, ``targets``, ``test_loader``
    and ``test_loader_p``. After each epoch the validation loss drives the
    learning-rate scheduler and a per-subject validation loss is printed.

    Returns a tuple ``(train_predictions, test_predictions)`` of NumPy arrays,
    each averaged over groups of 53 consecutive prediction rows. Returns None
    when ``TRAIN_MAP_PATH`` does not exist.
    """
    def cuda(tensor):
        return tensor.cuda()

    val_set = TReNDSDataset(val, targets, TRAIN_MAP_PATH, True)
    val_loader = DataLoader(val_set, batch_size=VAL_BATCH_SIZE)
    train_set = TReNDSDataset(train, targets, TRAIN_MAP_PATH, True)
    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

    # The network is moved to the GPU once; the backbone and the dense layers
    # get separate learning rates.
    network = cuda(ResNetModel())
    optimizer =  Adam([{'params': network.resnet.parameters(), 'lr': LR[0]},
                      {'params': network.dense_in.parameters(), 'lr': LR[1]},
                      {'params': network.dense_out.parameters(), 'lr': LR[1]}])

    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.95,
                                  patience=4, verbose=True, eps=1e-6)
    start = time.time()
    for epoch in range(EPOCHS):
        batch = 1
        fonts = (fg(216), attr('reset'))
        print(("EPOCH %s" + str(epoch+1) + "%s") % fonts)

        # ---- training pass ----
        network.train()
        for train_img, train_targs in train_loader:
            train_preds = network.forward(cuda(train_img))
            train_targs = train_targs.reshape(train_out_shape)
            train_loss = weighted_nae(train_preds, cuda(train_targs))

            optimizer.zero_grad()
            train_loss.backward()

            optimizer.step()
            end = time.time()
            batch = batch + 1
            print_metric(train_loss, batch, epoch, start, end, metric="loss", typ="Train")

        print("\n")

        # ---- validation pass ----
        network.eval()
        # The accumulators must be created BEFORE the loop: they used to be reset
        # for every batch, so only the last validation batch was ever scored.
        val_preds, val_targs = [], []
        with torch.no_grad():
            for img, targ in val_loader:
                val_preds.append(network.forward(cuda(img)))
                val_targs.append(targ)

        val_preds = torch.cat(val_preds, axis=0)
        val_targs = torch.cat(val_targs, axis=0)
        val_targs = val_targs.reshape(val_out_shape)
        val_loss = weighted_nae(val_preds, cuda(val_targs))

        # Average predictions and targets over each group of 53 consecutive rows
        # (the dataset repeats every subject's targets 53 times).
        avg_preds = []
        avg_targs = []
        for idx in range(0, len(val_preds), 53):
            avg_preds.append(val_preds[idx:idx+53].mean(axis=0))
            avg_targs.append(val_targs[idx:idx+53].mean(axis=0))

        avg_preds = torch.stack(avg_preds, axis=0)
        avg_targs = torch.stack(avg_targs, axis=0)
        loss = weighted_nae(avg_preds, cuda(avg_targs))

        end = time.time()
        # The scheduler follows the row-level loss; the printed value is the per-subject loss.
        scheduler.step(val_loss)
        print_metric(loss, None, epoch, start, end, metric="loss", typ="Val")

        print("\n")

    # ---- inference on the test set and on the full training set ----
    network.eval()
    if os.path.exists(TRAIN_MAP_PATH):
        test_means = _predict_subject_means(network, test_loader)
        train_means = _predict_subject_means(network, test_loader_p)
        return train_means, test_means

print("STARTING TRAINING ...\n")

# Train the ResNet and collect its train/test predictions for the second-level ensemble.
train_preds_final, test_preds_final = train_resnet18()

print("ENDING TRAINING ...")

# Prediction columns are named after the target columns (every column of `targets` but the first).
target_columns = targets.columns[1:]

train_pred_frame = pd.DataFrame(train_preds_final, columns=target_columns, index=train_df['Id'])
train_pred_frame.to_csv('res_preds_traintime2.csv')

test_pred_frame = pd.DataFrame(test_preds_final, columns=target_columns, index=test_df['Id'])
test_pred_frame.to_csv('res_preds_testtime2.csv')

In [None]:


# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# load csv files
loading = pd.read_csv('/kaggle/input/trends-assessment-prediction/loading.csv')
train_scores = pd.read_csv('/kaggle/input/trends-assessment-prediction/train_scores.csv')

# replace NaN with the mean of the column
data_list = [loading, train_scores]

for data in data_list:
    for col in data.columns:
        # Assign the filled column back instead of `data[col].fillna(..., inplace=True)`:
        # the chained in-place form works on a temporary object and does not update
        # the frame under pandas Copy-on-Write.
        data[col] = data[col].fillna(data[col].mean())

# Order the columns alphabetically by name.
loading = loading[sorted(loading.columns)]

# {'age': SVR(C=50, cache_size=200, coef0=0.0, degree=3, epsilon=1.1, gamma='scale',
#      kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#  'domain1_var1': SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.9, gamma='scale',
#      kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#  'domain1_var2': SVR(C=5, cache_size=200, coef0=0.0, degree=3, epsilon=0.01, gamma='scale',
#      kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#  'domain2_var1': SVR(C=5, cache_size=200, coef0=0.0, degree=3, epsilon=0.9, gamma='scale',
#      kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#  'domain2_var2': SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=1.1, gamma='scale',
#      kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)}
# Rows of `loading` with a score row become the training features; the rest are the rows to predict.
# The builtin `float` replaces `np.float`, which was removed in NumPy 1.24
# (it was only ever an alias of the builtin).
X_train = loading[loading.Id.isin(train_scores.Id)].set_index('Id').astype(float)
X_for_pred = loading[~loading.Id.isin(train_scores.Id)].set_index('Id').astype(float)
# `Id` is cast to float before it becomes the index, exactly as before; the former
# second cast after set_index was a no-op and has been dropped.
y_train = train_scores.astype(float).set_index('Id')

# from sklearn.svm import SVR
# from sklearn.model_selection import KFold, GridSearchCV

# features = ('age', 'domain1_var1', 'domain1_var2','domain2_var1','domain2_var2')


# model = SVR()
# cv = KFold(n_splits = 5, shuffle=True, random_state=42)
# grid = {'kernel': ['linear', 'rbf'], 'epsilon': [0.01, 0.9, 1.1],'C':[5, 50, 1000]}
# gs = GridSearchCV(model, grid, n_jobs=-1, cv=cv, verbose=50, scoring='neg_mean_absolute_error')



# best_models = {}
# for col in features:
#     gs.fit(X_train, y_train[col])
#     best_models[col] = gs.best_estimator_
#     print(gs.best_score_)

# predictions_train = pd.DataFrame(X_train,columns=['Id'], dtype=str)
# predictions_test = pd.DataFrame(X_for_pred,columns=['Id'], dtype=str)

# for col in features:
#         predictions_train[col] = best_models[col].predict(X_train)
        
# for col in features:
#         predictions_test[col] = best_models[col].predict(X_for_pred)
        

# predictions_test = predictions_test.drop(columns='Id').reset_index('Id')
# predictions_train = predictions_train.drop(columns='Id').reset_index('Id')

# predictions_train.to_csv('IC_train.csv')
# predictions_test.to_csv('IC_test.csv')

# First-level predictions, read from the 'rapids-fnc-ic' input dataset.
predictions_train = pd.read_csv('/kaggle/input/rapids-fnc-ic/fnc_ic_train_pred')
predictions_test = pd.read_csv('/kaggle/input/rapids-fnc-ic/fnc_ic_test_pred')

# Both files get the same column names; the first column is labelled 'unnamed'.
for pred_frame in (predictions_train, predictions_test):
    pred_frame.columns = ['unnamed', 'Id', 'age', 'domain1_var1', 'domain1_var2','domain2_var1','domain2_var2']


# Prediction files read from the 'res350' input dataset.
res_train = pd.read_csv('/kaggle/input/res350/res_preds_traintime3.csv')
res_test = pd.read_csv('/kaggle/input/res350/res_preds_testtime3.csv')

# NOTE(review): fnc_train / fnc_test are not referenced in the visible code — confirm they are needed.
fnc_train = pd.read_csv('/kaggle/input/fncwide5/fnc_train5.csv')
fnc_test = pd.read_csv('/kaggle/input/fncwide5/fnc_test5.csv')

# combine columns and scale them down by a constant factor (probably not necessary)

def X_combine(col):
    """Return a two-column frame of ``res_train[col]`` and ``predictions_train[col]``, scaled by 1/200."""
    # Each series becomes one row; transposing turns the two rows into two columns.
    stacked_rows = pd.DataFrame([res_train[col], predictions_train[col]])
    return stacked_rows.T * 1/200

def X_combine_test(col):
    """Return a two-column frame of ``res_test[col]`` and ``predictions_test[col]``, scaled by 1/200."""
    # Each series becomes one row; transposing turns the two rows into two columns.
    stacked_rows = pd.DataFrame([res_test[col], predictions_test[col]])
    return stacked_rows.T * 1/200

# Notebook preview of the combined second-level features for the 'age' target.
X_combine('age')

from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression

# Candidate second-level models and their hyperparameter grids.
# model_list[k] is searched with param_list[k].
svr = SVR(cache_size=3000.0)
gbr = GradientBoostingRegressor(max_depth=15)
ridge = Ridge()
# A fixed random_state makes the shuffled folds, and therefore the selected
# hyperparameters, reproducible between runs.
cv = KFold(n_splits = 9, shuffle=True, random_state=0)
model_list = [svr, gbr, ridge]
param_svr = {'kernel': ['linear', 'rbf'], 'epsilon': [ 0.001, 0.1,0.3, 0.6, 1.5],'C':[0.1, 1, 1000]}
param_gbr = {'n_estimators':[5, 100, 500]}
param_ridge = {'alpha':[0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]}
param_list = [param_svr, param_gbr, param_ridge]

# Despite its name, this list holds the five TARGET columns that are predicted.
features = ['age', 'domain1_var1', 'domain1_var2','domain2_var1','domain2_var2']

# def best_train(target):
#     X_train_best = SelectKBest(f_regression, k=100).fit_transform(X_train.iloc[:, 1:]* 1/300, train_scores[target])
#     return X_train_best

# SVR almost always outperformed the other candidates, so only the SVR grid
# (index 0 of model_list / param_list) is searched for each target. The former
# "keep the better score" comparison could never fire with a single candidate
# and has been removed.
best_models = {}
for target in features:
    print(target)
    gs = GridSearchCV(model_list[0], param_list[0], n_jobs=-1, cv=cv, verbose=5, scoring='neg_mean_absolute_error')
    gs.fit(X_combine(target), train_scores[target])
    # Store (refitted best estimator, its mean cross-validated score) for the submission step.
    best_models[target] = gs.best_estimator_, gs.best_score_
    print(gs.best_estimator_)
    print(gs.best_score_)

# Buffer for the ensemble predictions, filled one column per target further below.
# NOTE(review): 5877 is presumably the number of test rows — TODO confirm against predictions_test.
ensemble_pred = np.zeros((5877, 5))

# best models example for reference

# {'age': (SVR(C=10, cache_size=1000.0, coef0=0.0, degree=3, epsilon=1.5, gamma='scale',
#       kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#   -7.1568774623455225),
#  'domain1_var1': (SVR(C=1000, cache_size=1000.0, coef0=0.0, degree=3, epsilon=0.6, gamma='scale',
#       kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#   -7.371505402344856),
#  'domain1_var2': (SVR(C=0.1, cache_size=1000.0, coef0=0.0, degree=3, epsilon=0.001, gamma='scale',
#       kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#   -8.297672332611237),
#  'domain2_var1': (SVR(C=1, cache_size=1000.0, coef0=0.0, degree=3, epsilon=0.3, gamma='scale',
#       kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#   -8.514475587821579),
#  'domain2_var2': (SVR(C=1, cache_size=1000.0, coef0=0.0, degree=3, epsilon=0.6, gamma='scale',
#       kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
#   -9.08038587009841)}


# make submission

# One column of test predictions per target, in the order of `features`.
# The array is built from the predictions themselves, so its row count no longer
# depends on a hard-coded test-set size or a manual column counter.
ensemble_pred = np.column_stack([
    best_models[col][0].predict(X_combine_test(col)) for col in features
])
ensemble_pred

sample_sub = pd.read_csv('/kaggle/input/trends-assessment-prediction/sample_submission.csv')

# Row-major flattening yields all five targets of the first row, then of the second, and so on.
# NOTE(review): this assumes sample_submission lists rows in that same Id/target order — confirm.
flat_predictions = ensemble_pred.reshape(-1)
sample_sub['Predicted'] = flat_predictions


sample_sub.head()

sample_sub.to_csv('ensemble_sub_14.csv', index=False)