**Notebook de referencia** https://www.kaggle.com/gogo827jz/rapids-svm-on-gpu-6000-models-in-1-hour

In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from time import time

# Carga de datos

In [None]:
# Load the four competition CSV files from the Kaggle input directory.
sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')  # submission template, filled with predictions later
test_features = pd.read_csv('../input/lish-moa/test_features.csv')  # features of the rows to predict
train_features = pd.read_csv('../input/lish-moa/train_features.csv')  # features used for training
train_targets= pd.read_csv('../input/lish-moa/train_targets_scored.csv')  # scored target labels for training

# Preprocesado de los datos

### Categorical Pipelines

In [None]:
def categorical_encoding(df):
  """Replace the categorical columns of ``df`` with integer codes.

  The columns ``cp_type``, ``cp_dose`` and ``cp_time`` are overwritten in
  place; values missing from a column's lookup table become NaN.

  Args:
    df: DataFrame containing the three columns listed above.

  Returns:
    The same DataFrame object, with the encoded columns.
  """
  lookup_tables = {
    'cp_type': {'trt_cp': 0, 'ctl_vehicle': 1},
    'cp_dose': {'D1': 0, 'D2': 1},
    'cp_time': {24: 1, 48: 2, 72: 3},
  }

  # Columns are processed in the order of the table above.
  for column, codes in lookup_tables.items():
    df[column] = df[column].map(codes)

  return df

## Preprocess train data

## Train

Drop the `sig_id` identifier column, then encode the categorical features.

In [None]:
# Build the training features: drop the row identifier (a non-in-place drop
# returns a new frame, so `train_features` is left untouched) and encode the
# categorical columns.
df_train = categorical_encoding(train_features.drop(columns=['sig_id']))

## Target

In [None]:
# Remove the row identifier so that only label columns remain.
# `errors='ignore'` keeps this cell safe to re-run: a second execution, when
# the column is already gone, no longer raises KeyError.
train_targets.drop('sig_id', axis = 1, inplace = True, errors = 'ignore')

## Test

In [None]:
# Build the test features the same way as the training ones: drop the row
# identifier (on a new frame, leaving `test_features` untouched) and encode
# the categorical columns.
df_test = categorical_encoding(test_features.drop(columns=['sig_id']))

# SVC

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler

import datetime

In [None]:
# Standardise the features: the scaler is fitted on the training set only and
# the same fitted transformation is then applied to both sets.
scaler = StandardScaler().fit(df_train.values)

X = scaler.transform(df_train.values)
X_tt = scaler.transform(df_test.values)

In [None]:
# Train one SVC per target column with stratified K-fold cross-validation.
# For every target the loop accumulates:
#   * `sample_submission[target]`: test-set predictions averaged over all folds and seeds
#   * `res[target]`: out-of-fold predictions on the training set, used for the log-loss

# Working copies. `res` is cast to float up front so that adding probabilities
# never has to upcast an integer label column.
res = train_targets.astype(float)
train = df_train.copy()

sample_submission.loc[:, train_targets.columns] = 0.0  # reset every target column
res.loc[:, train_targets.columns] = 0.0  # reset every target column

N_STARTS = 1  # number of random seeds averaged per target
N_SPLITS = 3  # number of stratified folds per seed

target_matrix = train_targets.values  # hoisted out of the loop: one column per target

for n in tqdm(range(train_targets.shape[1])):

  start_time = time()  # per-target timer, reported in the score message
  target_name = train_targets.columns[n]
  target = target_matrix[:, n]  # labels of the n-th target

  # Targets with fewer positives than folds are skipped; their predictions
  # stay at 0 in both `sample_submission` and `res`.
  if target.sum() < N_SPLITS:
    continue

  for seed in range(N_STARTS):

    skf = StratifiedKFold(n_splits = N_SPLITS, random_state = seed, shuffle = True)

    for j, (train_idx, test_idx) in enumerate(skf.split(X, target)):

      x_train, x_test = X[train_idx], X[test_idx]
      y_train = target[train_idx]

      if y_train.sum() >= 5:

        # Enough positives for SVC's probability calibration (scikit-learn
        # documents it as an internal 5-fold cross-validation).
        model = SVC(probability = True, cache_size = 2000)
        model.fit(x_train, y_train)

        # Predict on the *scaled* test matrix: the model was fitted on scaled data.
        test_pred = model.predict_proba(X_tt)[:, 1]
        fold_pred = model.predict_proba(x_test)[:, 1]

      else:

        print(f'Target {target_name}: Seed {seed}: Fold {j}: Not enough positive values for give probability.')

        # Without probability=True the model exposes no predict_proba, so fall
        # back to the hard 0/1 class predictions.
        model = SVC(cache_size = 2000)
        model.fit(x_train, y_train)

        test_pred = model.predict(X_tt)
        fold_pred = model.predict(x_test)

      # Average the test predictions over every fold and seed.
      sample_submission.loc[:, target_name] += test_pred / (N_SPLITS * N_STARTS)
      # Each training row is held out once per seed, so average over seeds only.
      res.loc[test_idx, target_name] += fold_pred / N_STARTS

  # Score only once all folds and seeds have filled the out-of-fold predictions.
  col_score = log_loss(train_targets.loc[:, target_name], res.loc[:, target_name])

  print(f'[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] Target {target_name}:', col_score)




