In [None]:
# import libraries

import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

In [None]:
# build the feature column names: feature_1 ... feature_768

FEATURES = [f'feature_{idx}' for idx in range(1, 769)]

In [None]:
# names of the target columns

LABELS = [
  'label_1',
  'label_2',
  'label_3',
  'label_4',
]

In [None]:
# load the train / validation / test CSV files into data frames

df_train, df_valid, df_test = (
  pd.read_csv(csv_path)
  for csv_path in (
    '/kaggle/input/layer-8-dataset/train.csv',
    '/kaggle/input/layer-8-dataset/valid.csv',
    '/kaggle/input/layer-8-dataset/test.csv',
  )
)

In [None]:
# per-label view of the data: rows whose label is missing are dropped from the
# train and validation splits of that label; the test features are used in full

data_dict = {}

for label in LABELS:
  train_rows = df_train[df_train[label].notna()]
  valid_rows = df_valid[df_valid[label].notna()]

  data_dict[label] = {
    'x_train': train_rows[FEATURES].values,
    'y_train': train_rows[label].values,
    'x_valid': valid_rows[FEATURES].values,
    'y_valid': valid_rows[label].values,
    'x_test': df_test[FEATURES].values,
  }

In [None]:
# one RBF-kernel SVC per label; only the regularisation strength C differs

svc_c_values = (100, 30, 100, 30)  # paired with LABELS by position

model_dict = {
  label: SVC(C=c_value, gamma='scale', kernel='rbf')
  for label, c_value in zip(LABELS, svc_c_values)
}

In [None]:
# PCA for label 2, 3, 4 (the first label keeps the full feature set)
#
# A fresh PCA is fitted per label on that label's training rows and kept in
# `pca_dict`, so every fitted transformer stays available afterwards.
# The untouched feature matrices are stashed under 'x_raw' on the first run and
# are always used as the PCA input. This makes the cell safe to re-run:
# transforming data_dict[label]['x_*'] in place would otherwise feed
# already-reduced data back into PCA on a second execution.

pca_dict = dict()

for label in LABELS[1:]:
  splits = data_dict[label]

  # setdefault: only the first run captures the raw arrays (references, no copy)
  raw = splits.setdefault(
    'x_raw', {key: splits[key] for key in ('x_train', 'x_valid', 'x_test')}
  )

  # a float n_components with svd_solver='full' selects the number of
  # components by explained-variance ratio (see the scikit-learn PCA docs);
  # `pca` is left bound to the last label's fitted instance after the loop
  pca = PCA(n_components=0.95, svd_solver='full')
  splits['x_train'] = pca.fit_transform(raw['x_train'])
  splits['x_valid'] = pca.transform(raw['x_valid'])
  splits['x_test'] = pca.transform(raw['x_test'])

  pca_dict[label] = pca


In [None]:
# fit each label's model, report its validation accuracy, and collect the
# predictions for the test set into one data frame (one column per label)

df_test_pred = pd.DataFrame()

for label in LABELS:
  clf = model_dict[label]
  splits = data_dict[label]

  clf.fit(splits['x_train'], splits['y_train'])

  splits['y_pred'] = clf.predict(splits['x_valid'])
  valid_accuracy = accuracy_score(splits['y_valid'], splits['y_pred'])
  print('accuracy_score for {}: '.format(label), valid_accuracy)

  splits['y_pred_test'] = clf.predict(splits['x_test'])
  df_test_pred[label] = splits['y_pred_test']



In [None]:
# Write the submission file with a 1-based 'ID' index.
# The index is rebuilt from the frame length instead of being shifted in place
# (`index += 1`), so re-running this cell never bumps the IDs a second time.
df_test_pred.index = pd.RangeIndex(start=1, stop=len(df_test_pred) + 1, name='ID')
df_test_pred.to_csv('submission_final.csv')