# MO solution

This notebook contains solution to the task 

In [1]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from tqdm import tqdm

In [2]:
DATA_PATH = 'data'

In [3]:
train = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
user_decision = pd.read_csv(os.path.join(DATA_PATH, 'user_decision.csv'))
# append test set to train and mark it with target variables -1
sample = pd.read_csv(os.path.join(DATA_PATH, 'sample_solution.csv'))
sample.iloc[:, 1:] = -1
train = pd.concat((train, sample)).rename(columns = {'id': 'user_id'})
decision = pd.read_excel(os.path.join(DATA_PATH, 'decision.xlsx'))

# cluster decisions

This section contains clustering if available

In [4]:
user_decision = user_decision.merge(decision, on = 'decision_id')
user_decision

Unnamed: 0,user_id,period,decision_id,decision_name,Unnamed: 2
0,10625,1,409,Sending Engineers on training courses,
1,10659,1,409,Sending Engineers on training courses,
2,10200,1,409,Sending Engineers on training courses,
3,10653,4,409,Sending Engineers on training courses,
4,10760,4,409,Sending Engineers on training courses,
...,...,...,...,...,...
60084,10470,3,7,Fixed % from net profit,
60085,10715,4,258,Incoterms - International providers,
60086,10566,3,233,[Bank 3] Deposit term,
60087,10942,3,233,[Bank 3] Deposit term,


# Decision statistics

As user's information I used number of actions per each round and number of actions per each unique actions. For dimensionality reduction PCA was chosen.

In [5]:
period_stats = user_decision.groupby(['user_id', 'period']).size().reset_index()
period_stats = pd.pivot(period_stats, index='user_id', columns='period', values=0).fillna(0).reset_index()
# period_stats = period_stats.div(period_stats.sum(axis=1), axis=0).reset_index()
period_stats['max_activity_round'] = np.argmax(period_stats.iloc[:, 1:].values, axis = 1)
period_stats['min_activity_round'] = np.argmin(period_stats.iloc[:, 1:].values, axis = 1)
total_decisions = user_decision.groupby(['user_id', 'decision_name']).size().reset_index()
total_decisions = total_decisions.loc[total_decisions.decision_name.isin(total_decisions.decision_name.value_counts().iloc[:].reset_index().iloc[:, 0])]
total_decisions = pd.pivot(total_decisions, index='user_id', columns='decision_name', values=0).fillna(0)
total_decisions = total_decisions.div(total_decisions.sum(axis=1), axis=0)
pca = PCA(n_components=40)
total_decisions = pd.DataFrame(pca.fit_transform(total_decisions), index = total_decisions.index)
period_stats = period_stats.merge(total_decisions, left_on = 'user_id', right_index = True, how ='left')
period_stats = period_stats.fillna(0)
period_stats

Unnamed: 0,user_id,1_x,2_x,3_x,4_x,max_activity_round,min_activity_round,0,1_y,2_y,...,30,31,32,33,34,35,36,37,38,39
0,10001,29.0,23.0,39.0,15.0,2,4,-0.030291,-0.006430,0.017851,...,0.020573,-0.000856,-0.008560,-0.011411,0.018906,0.005373,-0.009441,-0.011229,-0.009654,0.004112
1,10002,0.0,9.0,25.0,22.0,2,0,-0.032863,-0.021875,-0.006390,...,-0.004992,-0.003705,0.001937,-0.011131,-0.001333,0.001362,-0.017483,-0.001370,0.001055,-0.001690
2,10003,21.0,41.0,60.0,84.0,3,4,-0.029989,-0.017806,-0.006047,...,0.006158,-0.003328,-0.000205,0.003017,0.004618,0.009170,-0.003195,0.000745,-0.006851,0.003293
3,10004,3.0,19.0,41.0,27.0,2,4,-0.022333,0.047818,-0.009788,...,0.007878,-0.007855,0.007538,0.008733,-0.004742,-0.007558,-0.000279,0.008105,-0.004649,-0.007266
4,10005,14.0,34.0,31.0,0.0,1,3,-0.016416,0.006393,0.020000,...,-0.006537,0.022686,-0.000205,0.025436,-0.016413,-0.001203,-0.012322,-0.010311,-0.001491,-0.004079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968,10969,0.0,1.0,4.0,8.0,3,0,0.053807,-0.042725,-0.010973,...,0.011137,-0.008545,-0.019935,-0.010829,-0.012256,-0.015577,-0.003402,-0.030237,0.019108,0.000779
969,10970,11.0,22.0,32.0,0.0,2,3,-0.025492,-0.001217,-0.014446,...,-0.006605,-0.005218,-0.012392,0.002529,0.004358,0.014397,0.005774,-0.000225,0.021551,0.000223
970,10971,22.0,26.0,12.0,25.0,1,4,-0.015419,0.008259,-0.008471,...,0.008411,0.008315,0.003801,-0.000708,-0.002173,0.003678,-0.010298,0.005198,0.004364,0.002756
971,10972,0.0,0.0,0.0,4.0,3,0,-0.029505,-0.045094,-0.024273,...,-0.003620,0.015228,-0.029064,0.029591,0.061574,0.074959,-0.001204,0.078761,0.107480,-0.024622


# team info

For team information scores for every round (and mean one) were used, as well as mean targerts of other team members, since target variables are highly correlated (excluding the predicting user of course).

In [6]:
take = ['score']
user = pd.read_csv(os.path.join(DATA_PATH, 'user.csv'))
team_point = pd.read_csv(os.path.join(DATA_PATH, 'team_point.csv'))
team_point = team_point.groupby(['team_id', 'category_id']).mean().loc[:, ['score', 'place']].reset_index(level = 1)
team_point = pd.pivot(team_point, values = take, columns = ['category_id'])
if 'score' in team_point.columns:
    team_point['mean_scores'] = np.mean(team_point['score'], axis = 1)
if 'place' in team_point.columns:
    team_point['mean_place'] = np.mean(team_point['place'], axis = 1)
team_point.columns = team_point.columns.map(lambda x: x[0] + str(x[1]))
team_point = team_point.merge(user.loc[:, ['user_id', 'team_id']], on = 'team_id').drop(['team_id'], axis = 1)

user = user.merge(train, on = 'user_id')
team_stats = pd.DataFrame()
for i, row in tqdm(user.iterrows()):
    user_t = user.loc[(user.team_id == row.team_id) & (user.user_id != row.user_id) & (user['Analytical thinking'] > 0)]
    mean_results = dict(np.mean(user_t.iloc[:, 3:], axis = 0))
    mean_results.update({'user_id': row.user_id})
    team_stats = team_stats.append(mean_results, True)
team_stats = team_stats.fillna(0)

team_point = team_point.merge(team_stats, on = 'user_id', how = 'left')
team_point

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results,

  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
  team_stats = team_stats.append(mean_results, True)
973it [00:02, 407.78it/s]


Unnamed: 0,score1,score2,score3,score4,score5,score6,mean_scores,user_id,Analytical thinking,Systemic thinking,Adaptability,Focus
0,35.6,59.4,101.2,47.0,26.6,269.8,89.933333,10273,5.000000,5.000000,5.0,5.0
1,35.6,59.4,101.2,47.0,26.6,269.8,89.933333,10762,4.500000,4.500000,5.0,5.0
2,35.6,59.4,101.2,47.0,26.6,269.8,89.933333,10646,4.500000,4.500000,5.0,5.0
3,-0.2,5.4,81.0,30.4,14.6,131.2,43.733333,10547,5.000000,5.000000,5.0,5.0
4,-0.2,5.4,81.0,30.4,14.6,131.2,43.733333,10833,5.000000,5.000000,5.0,5.5
...,...,...,...,...,...,...,...,...,...,...,...,...
968,10.0,24.6,76.2,34.2,34.8,179.8,59.933333,10375,4.666667,4.333333,5.0,5.0
969,10.0,24.6,76.2,34.2,34.8,179.8,59.933333,10860,4.333333,4.333333,5.0,5.0
970,10.0,24.6,76.2,34.2,34.8,179.8,59.933333,10022,4.333333,4.333333,5.0,5.0
971,10.0,24.6,76.2,34.2,34.8,179.8,59.933333,10316,4.500000,4.250000,5.0,5.0


# training & predictions



I used just a usual logistic regression as a solver. Also tried RandomForest with predicting all targets at once, but it showed poor performance even though i though that it would generalize.

In [7]:
df_train = train
df_train = train.merge(period_stats, on = 'user_id', how = 'left')
df_train = df_train.merge(team_point, on = 'user_id', how = 'left')

df_train.columns = df_train.columns.astype(str)

x = df_train.iloc[:, 5:]
y = df_train.iloc[:, 1:5]

train_index = df_train.iloc[:, 1] >= 0
x_train_val, x_test = x.loc[train_index], x.loc[~train_index]
y_train_val, y_test = y.loc[train_index], y.loc[~train_index]

In [8]:
x_train_val

Unnamed: 0,1_x,2_x,3_x,4_x,max_activity_round,min_activity_round,0,1_y,2_y,3_y,...,score2,score3,score4,score5,score6,mean_scores,Analytical thinking_y,Systemic thinking_y,Adaptability_y,Focus_y
0,7.0,14.0,28.0,10.0,2,4,-0.010846,-0.004563,0.006542,-0.014406,...,-5.0,78.2,25.6,21.0,118.8,39.600000,4.000000,4.000000,5.000000,4.666667
1,21.0,21.0,12.0,8.0,0,4,-0.011788,-0.036137,0.003050,-0.009672,...,24.0,93.4,32.0,22.6,182.8,60.933333,4.500000,4.000000,4.500000,4.000000
2,62.0,44.0,63.0,38.0,2,4,-0.021408,-0.026972,-0.001812,-0.000081,...,12.0,76.6,31.6,29.2,160.4,53.466667,4.666667,4.000000,4.333333,4.333333
3,26.0,28.0,34.0,65.0,3,4,-0.012659,0.001707,-0.001989,-0.005011,...,4.8,73.2,27.8,21.4,133.4,44.466667,4.666667,4.000000,4.333333,4.333333
4,58.0,70.0,194.0,35.0,2,4,-0.010649,-0.004831,-0.005462,-0.003752,...,14.0,83.2,33.2,31.0,175.6,58.533333,4.000000,4.000000,4.000000,4.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,10.0,2.0,14.0,4.0,2,1,0.071447,-0.050347,0.009897,-0.049644,...,-5.4,69.8,25.4,18.2,107.2,35.733333,4.000000,4.000000,4.666667,4.666667
676,1.0,14.0,24.0,1.0,2,0,-0.032127,-0.022519,-0.006176,0.005785,...,21.6,94.2,30.8,12.4,166.4,55.466667,5.000000,4.000000,4.750000,4.500000
677,4.0,17.0,23.0,12.0,2,4,-0.026460,-0.038079,-0.018977,-0.030487,...,17.2,79.8,27.2,16.2,146.6,48.866667,4.333333,3.666667,4.333333,4.333333
678,10.0,44.0,45.0,38.0,2,4,-0.022559,-0.022782,-0.001704,0.015089,...,21.6,85.0,32.8,20.0,168.4,56.133333,4.500000,5.000000,5.000000,5.000000


In [9]:
x_train, x_val, y_train, y_val = train_test_split(x_train_val, y_train_val, test_size = 0.3, random_state = 102)

result = 0 
sample_preds = []

for i in range(4):
    model = Pipeline([('scaler', MinMaxScaler()), ('logistic', LogisticRegression(max_iter=1000, class_weight = 'balanced', C = 0.2))])

    model.fit(x_train, y_train.values[:, i])
    preds = model.predict(x_val)
    result += recall_score(y_val.values[:, i], preds[:], average='macro', zero_division=True)
    model.fit(x_train_val, y_train_val.values[:, i])
    sample_preds.append(model.predict(x_test))  
    # plot_learning_curve(model, x, y.values[:, i])

print("Recall score",result/4)
sample.iloc[:, 1:] = np.array(sample_preds).T
sample.to_csv('sample.csv', index = False)

Recall score 0.3359622626649506
