In [1]:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.sparse as sparse

import math
from sklearn.metrics import accuracy_score
from scipy.stats import spearmanr as Spearman
# import nltk
# nltk.download()
#from nltk.corpus import stopwords
#from nltk.corpus import words
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import re
from bs4 import BeautifulSoup

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

# for modeling
from sklearn.linear_model import LogisticRegression as LogReg
from sklearn.linear_model import LogisticRegressionCV as LogRegCV
from sklearn.ensemble import VotingClassifier
from sklearn import model_selection
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import cohen_kappa_score
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression as LogReg
from sklearn.linear_model import LogisticRegressionCV as LogRegCV
from sklearn.ensemble import RandomForestClassifier
from sklearn import discriminant_analysis as da
from sklearn import tree
# from sklearn.cross_validation import cross_val_predict 
# from sklearn import cross_validation
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import cohen_kappa_score
from nltk.corpus import wordnet
%matplotlib inline


## <span style="color:blue">append_regularised_score</span>
#### <span style="color:green">INPUT:</span> dataframe 
- scores of each essay set are normalized by mean and standard deviation x = (x-mu)/std
- creates new feature in dataframe "std_score"
#### <span style="color:purple">OUTPUT:</span> new dataframe

## <span style="color:blue">create_regularization_data</span>
#### <span style="color:green">INPUT:</span> dataframe 
- finds the mean and standard deviation of each essay
- creates a list with essay number, mean of score, standard deviation of score
#### <span style="color:purple">OUTPUT:</span> Regularised list

In [2]:
def append_regularized_scores(old_df):
    new_df = old_df.copy()
    new_df['std_score'] = new_df.groupby(['essay_set'])[['score']].apply(lambda x: (x - np.mean(x)) / (np.std(x)))
    return new_df

def create_regularization_data(old_df):
    #getting the number of datasets
    max_essay_set = max(old_df['essay_set'])
    #list of the regularized values
    regularization_data = []
    for i in range(max_essay_set+1):
        mean = np.mean((old_df[old_df['essay_set'] == i + 1])['score'])
        std = np.std((old_df[old_df['essay_set'] == i + 1])['score'])
        regularization_data.append([i + 1, mean, std])
    return regularization_data

## Read Training Data
- Scores of each essay becomes the aversage score of domain 1 and domain 2
- create regularisation list
- append regularised score to dataframe

In [3]:
# Read in training data
# Note that for essay set 2, score becomes average of 2 domain scores
train_cols = ['essay_id', 'essay_set', 'essay', 'domain1_score', 'domain2_score']
train_df = pd.read_csv('training_set_rel3.tsv', delimiter='\t', usecols=train_cols,dtype={'essay_set':int},encoding = "ISO-8859-1")
for i in range(train_df.shape[0]):
    if not np.isnan(train_df.get_value(i, 'domain2_score')):
        assert (train_df.get_value(i, 'essay_set') == 2)
        new_val = train_df.get_value(i, 'domain1_score') + train_df.get_value(i, 'domain2_score')
        train_df.set_value(i, 'domain1_score', new_val) 
train_df = train_df.drop('domain2_score', axis=1)
train_df = train_df.rename(columns={'domain1_score': 'score'})

regularization_data = create_regularization_data(train_df)
train_df = append_regularized_scores(train_df)

print ("The regularized data for each essay set = ", regularization_data)
print ("\n")

#validate that the standardization works
max_essay_set = max(train_df['essay_set'])
for i in range (max_essay_set):
    valid = train_df[train_df["essay_set"] == i + 1]["std_score"]
    print ("mean and standard deviation of essay set " + str(i + 1) + " = ", np.mean(valid), ",", np.std(valid))

train_df.head()

The regularized data for each essay set =  [[1, 8.528323051037576, 1.5381336495587767], [2, 6.749444444444444, 1.3844371990179603], [3, 1.8482039397450754, 0.8149207612821795], [4, 1.4322033898305084, 0.9395167668768533], [5, 2.4088642659279778, 0.9705520523317599], [6, 2.72, 0.970360757656664], [7, 16.062460165710643, 4.583888354164165], [8, 36.95020746887967, 5.749521294509325], [9, nan, nan]]


mean and standard deviation of essay set 1 =  5.145133400155731e-16 , 1.0000000000000064
mean and standard deviation of essay set 2 =  1.8861455607242937e-16 , 1.0000000000000007
mean and standard deviation of essay set 3 =  -8.542156296073047e-17 , 0.9999999999999976
mean and standard deviation of essay set 4 =  -1.3303858956101453e-16 , 1.0000000000000004
mean and standard deviation of essay set 5 =  1.1314433539046957e-16 , 0.999999999999986
mean and standard deviation of essay set 6 =  -5.913787977836668e-16 , 0.9999999999999828
mean and standard deviation of essay set 7 =  1.320026164715

Unnamed: 0,essay_id,essay_set,essay,score,std_score
0,1,1,"Dear local newspaper, I think effects computer...",8,-0.343483
1,2,1,"Dear @CAPS1 @CAPS2, I believe that using compu...",9,0.306655
2,3,1,"Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...",7,-0.993622
3,4,1,"Dear Local Newspaper, @CAPS1 I have found that...",10,0.956794
4,5,1,"Dear @LOCATION1, I know having computers has a...",8,-0.343483


#### Show nothing is empty in training set

In [4]:

if train_df.isnull().any().any():
    print ('Training data is missing!')
else:
    print ('No missing training data!')

No missing training data!


## <span style="color:blue">vectorizer_clean</span>
#### <span style="color:green">INPUT:</span> old dataframe 
- cleans essay and returns essay with only lowecase words separated by space
#### <span style="color:purple">OUTPUT:</span> new dataframe with cleaned essay

In [5]:
def vectorizer_clean(old_df):
    new_df = old_df.copy()
    for i in range(new_df.shape[0]):
        new_df.set_value(i, 'essay', " ".join(re.sub('[^a-zA-Z\d\s]', '', new_df['essay'].iloc[i]).lower().split())) 
    return new_df

In [6]:
# essay is now just lowercase words separated by space
vectorizer_train = vectorizer_clean(train_df)
print (vectorizer_train.head())

   essay_id  essay_set                                              essay  \
0         1          1  dear local newspaper i think effects computers...   
1         2          1  dear caps1 caps2 i believe that using computer...   
2         3          1  dear caps1 caps2 caps3 more and more people us...   
3         4          1  dear local newspaper caps1 i have found that m...   
4         5          1  dear location1 i know having computers has a p...   

   score  std_score  
0      8  -0.343483  
1      9   0.306655  
2      7  -0.993622  
3     10   0.956794  
4      8  -0.343483  


## Generating 'y' for classification as well as regression

In [7]:
y_reg = vectorizer_train['std_score']
train_std_scores = np.asarray(vectorizer_train['std_score'], dtype="byte")

# TFIDF Vectorizer
- Create vectors from essays

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
text = train_essays = vectorizer_train['essay'].values
print(text.shape)
vectorizer = TfidfVectorizer()
vectorizer.fit(text)
vector = vectorizer.transform(text)
print(vector.shape)
train_vectors1 = vector.toarray()
len(train_std_scores)

(12976,)
(12976, 43081)


12976

# Count Unique words
## <span style="color:blue">fill_unique_words_column</span>
#### <span style="color:green">INPUT:</span> dataframe 
- counts the number of unique words 
- Calculates ((no. of unique words) / (total words))
- returns a list with percentages
#### <span style="color:purple">OUTPUT:</span> list of percentages

In [9]:
from collections import Counter
def fill_unique_words_column(train_dff):

    #percentage of unique words to the total number of words
    unique_word_percentages_train = []

    for i in range(len(train_df)):
        splits = train_df.iloc[i]["essay"].split()
        total_words = len(splits)
        unique_words = len(Counter(splits))
        percentage = float(unique_words) / total_words
        unique_word_percentages_train.append(percentage)

    return unique_word_percentages_train

In [10]:
unique = fill_unique_words_column(train_df)

# Count Misspelled words

## <span style="color:blue">percentage_correct_spelling</span>
#### <span style="color:green">INPUT:</span> a single essay text 
- checks if each word in the essay is a valid word or not using the wordnet database
- generates percentage of correctly spelled words
#### <span style="color:purple">OUTPUT:</span> list

In [11]:
# input is list of words in text, output percentage spelling correct
def percentage_correct_spelling(text):
    text_len = len(text)
    correct = 0
    for word in text:
        try:
            if wordnet.synsets(word):
                correct += 1
        except:
            correct+= 0
    return 1. * correct / text_len

In [12]:
spelling_feature_x = []
for train in train_essays:
    sentence = train.split()
    percent = percentage_correct_spelling(sentence)
    spelling_feature_x.append([percent])
spelling_feature_x[0]

[0.685459940652819]

# Number of Sentences
## <span style="color:blue">sentences</span>
#### <span style="color:green">INPUT:</span> essay text
- generates the number of sentences in the essay
#### <span style="color:purple">OUTPUT:</span> length of the sentence

In [13]:
def sentences(par):
    split_sent = re.split(r'[.!?]+', par)
    return len(split_sent)

In [14]:
numOfSent_train = []
for essay in train_df['essay']:
    sent = sentences(essay)
    numOfSent_train.append(sent)

In [15]:
len(numOfSent_train)

12976

# Generate POS Tags
## <span style="color:blue">create_tags_dict</span>
#### <span style="color:green">INPUT:</span> essay
- calculates proportion of each part of speech in essay
#### <span style="color:purple">OUTPUT:</span> dict(tag:proportion)

## <span style="color:blue">fill_pos_columns</span>
#### <span style="color:green">INPUT:</span> datxaframe
- calculates proportion of each part of speech in each essay of the dataframe
- appends it to the repective column in the dataframe
#### <span style="color:purple">OUTPUT:</span> new dataframe 

In [16]:
UNIV_TAGS = ['ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM', 'PRT', 'PRON', 'VERB', '.', 'X']

def create_tags_dict(essay):
    text = word_tokenize(essay)
    num_tokens = len(text)
    tagged_words = nltk.pos_tag(text, tagset='universal')
    tags_only = [tag for _, tag in tagged_words]
    fd = FreqDist(tags_only)
    tags_dict = {}
    for pos in UNIV_TAGS:
        tags_dict[pos] = float(fd[pos]) / num_tokens

    return tags_dict

def fill_pos_columns(df):
    for pos in UNIV_TAGS:
        df[pos] = pd.Series([0.0] * df.shape[0], index=df.index)

    for i in range(df.shape[0]):
        essay = df.get_value(i, 'essay')
        tags = create_tags_dict(essay)
        #print (tags)
        for pos in UNIV_TAGS:
            df = df.set_value(i, pos, tags[pos])
    
    return df['ADJ'],df['ADP'],df['ADV'],df['CONJ'],df['DET'],df['NOUN'],df['NUM'],df['PRT'],df['PRON'],df['VERB'],df['.'],df['X']

In [17]:
lADJ, lADP, lADV, lCONJ, lDET, lNOUN, lNUM, lPRT, lPRON, lVERB, lfullstop, lX= \
fill_pos_columns(train_df)
lADP.shape

(12976,)

# Calculate Perplexity
## <span style="color:blue">perplexity_clean</span>
#### <span style="color:green">INPUT:</span> df
- cleans the essays
#### <span style="color:purple">OUTPUT:</span> list of strings
## <span style="color:blue">Perplexity</span>
#### <span style="color:green">INPUT:</span> dataframe
- class that helps calculate perplexities of each essay
- appends all the the perplexities into a list which is later used as a feature
#### <span style="color:purple">OUTPUT:</span> list of perplexities

In [18]:
#perplexity
def perplexity_clean(df):
    essays_string = ""
    for i in range(df.shape[0]):
    	essay = df.get_value(i, 'essay')
    	essays_string += (" ".join(re.sub('[^a-zA-Z\d\s]', '', essay).lower().split()))
    return [essays_string]

class Perplexity:
    def __init__(self):
        self.num_words = None
        self.counts = None
        self.vectorizer = None

    def create_counts(self, compressed_essays):
        self.vectorizer = CountVectorizer().fit(compressed_essays)
        self.counts = self.vectorizer.transform(compressed_essays).toarray()[0]

        # length added for LaPlace smoothing
        self.num_words = float(sum(self.counts) + len(self.counts))

    def fill_perplexity_columns(self, train_df):
        print("Creating ngram counts...")
        self.create_counts(perplexity_clean(train_df))
        train_clean = vectorizer_clean(train_df)
        for i in range(train_clean.shape[0]):
            essay = train_df.get_value(i, 'essay')
            perp = self.perplexity(essay)
            train_df = train_df.set_value(i, 'perplexity', perp)
        return train_df['perplexity']

    # After having already fit model on a set of training essays, calculates the
    # perplexity of a student's essay based from the model, and returns this
    # perplexity to be used as a feature
    def perplexity(self, test_essay):
        log_prob = 0.0
        word_list = test_essay.split()
        for word in word_list:
            if word in self.vectorizer.vocabulary_:
                log_prob += math.log( (self.counts[self.vectorizer.vocabulary_[word]] + 1.0) / self.num_words)
            else:
                log_prob += math.log (1.0 / self.num_words)

        return math.pow(2.0, -log_prob / len(word_list))

In [19]:
perp = Perplexity().fill_perplexity_columns(train_df)
perp

Creating ngram counts...


0         273.964130
1         297.652243
2         236.759940
3         354.237007
4         237.657729
5         226.584627
6         284.796399
7         316.965611
8         379.586216
9         217.551144
10        459.728385
11        287.106295
12        202.291153
13        294.971815
14        250.868614
15        323.696663
16        240.958775
17        265.156884
18       1905.298557
19        382.859252
20        211.703701
21        130.271133
22        240.800801
23        396.751435
24        201.791930
25        274.297495
26        261.197957
27        330.107505
28        335.627776
29        245.131859
            ...     
12946     454.539369
12947     301.221928
12948     308.559467
12949     229.827426
12950     326.853295
12951     425.548438
12952     413.852151
12953     431.997842
12954     376.876596
12955     288.586590
12956     301.297719
12957     386.473196
12958     199.893427
12959     461.434296
12960     328.436666
12961     433.683195
12962     383

# Generate test train dataset
## <span style="color:blue">separate</span>
#### <span style="color:green">INPUT:</span> Complete dataset (vectors + features)
- separates the vectors and features
#### <span style="color:purple">OUTPUT:</span> (vectors, features)

In [61]:
spell, sent = spelling_feature_x, numOfSent_train
y = train_std_scores
X = []
t = train_vectors1
#print(type(t))
for i,j in zip(range(len(spell)),t):
    #print(j)
    X.append([j,perp[i],spell[i][0],sent[i],unique[i],lADJ[i], lADP[i], lADV[i], lCONJ[i], lDET[i], lNOUN[i], lNUM[i], lPRT[i], lPRON[i], lVERB[i], lfullstop[i], lX[i]])
#print(X[0])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [62]:
def separate(X):
    vector=[]
    features=[]
    for i in X:
        vector.append(i[0])
        features.append([i[1],i[2]])
    return sparse.csr_matrix(vector),features

vec_train,fea_train = separate(X_train)
vec_test, fea_test = separate(X_test)
vec_train

<9732x43081 sparse matrix of type '<class 'numpy.float64'>'
	with 1052778 stored elements in Compressed Sparse Row format>

# Logistic Regression

In [63]:
logistic_l2 = LogReg(penalty='l2', solver='liblinear')
logistic_l2.fit(vec_train, y_train)
pred2a = logistic_l2.predict(vec_test)
print(accuracy_score(pred2a, y_test))

logistic_l2 = LogReg(penalty='l2', solver='liblinear')
logistic_l2.fit(fea_train, y_train)
pred2b = logistic_l2.predict(fea_test)
print(accuracy_score(pred2b, y_test))



0.7043773119605425




0.6707768187422934


In [64]:
logistic_l1 = LogReg(penalty='l1', solver='liblinear')
logistic_l1.fit(vec_train, y_train)
pred1a = logistic_l1.predict(vec_test)
print(accuracy_score(pred1a, y_test))

logistic_l1 = LogReg(penalty='l1', solver='liblinear')
logistic_l1.fit(fea_train, y_train)
pred1b = logistic_l1.predict(fea_test)
print(accuracy_score(pred1b, y_test))



0.7065351418002466
0.6707768187422934




In [65]:
from collections import Counter
print(Counter(y_test))
print(Counter(pred1a))

Counter({0: 2180, 1: 499, -1: 494, -2: 45, 2: 18, -3: 4, -4: 3, 4: 1})
Counter({0: 2922, -1: 242, 1: 80})


In [66]:
# create the ensemble model
estimator = []
estimator.append(('logisticL1',logistic_l1))
estimator.append(('logisticL2',logistic_l2))
#estimator.append(('Ridge',ridge))
#estimator.append(('Lasso',lasso))
ensemble = VotingClassifier(estimator)
v, f = separate(X)

In [67]:
kfold = model_selection.KFold(n_splits=10, random_state=7)
results = model_selection.cross_val_score(ensemble, f, y, cv=kfold)
print(results.mean())



0.6743980716433444


In [68]:
kfold = model_selection.KFold(n_splits=10, random_state=7)
results = model_selection.cross_val_score(ensemble, v, y, cv=kfold)
print(results.mean())



0.7045229420031768


# Ridge Regression

In [69]:
y = train_std_scores
X_train, X_test, y_train, y_test = train_test_split(X,y_reg)
vec_train,fea_train = separate(X_train)
vec_test, fea_test = separate(X_test)

In [53]:
from sklearn import linear_model
ridge = linear_model.Ridge(alpha = 0.05)
ridge.fit(vec_train, y_train)
pred3a = ridge.predict(vec_test)
#print(accuracy_score(pred3a, y_test))

ridge = linear_model.Ridge(alpha = 0.05)
ridge.fit(fea_train, y_train)
pred3b = ridge.predict(fea_test)
#print(accuracy_score(pred3b, y_test))

In [54]:
corr, p = Spearman(a = pred3a, b = y_test)
print ("Ridge: TFIDF", corr)
corr, p = Spearman(a = pred3b, b = y_test)
print ("Ridge: FEATURES", corr)

Ridge: TFIDF 0.6518760968649936
Ridge: FEATURES 0.10155101392527537


In [55]:
lasso = linear_model.Lasso(alpha = 0.5)
lasso.fit(vec_train, y_train)
pred4a = lasso.predict(vec_test)
#print(accuracy_score(pred3a, y_test))

lasso = linear_model.Ridge(alpha = 0.05)
lasso.fit(fea_train, y_train)
pred4b = lasso.predict(fea_test)
#print(accuracy_score(pred3b, y_test))

In [56]:
corr, p = Spearman(a = pred4a, b = y_test)
print ("Lasso: TFIDF", corr)
corr, p = Spearman(a = pred4b, b = y_test)
print ("Lasso: FEATURES", corr)

Lasso: TFIDF nan
Lasso: FEATURES 0.10155101392527537


  c /= stddev[:, None]
  c /= stddev[None, :]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [57]:
pred3a

array([ 0.13008353, -0.862997  , -0.99807548, ...,  0.45930567,
        0.28549115,  0.78389842])

# Combining models

In [58]:
from scipy import stats
def combine(true, pred):
    array = []
    for i in true:
        val = stats.trim_mean(i,0.2)
        array.append(val)
    return Spearman(array,pred)

def make(pred3a, pred3b, pred4a,pred4b):
    pred_array=[]
    for a,b,c,d in zip(pred3a,pred3b,pred4a,pred4b):
        pred_array.append([a,b,c,d])
    return pred_array
pred_array=make(pred3a, pred3b, pred4a,pred4b)
combine(pred_array,y_test)

  return np.mean(atmp[sl], axis=axis)


SpearmanrResult(correlation=0.6388854352480297, pvalue=0.0)

# Without POS tag and perplexity

In [32]:
spell, sent = spelling_feature_x, numOfSent_train
y = train_std_scores
X = []
t = train_vectors1
print(type(t))
for i,j in zip(range(len(spell)),t):
    #print(j)
    X.append([j,spell[i][0],sent[i],unique[i]])
print(X[0])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y)

<class 'numpy.ndarray'>
[array([0., 0., 0., ..., 0., 0., 0.]), 0.685459940652819, 17, 0.5443786982248521]


# Logistic Regression

In [33]:
logistic_l2 = LogReg(penalty='l2', solver='liblinear', n_jobs=4)
logistic_l2.fit(vec_train, y_train)
pred2a = logistic_l2.predict(vec_test)
print(accuracy_score(pred2a, y_test))

logistic_l2 = LogReg(penalty='l2', solver='liblinear', n_jobs=4)
logistic_l2.fit(fea_train, y_train)
pred2b = logistic_l2.predict(fea_test)
print(accuracy_score(pred2b, y_test))

  " = {}.".format(effective_n_jobs(self.n_jobs)))


0.6707768187422934
0.6707768187422934


  " = {}.".format(effective_n_jobs(self.n_jobs)))


In [34]:
logistic_l1 = LogReg(penalty='l1', solver='liblinear', n_jobs=4)
logistic_l1.fit(vec_train, y_train)
pred1a = logistic_l1.predict(vec_test)
print(accuracy_score(pred1a, y_test))

logistic_l1 = LogReg(penalty='l1', solver='liblinear', n_jobs=4)
logistic_l1.fit(fea_train, y_train)
pred1b = logistic_l1.predict(fea_test)
print(accuracy_score(pred1b, y_test))

  " = {}.".format(effective_n_jobs(self.n_jobs)))


0.6707768187422934
0.6707768187422934


  " = {}.".format(effective_n_jobs(self.n_jobs)))


In [None]:
kfold = model_selection.KFold(n_splits=10, random_state=7)
results = model_selection.cross_val_score(ensemble, f, y, cv=kfold)
print(results.mean())



0.7045229420031768


# Ridge Regression

In [37]:
y = train_std_scores
X_train, X_test, y_train, y_test = train_test_split(X,y_reg)
vec_train,fea_train = separate(X_train)
vec_test, fea_test = separate(X_test)

In [38]:
from sklearn import linear_model
ridge = linear_model.Ridge(alpha = 0.05)
ridge.fit(vec_train, y_train)
pred3a = ridge.predict(vec_test)
#print(accuracy_score(pred3a, y_test))

ridge = linear_model.Ridge(alpha = 0.05)
ridge.fit(fea_train, y_train)
pred3b = ridge.predict(fea_test)
#print(accuracy_score(pred3b, y_test))

In [39]:
corr, p = Spearman(a = pred3a, b = y_test)
print ("Ridge: TFIDF", corr)
corr, p = Spearman(a = pred3b, b = y_test)
print ("Ridge: FEATURES", corr)

Ridge: TFIDF 0.6434234452517597
Ridge: FEATURES 0.4047207740600403


In [40]:
lasso = linear_model.Lasso(alpha = 0.5)
lasso.fit(vec_train, y_train)
pred4a = lasso.predict(vec_test)
#print(accuracy_score(pred3a, y_test))

lasso = linear_model.Ridge(alpha = 0.05)
lasso.fit(fea_train, y_train)
pred4b = lasso.predict(fea_test)
#print(accuracy_score(pred3b, y_test))

In [41]:
corr, p = Spearman(a = pred4a, b = y_test)
print ("Lasso: TFIDF", corr)
corr, p = Spearman(a = pred4b, b = y_test)
print ("Lasso: FEATURES", corr)

Lasso: TFIDF nan
Lasso: FEATURES 0.4047207740600403


  c /= stddev[:, None]
  c /= stddev[None, :]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


# Combining models

In [42]:
from scipy import stats
def combine(true, pred):
    array = []
    for i in true:
        val = stats.trim_mean(i,0.2)
        array.append(val)
    return Spearman(array,pred)

def make(pred3a, pred3b, pred4a,pred4b):
    pred_array=[]
    for a,b,c,d in zip(pred3a,pred3b,pred4a,pred4b):
        pred_array.append([a,b,c,d])
    return pred_array
pred_array=make(pred3a, pred3b, pred4a,pred4b)
combine(pred_array,y_test)

  return np.mean(atmp[sl], axis=axis)


SpearmanrResult(correlation=0.6385863721747177, pvalue=0.0)