In [53]:
## !/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: rohantondulkar,QwQ2000
"""

import numpy as np
import pandas as pd
from dateutil import parser
import time
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval
from gensim.models import Doc2Vec
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import pickle
import re
from progressbar import *    
from pyltr.models.lambdamart import LambdaMART
from pyltr.util.group import check_qids, get_groups

class MyLambdaMART(LambdaMART):
     def _fit_stages(self, X, y, qids, y_pred, random_state,
                    begin_at_stage=0, monitor=None):
        n_samples = X.shape[0]
        do_subsample = self.subsample < 1.0
        sample_weight = np.ones(n_samples, dtype=np.float64)

        n_queries = check_qids(qids)
        query_groups = np.array([(qid, a, b, np.arange(a, b))
                                 for qid, a, b in get_groups(qids)],
                                dtype=np.object)
        assert n_queries == len(query_groups)
        do_query_oob = self.query_subsample < 1.0
        query_mask = np.ones(n_queries, dtype=np.bool)
        query_idx = np.arange(n_queries)
        q_inbag = max(1, int(self.query_subsample * n_queries))

        if self.verbose:
            verbose_reporter = _VerboseReporter(self.verbose)
            verbose_reporter.init(self, begin_at_stage)
        j = 0
        for i in range(begin_at_stage, self.n_estimators):
            j = i
            if do_query_oob:
                random_state.shuffle(query_idx)
                query_mask = np.zeros(n_queries, dtype=np.bool)
                query_mask[query_idx[:q_inbag]] = 1

            query_groups_to_use = query_groups[query_mask]
            sample_mask = np.zeros(n_samples, dtype=np.bool)
            for qid, a, b, sidx in query_groups_to_use:
                sidx_to_use = sidx
                if do_subsample:
                    query_samples_inbag = max(
                        1, int(self.subsample * (b - 1)))
                    random_state.shuffle(sidx)
                    sidx_to_use = sidx[:query_samples_inbag]
                sample_mask[sidx_to_use] = 1

            if do_query_oob:
                old_oob_total_score = 0.0
                for qid, a, b, _ in query_groups[~query_mask]:
                    old_oob_total_score += self.metric.evaluate_preds(
                        qid, y[a:b], y_pred[a:b])

            y_pred = self._fit_stage(i, X, y, qids, y_pred, sample_weight,
                                     sample_mask, query_groups_to_use,
                                     random_state)

            train_total_score, oob_total_score = 0.0, 0.0
            for qidx, (qid, a, b, _) in enumerate(query_groups):
                score = self.metric.evaluate_preds(
                    qid, y[a:b], y_pred[a:b])
                if query_mask[qidx]:
                    train_total_score += score
                else:
                    oob_total_score += score

            self.train_score_[i] = train_total_score / q_inbag
            if do_query_oob:
                if q_inbag < n_queries:
                    self.oob_improvement_[i] = \
                        ((oob_total_score - old_oob_total_score) /
                         (n_queries - q_inbag))

            early_stop = False
            monitor_output = None
            if monitor is not None:
                monitor_output = monitor(i, self, locals())
                if monitor_output is True:
                    early_stop = True

            if self.verbose > 0:
                verbose_reporter.update(i, self, monitor_output)

            if early_stop:
                break

        return j + 1
    
class BestAnswererPredictor:
    """ Model to make real time predictions of best answerer for new questions"""
    
    def __init__(self, train_profile, user_details, user_tag_profile, tags_list, user_text_vec, doc2vec_model, ltr_model ):
        # 2-d numpy array for all questions in the format
        # PostId, QuestionContent, AnswerId, AnswerContent, IsBestAnswer, Score, Tags, AnswererId, AnswerTime, TotalContent
        self.train_profile = train_profile
        
        # Get 2-D array of user details in the format 
        # UserId, AnswerFrequencyInDays, Reputation, NumBestAnsToAnsRatio, MRR, AvgQuesQuality, AvgQuesPopularity
        self.user_details = user_details
        self.user_list = self.user_details[:,0]
        self.num_users = len(self.user_list)

        # Get 2-D array for tag scores on 987 tags per user
        self.user_tag_profile = user_tag_profile
        self.tags_list = tags_list
        
        # Get 2-D array of text vector representation per user
        self.user_text_vec = user_text_vec
        
        self.doc2vec_model = doc2vec_model
        self.ltr_model = ltr_model
        self.stop_words = set(stopwords.words('english'))
        self.pstem = PorterStemmer()
        self.num_features = 9
        self.feature_set = [ self.get_tag_similarity, self.get_cosine_similarity, self.get_user_answer_frequency,
                            self.get_number_best_answers_ratio, self.get_user_reputation, self.get_days_to_prev_ans,
                            self.get_user_MRR, self.get_user_avg_ques_quality, self.get_user_avg_ques_popularity
                           ]
        self.setup()
        
    def setup(self):
        """ Prepare internal data for faster prediction """
        print('Setting up...')
        self.user_train_profiles = {}
        for user in self.user_list:
            self.user_train_profiles[user] = self.train_profile[np.where(self.train_profile[:,7]==user)]
        print('Setup finished!')
        
    def get_train_data(self,data_line,answerer = -1):
        """ Expecting a 1-d numpy array as [QuestionContent, Tags, CreateTime (datetime object)]"""
        # Create a feature set for all ques-user pairs
        features = np.zeros((len(self.user_list), self.num_features + 2))
        features[:, 0] = self.user_list
        
        labels = np.zeros(len(self.user_list),dtype = float)
        for i in range(len(self.user_list)):
            if features[i][0] == answerer:
                labels[i] = 1.0
                break
        
        # Expected order of feature set: 
        # ['SimilarityScore', 'CosineSimilarity', 'AnswerFrequencyInDays', 'NumBestAnsToAnsRatio', 
        #  'Reputation', 'DaysToPrevAns', 'MRR', 'AvgQuesQuality', 'AvgQuesPopularity']     
        for i in range(1,self.num_features):
            #start = time.time()
            features[:, i] = self.feature_set[i-1](data_line)
            #print('Time for feature: {0} is {1} secs'.format(self.feature_set[i-1],time.time()-start))
        return features,labels
    
    def predict(self, test_data, topK):
        """ Expecting a 1-d numpy array as [QuestionContent, Tags, CreateTime (datetime object)]"""
        features,_ = self.get_train_data(test_data)
        features[:,10] = self.ltr_model.predict(features[:,1:self.num_features+1])
        ranked_users = features[features[:,10].argsort()[::-1]][:,0]
        #print(ranked_users[:topK])
        return ranked_users.astype(int)[:topK]
        
    def get_cosine_similarity(self, test_data):
        """ Get cosine similarity between doc2vec representations of question and all users """
        text_score = np.zeros(self.num_users)
        text = test_data[0]
        text = re.compile('\w+').findall(text.lower())
        text = [w for w in text if not w in self.stop_words]
        text = [self.pstem.stem(w) for w in text if len(w)>2]
        text_vec = self.doc2vec_model.infer_vector(text)
        count = 0
        
        for user in self.user_list:
            text_score[count] = cosine_similarity(text_vec.reshape(1, -1), self.user_text_vec[self.user_text_vec[:,0]==user][:,1:])
            count+=1
        return text_score
        
    def get_tag_similarity(self, test_data):
        """ Get tag based similarity between tag profiles of question and all users """
        tags = literal_eval(test_data[1])
        ques_tag_vec = np.zeros(len(self.tags_list))
        
        # Make ques tag vector
        for tag in tags:
            res = np.where(self.tags_list == tag)
            if len(res[0]) != 0:
                i = res[0][0]
                ques_tag_vec[i]=1
            
        # Calculate tag score for all users
        count = 0
        tag_score = np.zeros(self.num_users)
        for user in self.user_list:
            user_tag_vec = self.user_tag_profile[np.where(self.user_tag_profile[:, 0]==user)][:,1:].ravel()
            match = user_tag_vec[np.logical_and(ques_tag_vec, user_tag_vec)]
            tag_score[count] = np.sum(match) * match.shape[0]
            count+=1
        return tag_score
    
    def get_days_to_prev_ans(self, test_data):
        """ Get the number of days since last answer for each user """
        days_to_prev_ans = np.zeros(self.num_users)
        create_time = parser.parse(test_data[2])
        count = 0
        
        for user in self.user_list:
            # Get the list of answers by the given userId
            ans_profile = self.user_train_profiles[user]

            # Get the days since previous answer
            ans_profile = ans_profile[np.where(ans_profile[:,8]<create_time)]

            if len(ans_profile) == 0:
                days_to_prev_ans[count] = 2000
            else:
                ans_profile = ans_profile[ans_profile[:,8].argsort()[::-1]]
                timeDiff = create_time - ans_profile[0, 8]
                days_to_prev_ans[count] = timeDiff.total_seconds()/(3600*24)        # in days
            count+=1
        return days_to_prev_ans
    
    def get_user_answer_frequency(self, test_data):
        """ Get answering frequeny for all users """
        return self.user_details[:,1]
    
    def get_user_reputation(self, test_data):
        """ Get reputation for all users """
        return self.user_details[:,2]
    
    def get_number_best_answers_ratio(self, test_data):
        """ Get number best answers to number of answers ratio"""
        return self.user_details[:,3]
    
    def get_user_MRR(self, test_data):
        """ Get mean reciprocal rank for every user """
        return self.user_details[:, 4]
    
    def get_user_avg_ques_quality(self, test_data):
        """ Get average question quality per user"""
        return self.user_details[:, 5]
    
    def get_user_avg_ques_popularity(self, test_data):
        """ Get average question popularty per user"""
        return self.user_details[:, 6]
    

def sample_predictions():
    """ An example to show how to make sample predictions"""
    # All features should be only for 1339 final set of users
    # Load all the files needed
    doc2vecModel = Doc2Vec.load('../Models/user_doc2vec_1000.model')
    user_details = pd.read_csv('../Dataset/UserInformation.csv')
    train_profile = pd.read_csv('../Dataset/Train_Profile.csv')
    train_profile['AnswerTime'] = pd.to_datetime(train_profile['AnswerTime'])
    test_ltr = pd.read_csv('../Dataset/Sample_test_data.csv')
    user_tag_profile = pd.read_csv('../Dataset/User_tag_profile.csv')
    user_text_vec = pd.read_csv('../Dataset/User_text_vec.csv')

    with open(r"../Models/LTR_ALL_final.pkl", "rb") as input_file:
        ltr_model = pickle.load(input_file)
        
    # Initialize the model
    predictor = BestAnswererPredictor(train_profile = train_profile.values, user_details = user_details.values, \
                                  user_tag_profile = user_tag_profile.values, tags_list = user_tag_profile.columns[1:],\
                                  user_text_vec = user_text_vec.values,\
                                  doc2vec_model = doc2vecModel, ltr_model = ltr_model)
    topK = 50
    for index, row in test_ltr.iterrows():
        start = time.time()
        topK_users = predictor.predict(row.values, topK)
        print(index)
        print('\nPrediction took: {0} secs'.format(time.time()-start))
        print('Top {0} users: {1}'.format(topK, topK_users))

def get_test_metrics(model_path):
    """ Show ACC and MRR on test set"""
    doc2vecModel = Doc2Vec.load('../Models/user_doc2vec_1000.model')
    user_details = pd.read_csv('../Dataset/UserInformation.csv')
    train_profile = pd.read_csv('../Dataset/Train_Profile.csv')
    train_profile['AnswerTime'] = pd.to_datetime(train_profile['AnswerTime'])
    test_ltr = pd.read_csv('../Dataset/Test_LTR.csv')
    user_tag_profile = pd.read_csv('../Dataset/User_tag_profile.csv')
    user_text_vec = pd.read_csv('../Dataset/User_text_vec.csv')

    with open(model_path, "rb") as input_file:
        ltr_model = pickle.load(input_file)
        
    # Initialize the model
    predictor = BestAnswererPredictor(train_profile = train_profile.values, user_details = user_details.values, \
                                  user_tag_profile = user_tag_profile.values, tags_list = user_tag_profile.columns[1:],\
                                  user_text_vec = user_text_vec.values,\
                                  doc2vec_model = doc2vecModel, ltr_model = ltr_model)
    topK = [1,5,10,20,50]
    acc,mrr = [0 for i in range(len(topK))],[0 for i in range(len(topK))]
    start = time.time()
    print('Start calculating metrics...')
    bar = ProgressBar(maxval = len(test_ltr)).start()
    for index, row in test_ltr.iterrows():
        topK_users = predictor.predict([row.values[1],row.values[6],row.values[8]], topK[-1])
        best = row.values[7]
        bestPos = topK[-1] + 1
        for j in range(topK[-1]):
            if topK_users[j] == best:
                bestPos = j
                break
        for j in range(len(topK)):
            if bestPos < topK[j]:
                acc[j] += 1
                mrr[j] += 1 / (bestPos + 1)
        if index % 99 == 0 or index + 1 == len(test_ltr):
            bar.update(index + 1)
            print('\nPrediction took: {0} secs'.format(time.time()-start))
            print('ACC: {0} \nMRR: {1}'.format([v / (index + 1) for v in acc],[v / (index + 1) for v in mrr]))

def train_ltr_model(model_path,output_path):
    """Train LambdaMART model."""
    doc2vecModel = Doc2Vec.load('../Models/user_doc2vec_1000.model')
    user_details = pd.read_csv('../Dataset/UserInformation.csv')
    train_profile = pd.read_csv('../Dataset/Train_Profile.csv')
    train_profile['AnswerTime'] = pd.to_datetime(train_profile['AnswerTime'])
    train_ltr = pd.read_csv('../Dataset/Train_LTR.csv')
    user_tag_profile = pd.read_csv('../Dataset/User_tag_profile.csv')
    user_text_vec = pd.read_csv('../Dataset/User_text_vec.csv')
    
    predictor = BestAnswererPredictor(train_profile = train_profile.values, user_details = user_details.values, \
                                  user_tag_profile = user_tag_profile.values, tags_list = user_tag_profile.columns[1:],\
                                  user_text_vec = user_text_vec.values,\
                                  doc2vec_model = doc2vecModel, ltr_model = None)
    
    x,y,qids = None,None,[]
    div = 500
    for index,row in train_ltr.iterrows():
        if index % div != 0:
            continue
        x0,y0 = predictor.get_train_data([row.values[1],row.values[6],row.values[8]],row.values[7])
        x = x0 if x is None else np.concatenate((x,x0))
        y = y0 if y is None else np.concatenate((y,y0))
        qids += [row.values[0] for i in range(len(predictor.user_list))]
    print('Feature ready.')
    with open(model_path, "rb") as input_file:
        model = MyLambdaMART()
    print('Fitting...')
    model.fit(x,y,qids)
    print(model.feature_importances_)
    print('Saving model...')
    with open(output_path,'wb') as output_file:
        pickle.dump(model,output_file)
    
if __name__ == "__main__":
    #sample_predictions()
    #train_ltr_model(r"../Models/LTR_ALL_final.pkl",r"../Models/LTR_PART.pkl")
    #get_test_metrics(r"../Models/LTR_ALL_final.pkl")
    get_test_metrics(r"../Models/LTR_PART.pkl")

Setting up...


  0% |                                                                        |

Setup finished!
Start calculating metrics...

Prediction took: 0.8316562175750732 secs
ACC: [0.0, 1.0, 1.0, 1.0, 1.0] 
MRR: [0.0, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]


  2% |#                                                                       |


Prediction took: 117.39901232719421 secs
ACC: [0.04, 0.11, 0.19, 0.38, 0.67] 
MRR: [0.04, 0.06566666666666666, 0.07510714285714286, 0.08775451608075442, 0.09698278541388468]


  4% |##                                                                      |


Prediction took: 226.1710183620453 secs
ACC: [0.020100502512562814, 0.08542713567839195, 0.15577889447236182, 0.34673366834170855, 0.678391959798995] 
MRR: [0.020100502512562814, 0.043383584589614735, 0.051818616893993766, 0.06453386374003389, 0.0748190706109348]


  6% |####                                                                    |


Prediction took: 354.0906386375427 secs
ACC: [0.02348993288590604, 0.09731543624161074, 0.17114093959731544, 0.35906040268456374, 0.6610738255033557] 
MRR: [0.02348993288590604, 0.048601789709172255, 0.0573186321508469, 0.06969659336457079, 0.07913184000255787]


  8% |#####                                                                   |


Prediction took: 455.0923161506653 secs
ACC: [0.02518891687657431, 0.10075566750629723, 0.19143576826196473, 0.3602015113350126, 0.654911838790932] 
MRR: [0.02518891687657431, 0.04953820319059613, 0.06090320259086001, 0.07197683403593051, 0.08120237515452236]


 10% |#######                                                                 |


Prediction took: 563.6308634281158 secs
ACC: [0.024193548387096774, 0.0967741935483871, 0.19153225806451613, 0.3548387096774194, 0.6532258064516129] 
MRR: [0.024193548387096774, 0.048051075268817196, 0.06015344982078852, 0.07089545021015364, 0.08032810590642046]


 12% |########                                                                |


Prediction took: 667.096342086792 secs
ACC: [0.020168067226890758, 0.08571428571428572, 0.17647058823529413, 0.33949579831932775, 0.6369747899159663] 
MRR: [0.020168067226890758, 0.04179271708683472, 0.05326330532212884, 0.06404019785546711, 0.07342880308817074]


 14% |##########                                                              |


Prediction took: 756.859610080719 secs
ACC: [0.020172910662824207, 0.09077809798270893, 0.1829971181556196, 0.34438040345821325, 0.6340057636887608] 
MRR: [0.020172910662824207, 0.04320365033621516, 0.05493573029596087, 0.06564525673601937, 0.07484288890505887]


 16% |###########                                                             |


Prediction took: 877.703323841095 secs
ACC: [0.0201765447667087, 0.08953341740226986, 0.17906683480453972, 0.3467843631778058, 0.6456494325346784] 
MRR: [0.0201765447667087, 0.042265657839428324, 0.05369002582117341, 0.06494560580486602, 0.074428583589653]


 18% |############                                                            |


Prediction took: 997.788471698761 secs
ACC: [0.01905829596412556, 0.08408071748878924, 0.1827354260089686, 0.3452914798206278, 0.6502242152466368] 
MRR: [0.01905829596412556, 0.039854260089686094, 0.052549113815930026, 0.06348481244937904, 0.07316050417215504]


 20% |##############                                                          |


Prediction took: 1118.552936553955 secs
ACC: [0.01917255297679112, 0.08375378405650857, 0.18365287588294651, 0.34006054490413723, 0.649848637739657] 
MRR: [0.01917255297679112, 0.04006054490413724, 0.052929140038120936, 0.06348394630573582, 0.07333986293852163]


 22% |###############                                                         |


Prediction took: 1239.898001909256 secs
ACC: [0.01743119266055046, 0.08440366972477065, 0.18073394495412845, 0.3376146788990826, 0.6495412844036698] 
MRR: [0.01743119266055046, 0.03885321100917433, 0.051257463229940375, 0.06185668824678841, 0.07178944322514189]


 24% |#################                                                       |


Prediction took: 1347.823767900467 secs
ACC: [0.018502943650126155, 0.08410428931875526, 0.1808242220353238, 0.34314550042052144, 0.6593776282590412] 
MRR: [0.018502943650126155, 0.03964115503223999, 0.05202250790980826, 0.0628849459871771, 0.0729381459546479]


 26% |##################                                                      |


Prediction took: 1467.444417476654 secs
ACC: [0.017857142857142856, 0.08618012422360248, 0.18555900621118013, 0.34782608695652173, 0.6630434782608695] 
MRR: [0.017857142857142856, 0.03954451345755697, 0.05226017943409256, 0.06317876637981383, 0.07314306655092043]


 28% |####################                                                    |


Prediction took: 1577.3700308799744 secs
ACC: [0.018745493871665464, 0.08868060562364816, 0.18529199711607786, 0.34895457822638787, 0.6568132660418169] 
MRR: [0.018745493871665464, 0.04133621725546747, 0.05364181000446329, 0.0647215568839629, 0.07452698676442057]


 30% |#####################                                                   |


Prediction took: 1682.5347783565521 secs
ACC: [0.020188425302826378, 0.08815612382234186, 0.1864064602960969, 0.35262449528936746, 0.6608344549125168] 
MRR: [0.020188425302826378, 0.04181247196052045, 0.054367482749044085, 0.06559669611267822, 0.0753882876923035]


 32% |#######################                                                 |


Prediction took: 1765.042733669281 secs
ACC: [0.02018927444794953, 0.08832807570977919, 0.18548895899053627, 0.3501577287066246, 0.6637223974763407] 
MRR: [0.02018927444794953, 0.04221871713985283, 0.05455560562816091, 0.06562833217487936, 0.07560414984174878]


 34% |########################                                                |


Prediction took: 1826.8714499473572 secs
ACC: [0.019596199524940617, 0.08669833729216152, 0.18052256532066507, 0.34501187648456055, 0.6644893111638955] 
MRR: [0.019596199524940617, 0.04108273950910534, 0.05300352524224266, 0.06408329802547749, 0.07425971837795253]


 36% |#########################                                               |


Prediction took: 1894.9218950271606 secs
ACC: [0.019068984856982614, 0.08524957936062816, 0.17891194615816042, 0.34604598990465507, 0.6640493550196298] 
MRR: [0.019068984856982614, 0.040409422321929366, 0.05232687017600091, 0.0635748731980733, 0.07371301608412245]


 38% |###########################                                             |


Prediction took: 1960.783040046692 secs
ACC: [0.018597236981934114, 0.08289054197662062, 0.17375132837407015, 0.3427205100956429, 0.6668437832093518] 
MRR: [0.018597236981934114, 0.039257881686149514, 0.0508235919234857, 0.06222583278347032, 0.0725581443547839]


 40% |############################                                            |


Prediction took: 2020.6997563838959 secs
ACC: [0.0176678445229682, 0.08278647147905098, 0.17314487632508835, 0.3397274103987885, 0.6653205451792025] 
MRR: [0.0176678445229682, 0.03861686017163051, 0.05016465950337736, 0.06142993726859343, 0.07180537394936524]


 42% |##############################                                          |


Prediction took: 2084.084417104721 secs
ACC: [0.01730769230769231, 0.08125, 0.17067307692307693, 0.33798076923076925, 0.6625] 
MRR: [0.01730769230769231, 0.03775641025641028, 0.04917315323565327, 0.06047825800691073, 0.07083910839510002]


 44% |###############################                                         |


Prediction took: 2151.7592222690582 secs
ACC: [0.017439192290041303, 0.08352455254703993, 0.1734740706746214, 0.33868747131711796, 0.6622303809086737] 
MRR: [0.017439192290041303, 0.03858038855744226, 0.05002003248905499, 0.06119609510535356, 0.07150711335936909]


 46% |#################################                                       |


Prediction took: 2217.446128129959 secs
ACC: [0.01755926251097454, 0.08428446005267778, 0.17383669885864794, 0.33713784021071114, 0.6633011413520632] 
MRR: [0.01755926251097454, 0.038703541117939715, 0.050080654152208165, 0.06113139572463792, 0.07156803216521235]


 48% |##################################                                      |


Prediction took: 2278.7783637046814 secs
ACC: [0.01724863273033235, 0.08456037021455616, 0.1758519141775347, 0.338662179217501, 0.6630206142196046] 
MRR: [0.01724863273033235, 0.03866217921750106, 0.05018530761063367, 0.06118796878751675, 0.07155629321329421]


 50% |####################################                                    |


Prediction took: 2347.256097793579 secs
ACC: [0.016962843295638127, 0.08602584814216478, 0.17689822294022617, 0.3376413570274637, 0.6615508885298869] 
MRR: [0.016962843295638127, 0.039216478190630046, 0.050692681488319624, 0.061577994944489824, 0.0719545645852066]


 52% |#####################################                                   |


Prediction took: 2410.4463901519775 secs
ACC: [0.0170873786407767, 0.0858252427184466, 0.1766990291262136, 0.33941747572815534, 0.665242718446602] 
MRR: [0.0170873786407767, 0.03932038834951456, 0.050814609338881184, 0.061812180441986, 0.07223127504038304]


 54% |######################################                                  |


Prediction took: 2476.0200233459473 secs
ACC: [0.016454749439042633, 0.08489154824233358, 0.1731488406881077, 0.337696335078534, 0.6679132385938669] 
MRR: [0.016454749439042633, 0.03855023684866616, 0.04972798019731454, 0.06085553144127749, 0.07138842294649043]


 56% |########################################                                |


Prediction took: 2540.003349304199 secs
ACC: [0.016588532275513886, 0.0861882437793004, 0.1730977280923188, 0.3389830508474576, 0.6675081139560043] 
MRR: [0.016588532275513886, 0.03922947469647793, 0.05023697903250735, 0.061451230563546504, 0.07193179479828445]


 58% |#########################################                               |


Prediction took: 2605.6922783851624 secs
ACC: [0.017061281337047353, 0.08600278551532034, 0.17165738161559888, 0.3370473537604457, 0.6636490250696379] 
MRR: [0.017061281337047353, 0.03949628597957288, 0.05036850046425254, 0.061560070659254534, 0.0719639681125741]


 60% |###########################################                             |


Prediction took: 2670.3129262924194 secs
ACC: [0.016492763379333558, 0.0851565129585998, 0.1693032648939751, 0.33187478963312017, 0.6627398182430159] 
MRR: [0.016492763379333558, 0.03875238415797149, 0.049400688133438043, 0.060415976270383656, 0.07096497926062667]


 62% |############################################                            |


Prediction took: 2739.1941311359406 secs
ACC: [0.016612377850162865, 0.08664495114006515, 0.16970684039087947, 0.3299674267100977, 0.6599348534201954] 
MRR: [0.016612377850162865, 0.03944082519001084, 0.04995152784240731, 0.060809935979193576, 0.0713170721103197]


 64% |##############################################                          |


Prediction took: 2803.220799446106 secs
ACC: [0.017986746607762703, 0.0886715052066898, 0.1719785421268539, 0.3329125907226254, 0.661091827074787] 
MRR: [0.017986746607762703, 0.04093299673924476, 0.05154297334795911, 0.062464229726050174, 0.0729073693959505]


 66% |###############################################                         |


Prediction took: 2872.6306993961334 secs
ACC: [0.017747858017135864, 0.0890452876376989, 0.1741126070991432, 0.3356793145654835, 0.6609547123623011] 
MRR: [0.017747858017135864, 0.04089657282741736, 0.05170642206290916, 0.06269311697601691, 0.07304764907935367]


 68% |#################################################                       |


Prediction took: 2937.3734295368195 secs
ACC: [0.01782001782001782, 0.09088209088209089, 0.17671517671517672, 0.3385803385803386, 0.6623106623106623] 
MRR: [0.01782001782001782, 0.04159489159489158, 0.05254206325634891, 0.0635366204464289, 0.07383726316911808]


 70% |##################################################                      |


Prediction took: 3008.6483306884766 secs
ACC: [0.01788805539526832, 0.09088286208886324, 0.1768609347951529, 0.3381419503750721, 0.6618580496249279] 
MRR: [0.01788805539526832, 0.04161377187920752, 0.0526123384533655, 0.06357164081966184, 0.07385467800436941]


 72% |###################################################                     |


Prediction took: 3078.1440920829773 secs
ACC: [0.0182328190743338, 0.09032258064516129, 0.17727910238429173, 0.3399719495091164, 0.6628330995792426] 
MRR: [0.0182328190743338, 0.04157550257129498, 0.05271099089472152, 0.06378405929627382, 0.0740440934275719]


 74% |#####################################################                   |


Prediction took: 3145.0741028785706 secs
ACC: [0.017740174672489083, 0.0892467248908297, 0.17631004366812228, 0.34088427947598254, 0.6607532751091703] 
MRR: [0.017740174672489083, 0.04090247452692865, 0.052040553302834865, 0.06322779677494657, 0.07339712517407507]


 76% |######################################################                  |


Prediction took: 3211.0006420612335 secs
ACC: [0.017804942864735582, 0.08902471432367792, 0.1761892107361148, 0.3406856231730003, 0.658517140579325] 
MRR: [0.017804942864735582, 0.04086721587385948, 0.05199309061918672, 0.06320115906912634, 0.07329362991361117]


 78% |########################################################                |


Prediction took: 3276.1693074703217 secs
ACC: [0.017348524080787155, 0.08855515277058519, 0.17529777317452097, 0.3394614189539099, 0.6584671154842051] 
MRR: [0.017348524080787155, 0.040298636285171725, 0.051371215670801273, 0.06255819899755485, 0.0726807798994044]


 80% |#########################################################               |


Prediction took: 3340.4413554668427 secs
ACC: [0.01691492047462762, 0.0878566018682151, 0.17394597323908104, 0.338803332491795, 0.6589245140116132] 
MRR: [0.01691492047462762, 0.03984684002356304, 0.05085756763363417, 0.062079516628864954, 0.07223842960223764]


 82% |###########################################################             |


Prediction took: 3402.5433666706085 secs
ACC: [0.017241379310344827, 0.08940886699507389, 0.17463054187192117, 0.3399014778325123, 0.6591133004926109] 
MRR: [0.017241379310344827, 0.04059113300492607, 0.05148311048557342, 0.06272934340840851, 0.07285304647972989]


 84% |############################################################            |


Prediction took: 3471.1871404647827 secs
ACC: [0.0175522962250541, 0.08824236595335418, 0.17335898052416446, 0.3385429189709065, 0.657850444818466] 
MRR: [0.0175522962250541, 0.04040634767973066, 0.051297053244636674, 0.06253441283077851, 0.07267279603885321]


 86% |#############################################################           |


Prediction took: 3538.7638561725616 secs
ACC: [0.017144199154532646, 0.08853922029121654, 0.17402536402066698, 0.34030061061531236, 0.6578205730389854] 
MRR: [0.017144199154532646, 0.040406294034758074, 0.05133819067003648, 0.06262828641442662, 0.07272119162739188]


 88% |###############################################################         |


Prediction took: 3626.3867523670197 secs
ACC: [0.017443194858847832, 0.08859306862520083, 0.17351388570117052, 0.3406013311911866, 0.6598577002524673] 
MRR: [0.017443194858847832, 0.04058603014306477, 0.051437752057444386, 0.06279757033558954, 0.07294972395163386]


 90% |################################################################        |


Prediction took: 3719.079612016678 secs
ACC: [0.017280071813285457, 0.08864452423698384, 0.17392280071813285, 0.34066427289048473, 0.6591113105924596] 
MRR: [0.017280071813285457, 0.04055206463195688, 0.051424154341568946, 0.0627675557283459, 0.07289002939177398]


 92% |##################################################################      |


Prediction took: 3810.1741740703583 secs
ACC: [0.01734357848518112, 0.08869374313940724, 0.1743139407244786, 0.3398463227222832, 0.6577387486278814] 
MRR: [0.01734357848518112, 0.04050493962678372, 0.05144277176659161, 0.06271461718760456, 0.07283681658385896]


 94% |###################################################################     |


Prediction took: 3913.2901401519775 secs
ACC: [0.016974645466265578, 0.08766652342071336, 0.17318435754189945, 0.33820369574559517, 0.6568543188654921] 
MRR: [0.016974645466265578, 0.03994413407821225, 0.0508772961985251, 0.06212567076261993, 0.07228239771771061]


 96% |#####################################################################   |


Prediction took: 4034.5292711257935 secs
ACC: [0.016831474857984433, 0.08689248895434462, 0.17273301073006522, 0.33705028403113824, 0.657269093204292] 
MRR: [0.016831474857984433, 0.03953993968721505, 0.0505185530274946, 0.061714059107818395, 0.07193299954844153]


 98% |######################################################################  |


Prediction took: 4148.9869611263275 secs
ACC: [0.016488046166529265, 0.08635614179719703, 0.1718878812860676, 0.3365622423742787, 0.6589035449299258] 
MRR: [0.016488046166529265, 0.03920376477054132, 0.05013061215142816, 0.061345120523814335, 0.07162057010647256]


100% |########################################################################|


Prediction took: 4262.6138434410095 secs
ACC: [0.016380182002022244, 0.08574317492416582, 0.1716885743174924, 0.3356926188068756, 0.6590495449949444] 
MRR: [0.016380182002022244, 0.03897876643073808, 0.049984993660423235, 0.0611544782132468, 0.07144793716954363]


In [29]:
import pickle
import pyltr

with open(r"../Models/LTR_ALL_final.pkl", "rb") as input_file:
    ltr_model = pickle.load(input_file)
    for attr in dir(ltr_model):
        print(attr,getattr(ltr_model,attr))

__class__ <class '__main__.MyLambdaMART'>
__delattr__ <method-wrapper '__delattr__' of MyLambdaMART object at 0x00000270848A9358>
__dict__ {'metric': <pyltr.metrics.dcg.NDCG object at 0x00000270848A9080>, 'learning_rate': 0.1, 'n_estimators': 100, 'query_subsample': 1.0, 'subsample': 1.0, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 3, 'random_state': None, 'max_features': None, 'verbose': 0, 'max_leaf_nodes': None, 'warm_start': True, 'n_features': 11, 'max_features_': 11, 'estimators_': array([[DecisionTreeRegressor(criterion='friedman_mse', max_depth=3,
           max_features=None, max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=True,
           random_state=<mtrand.RandomState object at 0x0000027088C93510>,
           splitter='best')],
       [DecisionTreeRegressor(criterion='friedman_mse', max_depth=3,
           max_features=Non