#   Comparison of a base KNN to a KNN w/ratings

In [247]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.neighbors import NearestNeighbors
import ast
import requests

In [248]:
response = requests.post('http://underdog-devs-ds-a-dev.us-east-1.elasticbeanstalk.com/Mentors/read')
mentors = pd.DataFrame(response.json()['result'])

In [249]:
response = requests.post('http://underdog-devs-ds-a-dev.us-east-1.elasticbeanstalk.com/Mentees/read')
mentees = pd.DataFrame(response.json()['result'])

One hot encoding dataset generated by local_utils

In [250]:
def ohe(mentors, mentees):

    mentors = mentors.drop(columns=['first_name', 'last_name',
                           'email', 'city', 'state', 'validateStatus', 'country'])

    mentors = pd.merge(mentors, pd.get_dummies(mentors, columns=['current_comp', 'experience_level', 'job_help', 'industry_knowledge',
                          'pair_programming', 'other_info', 'formerly_incarcerated', 'list_convictions', 'underrepresented_group', 'low_income']).fillna(0))

    mentors = mentors.join(mentors['subject'].apply(
        pd.Series).stack().str.get_dummies().groupby(level=0).sum())

    mentors = mentors.drop(columns=['subject', 'current_comp', 'experience_level', 'job_help', 'industry_knowledge',
                           'pair_programming', 'other_info', 'formerly_incarcerated', 'list_convictions', 'underrepresented_group', 'low_income'])
    
    mentors = mentors.set_index('profile_id')
    mentors.index.rename('mentors_profile_id', inplace=True)

    #------------------------------------------------------------

    mentees = mentees.drop(columns=['first_name', 'last_name',
                           'email', 'city', 'state', 'validateStatus', 'country', 'list_convictions'])

    mentees = pd.merge(mentees, pd.get_dummies(mentees, columns=['experience_level', 'job_help', 'industry_knowledge',
                          'pair_programming', 'other_info', 'formerly_incarcerated', 'underrepresented_group', 'low_income']).fillna(0))

    mentees = mentees.join(mentees['subject'].apply(
        pd.Series).stack().str.get_dummies().groupby(level=0).sum())
    mentees = mentees.drop(columns=['subject', 'experience_level', 'job_help', 'industry_knowledge',
                           'pair_programming', 'other_info', 'formerly_incarcerated', 'underrepresented_group', 'low_income'])

    mentees = mentees.set_index('profile_id')
    mentees.index.rename('mentees_profile_id', inplace=True)

    return mentors, mentees


In [251]:
#the driving force of selection is mentee to mentor <---- mentee like system
mentors, mentees = ohe(mentors, mentees)

# Generate a fake review matrix

In [252]:
#This needs to be changed to the review data from the live server eventually
reviews = sp.sparse.random(mentors.shape[0], mentees.shape[0], density=0.1, random_state=42)
reviews = pd.DataFrame(reviews.todense(), index=mentors.index, columns=mentees.index)
reviews = reviews.melt(ignore_index=False ,value_name='rating')

In [253]:
reviews

Unnamed: 0_level_0,mentees_profile_id,rating
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1
vDw14633y666xVTo,z781i5e3EhH82A7u,0.000000
H8L6d7kG2E1J8f58,z781i5e3EhH82A7u,0.000000
37T2KouYl5477V1o,z781i5e3EhH82A7u,0.000000
224Ks8ALel5OW253,z781i5e3EhH82A7u,0.000000
t82ZE41q1fHbx086,z781i5e3EhH82A7u,0.000000
...,...,...
cfz4dysa,9fhtw954,0.000000
cq972k0i,9fhtw954,0.279276
00u13oned0U8XP8Mb4x7,9fhtw954,0.000000
2gvkshpc,9fhtw954,0.000000


In [254]:
reviews = reviews.pivot(columns = 'mentees_profile_id', values = 'rating').fillna(0)

In [255]:
reviews

mentees_profile_id,001lJh12502oVmee,0AII175WS04xG1N3,0L3YtO8sC2555oS4,0xx22Xf564qn6Rp4,1138hHTZ32O02PMj,11cj8uT0kKwy7567,17f37I04BKdx82pJ,1D2wOOV6g2p7x575,1F5E6YYd85104yXT,207wj4XJoFOU7442,...,r18K6ih76OR7z61l,rB3tJ4323v2HzB57,vGd05Ag1v6548qv2,vuQ555270P4UBe2v,w82l1e4I43KVB31b,x8564JhRkaU72D60,xjDnK275381UU0R7,yh3i2p2M0e32h34o,z781i5e3EhH82A7u,z7p3E5F63KK8Z18u
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0071UfJ4WLt18Z3F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.572049,0.0,0.0,0.0
00u13oned0U8XP8Mb4x7,0.0,0.0,0.0,0.0,0.0,0.0,0.52333,0.0,0.0,0.152104,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0lO7us717v3UI31U,0.0,0.0,0.0,0.0,0.748826,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
224Ks8ALel5OW253,0.0,0.0,0.0,0.902453,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2gvkshpc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.739904,0.881928,0.0,0.0,0.0,0.0
37T2KouYl5477V1o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.907195,0.0,0.79838,...,0.0,0.0,0.0,0.0,0.0,0.0,0.882416,0.0,0.0,0.0
3e8pkv7m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6066s0sv35RnW5Eo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.652224,0.0,0.0,0.0,0.0
632KLg2a8I4yuH71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087093,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.384511,0.0,0.0,0.0
6gc4Q834RO1h8VT2,0.0,0.0,0.0,0.0,0.0,0.0,0.104225,0.0,0.0,0.0,...,0.0,0.0,0.593661,0.0,0.005506,0.0,0.0,0.0,0.0,0.0


In [256]:
mentors.columns

Index(['current_comp_Amazin', 'current_comp_Amozonian', 'current_comp_Boogle',
       'current_comp_Lahoo', 'current_comp_Macrohard',
       'current_comp_Pineapple', 'current_comp_Poptrist',
       'current_comp_Toyota', 'current_comp_Unemployed',
       'experience_level_Advanced', 'experience_level_Beginner',
       'experience_level_Expert', 'experience_level_Intermediate',
       'experience_level_beginner', 'experience_level_expert',
       'job_help_False', 'job_help_True', 'job_help_true',
       'industry_knowledge_False', 'industry_knowledge_True',
       'industry_knowledge_true', 'pair_programming_False',
       'pair_programming_True', 'pair_programming_false', 'other_info_',
       'other_info_New Job', 'other_info_Notes', 'other_info_none',
       'formerly_incarcerated_False',
       'list_convictions_robbery, dance battle, etc...',
       'underrepresented_group_False', 'low_income_True', 'Android: Java',
       'Career Development', 'Data Science: Python', 'General Pr

In [262]:
#merge the reviews on profile id
combined = reviews.merge(mentors, left_index=True, right_index=True)

In [263]:
#KNN model
def create_model(df, neighbors_number):
    model = NearestNeighbors(n_neighbors=min(
        neighbors_number+1, df.shape[0]), metric='cosine')
    model.fit(df)
    return model

# Create a KNN with ratings and one without

In [266]:
#Create model
knn_collab = create_model(combined, neighbors_number=10)
knn = create_model(mentors, neighbors_number=10)


#### Perhaps a better approach would be to filter the data further and then apply KNN based on rating so that the weights are focused more on ratings rather than looking at the distance of other features?

random mentee#6967999171653,Raiden,Jones,"['Career Preparation', 'Technical']","['PHP', 'React', 'C++']",Advanced,Mountain

In [282]:
knn_collab_test = np.array([ 
                            0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 
                            1, 0, 0, 0,1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])

knn_test = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
                     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])


In [283]:
#run a test on knn
#note that this returns MENTEES similar to the current mentee

dist_collab, indices_collab = knn_collab.kneighbors([knn_collab_test], 5)
#output indices of mentees that are similar to the current mentee
dist_knn, indices_knn = knn.kneighbors([knn_test], 5)



In [294]:
combined

Unnamed: 0_level_0,001lJh12502oVmee,0AII175WS04xG1N3,0L3YtO8sC2555oS4,0xx22Xf564qn6Rp4,1138hHTZ32O02PMj,11cj8uT0kKwy7567,17f37I04BKdx82pJ,1D2wOOV6g2p7x575,1F5E6YYd85104yXT,207wj4XJoFOU7442,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0071UfJ4WLt18Z3F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
00u13oned0U8XP8Mb4x7,0.0,0.0,0.0,0.0,0.0,0.0,0.52333,0.0,0.0,0.152104,...,0,0,0,0,0,0,0,0,1,0
0lO7us717v3UI31U,0.0,0.0,0.0,0.0,0.748826,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0
224Ks8ALel5OW253,0.0,0.0,0.0,0.902453,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
2gvkshpc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
37T2KouYl5477V1o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.907195,0.0,0.79838,...,0,0,0,0,0,0,0,0,0,1
3e8pkv7m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,0,0,0
6066s0sv35RnW5Eo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
632KLg2a8I4yuH71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087093,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
6gc4Q834RO1h8VT2,0.0,0.0,0.0,0.0,0.0,0.0,0.104225,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0


In [296]:
mentors

Unnamed: 0_level_0,current_comp_Amazin,current_comp_Amozonian,current_comp_Boogle,current_comp_Lahoo,current_comp_Macrohard,current_comp_Pineapple,current_comp_Poptrist,current_comp_Toyota,current_comp_Unemployed,experience_level_Advanced,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
vDw14633y666xVTo,0,0,0,1,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
H8L6d7kG2E1J8f58,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
37T2KouYl5477V1o,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
224Ks8ALel5OW253,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
t82ZE41q1fHbx086,0,0,0,0,0,1,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0
UOMnnoy5005162U5,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
k3U0su6268AGiS74,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
6gc4Q834RO1h8VT2,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
cC8iu2X3681BI7Z6,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
I8263IK0QV74Foh1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [295]:
indices_collab

array([[ 1, 17,  0, 25,  3]], dtype=int64)

In [302]:
combined

Unnamed: 0_level_0,001lJh12502oVmee,0AII175WS04xG1N3,0L3YtO8sC2555oS4,0xx22Xf564qn6Rp4,1138hHTZ32O02PMj,11cj8uT0kKwy7567,17f37I04BKdx82pJ,1D2wOOV6g2p7x575,1F5E6YYd85104yXT,207wj4XJoFOU7442,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0071UfJ4WLt18Z3F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
00u13oned0U8XP8Mb4x7,0.0,0.0,0.0,0.0,0.0,0.0,0.52333,0.0,0.0,0.152104,...,0,0,0,0,0,0,0,0,1,0
0lO7us717v3UI31U,0.0,0.0,0.0,0.0,0.748826,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0
224Ks8ALel5OW253,0.0,0.0,0.0,0.902453,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
2gvkshpc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
37T2KouYl5477V1o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.907195,0.0,0.79838,...,0,0,0,0,0,0,0,0,0,1
3e8pkv7m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,0,0,0
6066s0sv35RnW5Eo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
632KLg2a8I4yuH71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087093,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
6gc4Q834RO1h8VT2,0.0,0.0,0.0,0.0,0.0,0.0,0.104225,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0


In [301]:
collab_mentor_list = []
for index in indices_collab:
    collab_mentor_list.append(mentors.loc[combined.iloc[index].index])

knn_list = []
for index in indices_knn:
    knn_list.append(mentors.loc[combined.iloc[index].index])

In [304]:
collab_df = pd.DataFrame(collab_mentor_list[0])
collab_df

Unnamed: 0_level_0,current_comp_Amazin,current_comp_Amozonian,current_comp_Boogle,current_comp_Lahoo,current_comp_Macrohard,current_comp_Pineapple,current_comp_Poptrist,current_comp_Toyota,current_comp_Unemployed,experience_level_Advanced,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00u13oned0U8XP8Mb4x7,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
cC8iu2X3681BI7Z6,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
0071UfJ4WLt18Z3F,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
t745a885ge47cHdr,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
224Ks8ALel5OW253,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0


In [305]:
knn_df = pd.DataFrame(knn_list[0])
knn_df

Unnamed: 0_level_0,current_comp_Amazin,current_comp_Amozonian,current_comp_Boogle,current_comp_Lahoo,current_comp_Macrohard,current_comp_Pineapple,current_comp_Poptrist,current_comp_Toyota,current_comp_Unemployed,experience_level_Advanced,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
t82ZE41q1fHbx086,0,0,0,0,0,1,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0
E732tjG5RVY7i373,0,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
0071UfJ4WLt18Z3F,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
b730N2FwD81R48fu,0,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
0lO7us717v3UI31U,0,0,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0


#### We see that there is some value in using a rating based KNN as the mentor with a higher rating basis would've scored better with the given review matrix.  

In [310]:
combined

Unnamed: 0_level_0,001lJh12502oVmee,0AII175WS04xG1N3,0L3YtO8sC2555oS4,0xx22Xf564qn6Rp4,1138hHTZ32O02PMj,11cj8uT0kKwy7567,17f37I04BKdx82pJ,1D2wOOV6g2p7x575,1F5E6YYd85104yXT,207wj4XJoFOU7442,...,low_income_True,Android: Java,Career Development,Data Science: Python,General Programming,"Web: HTML, CSS, JavaScript",backend,front end,frontend,iOS: Swift
mentors_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0071UfJ4WLt18Z3F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
00u13oned0U8XP8Mb4x7,0.0,0.0,0.0,0.0,0.0,0.0,0.52333,0.0,0.0,0.152104,...,0,0,0,0,0,0,0,0,1,0
0lO7us717v3UI31U,0.0,0.0,0.0,0.0,0.748826,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0
224Ks8ALel5OW253,0.0,0.0,0.0,0.902453,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
2gvkshpc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
37T2KouYl5477V1o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.907195,0.0,0.79838,...,0,0,0,0,0,0,0,0,0,1
3e8pkv7m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,0,0,0
6066s0sv35RnW5Eo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
632KLg2a8I4yuH71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087093,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
6gc4Q834RO1h8VT2,0.0,0.0,0.0,0.0,0.0,0.0,0.104225,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0


In [312]:
# KNN model
combined[(combined.index == '00u13oned0U8XP8Mb4x7')].mean()

001lJh12502oVmee              0.0
0AII175WS04xG1N3              0.0
0L3YtO8sC2555oS4              0.0
0xx22Xf564qn6Rp4              0.0
1138hHTZ32O02PMj              0.0
                             ... 
Web: HTML, CSS, JavaScript    0.0
backend                       0.0
front end                     0.0
frontend                      1.0
iOS: Swift                    0.0
Length: 144, dtype: float64

In [314]:
# KNN model with rating
combined[(combined.index == 't82ZE41q1fHbx086')].mean()


001lJh12502oVmee              0.0
0AII175WS04xG1N3              0.0
0L3YtO8sC2555oS4              0.0
0xx22Xf564qn6Rp4              0.0
1138hHTZ32O02PMj              0.0
                             ... 
Web: HTML, CSS, JavaScript    0.0
backend                       0.0
front end                     0.0
frontend                      0.0
iOS: Swift                    0.0
Length: 144, dtype: float64