## Content Based Filtering

Uses similarities between patterns based on angular distance of eudlidean distance of pattern features. Feature engineering for this recommender can have a large inpact on results.


In [1]:
# import libraries
import pandas as pd
import numpy as np
import ast

# For numerically encoding and preprocessing patterns in order to compare similarity
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline, FeatureUnion

# similarity metrics 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.metrics.pairwise import euclidean_distances

# Visulaization of pipeline
from sklearn import set_config

from util_functions import *

# import pickle


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# import data
df = pd.read_csv('data/patterns_cleaned.csv', low_memory=False)
# df = pd.read_csv('patterns_cleaned.csv', low_memory=False)
pd.options.mode.chained_assignment = None 

In [None]:
# DROP OUTLIERS THAT AFFECT SCALING! 

In [None]:
df.shape

(132843, 24)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132843 entries, 0 to 132842
Data columns (total 24 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   pattern_id               132843 non-null  int64  
 1   name                     132843 non-null  object 
 2   name_permalink           132843 non-null  object 
 3   favorites_count          132843 non-null  int64  
 4   projects_count           132843 non-null  int64  
 5   difficulty_average       132843 non-null  float64
 6   difficulty_count         131547 non-null  float64
 7   rating_average           132843 non-null  float64
 8   queued_projects_count    132843 non-null  int64  
 9   rating_count             132841 non-null  float64
 10  pattern_type_names       132842 non-null  object 
 11  pattern_type_clothing    132842 non-null  object 
 12  photos_url               132843 non-null  object 
 13  pattern_needle_sizes     132843 non-null  object 
 14  patt

### Processing Pipeline

In [3]:
categorical_features = ['free', 'pattern_type_names',  'coded_year']#,'downloadable', 'coded_month',]
numeric_features = ['yardage', 'difficulty_average','gauge_per_inch', 'yardage_avg',]
custom_function_pre_encoded_features = ['yarn_weight_description', 'needle_sizes']

custom_function_transformer = Pipeline(steps=[
                                    ("cosolidate_gauge", DataframeFunctionTransformer(consolidate_gauge)),
                                    ("use_avg_yardage", DataframeFunctionTransformer(use_avg_yardage)),
                                    ("encode_yarn_weights", DataframeFunctionTransformer(encode_yarn_weights)),
                                    ("get_needle_size", DataframeFunctionTransformer(get_needle_size)), 
#                                     ("encode_months",DataframeFunctionTransformer(code_months)), 
                                    ("encode_years",DataframeFunctionTransformer(code_years)), 
                              # NOTE NEED TO DOWNWEIGHT THESE!!
                                    ]) 

attributes_transformer = Pipeline(steps=[("get_corpus", FunctionTransformer(get_corpus)),
                                    ('count_vectorize_attributes', CountVectorizer()),
                                    ('to_dense', ToDenseTransformer())]) 


numeric_transformer = Pipeline(steps=[('impute_mode', SimpleImputer(strategy='median')), 
                                      ('scaling', StandardScaler())]) 


categorical_transformer = Pipeline(steps=[('impute_mode', SimpleImputer(strategy='most_frequent')), 
                                          ('one-hot-encode', OneHotEncoder(sparse=False))])

pre_encoded_feature_transformer = Pipeline(steps=[
                                    ('impute_mode', SimpleImputer(strategy='median'))])
                        

preprocessor = ColumnTransformer(
               transformers=[('pre-ecoded_features', pre_encoded_feature_transformer, custom_function_pre_encoded_features),
                             ('numeric', numeric_transformer, numeric_features),
                             ('categorical', categorical_transformer, categorical_features)]) 


main_pipeline = Pipeline(steps = [('custom_feature_transform', custom_function_transformer),
                            ('preprocessor', preprocessor)])

pipeline = FeatureUnion([('main_pipeline', main_pipeline),
                            ('attributes', attributes_transformer)])


In [4]:
# Visualize Pipeline
set_config(display='diagram')
pipeline

In [5]:
# Fit and transform data to format to be used for similarity comparison

X = pipeline.fit_transform(df)
X.shape

(132843, 59)

## Calculating Recommendations:

### a) Euclidean Distance
We want to find the patterns closest distance-wise to the pattern we are using to compare.  This is why scaling important as the magnitudes could skew the vectors away from similar patterns, or strengthen important attributes. 

In [None]:
def find_top_eucliedean_recommendations_df(name_permalink):
  
    # Find index
    try:
        pattern_to_compare = X[get_index_from_name_permalink(name_permalink,df)] 
    except:
        # transform through preprocessing pipeline
        print("pattern wasn't processed yet - try to process it now")
        pattern_to_compare  = get_pattern_metadata_from_url(pattern_url, df)
#         need to download single pattern 
        pattern_to_compare = pipeline.transform(pattern_to_compare)
        pattern_to_compare = get_metadata_from_name_permalink(name_permalink, df)

    # Get distances from all other patterns
    distances = euclidean_distances(X, pattern_to_compare)
    distances = distances.reshape(-1)   
    df['distances'] = distances
    
    # Find N number of indices with the least distance to chosen pattern 
    ordered_indices = distances.argsort()
    closest_indices = ordered_indices[:20]

    # # Get the patterns for these indices
#     closest_df = df.iloc[closest_indices]
    closest_df = df.iloc[ordered_indices]
    closest_df['rank'] = df['distances'].rank()
    return closest_df

def list_top_euclidean_recommendations(df):
    df =df[0:20]
    name_permalink = []
    image_url = []
    url = []
    distances = []
    for i in range(df.shape[0]):
        name_permalink.append(df.name_permalink.iloc[i])
        image_url.append(df.photos_url.iloc[i])
        distances.append(df.distances.iloc[i])
        url.append('https://www.ravelry.com/patterns/library/' +df.name_permalink.iloc[i])
    return name_permalink, image_url, url, distances

def print_top_euclidean_recommendations(name_permalink_list, image_url, url, distances):
    for i in range(len(name_permalink_list)):
        print(f'{name_permalink_list[i]},\t {url[i]}, \t {distances[i]:.4f}')

#### Recommend Patterns:

In [None]:
# name_permalink='jasmine-the-giraffe'
# name_permalink='soldotna-crop'
# name_permalink='featherweight-cardigan'
name_permalink='sheldon'
recommended_df= find_top_eucliedean_recommendations_df(name_permalink)
name_permalink_list, image_url, url, distances = list_top_euclidean_recommendations(recommended_df)
print_top_euclidean_recommendations(name_permalink_list,  image_url,  url, distances)

sheldon,	 https://www.ravelry.com/patterns/library/sheldon, 	 0.0000
purl-critter,	 https://www.ravelry.com/patterns/library/purl-critter, 	 1.3091
trudie-the-turtle,	 https://www.ravelry.com/patterns/library/trudie-the-turtle, 	 1.3656
teddy-bear-3,	 https://www.ravelry.com/patterns/library/teddy-bear-3, 	 1.4456
celestine,	 https://www.ravelry.com/patterns/library/celestine, 	 1.4758
bean-cow,	 https://www.ravelry.com/patterns/library/bean-cow, 	 1.4918
sheldon-superhero-outfit,	 https://www.ravelry.com/patterns/library/sheldon-superhero-outfit, 	 1.4922
halloween-witch-doll,	 https://www.ravelry.com/patterns/library/halloween-witch-doll, 	 1.5006
sweet-sweater-norwegian-teddy-bear-sweater,	 https://www.ravelry.com/patterns/library/sweet-sweater-norwegian-teddy-bear-sweater, 	 1.5156
robe-printaniere-gotz,	 https://www.ravelry.com/patterns/library/robe-printaniere-gotz, 	 1.5247
adam-10,	 https://www.ravelry.com/patterns/library/adam-10, 	 1.5252
lake-park-hat,	 https://www.ravelry.c



In [None]:
# Recommended metadata
recommended_df.head(10)

Unnamed: 0,pattern_id,name,name_permalink,favorites_count,projects_count,difficulty_average,difficulty_count,rating_average,queued_projects_count,rating_count,pattern_type_names,pattern_type_clothing,photos_url,pattern_needle_sizes,pattern_attributes,yardage_max,yardage,generally_available,gauge,gauge_divisor,free,downloadable,categories,yarn_weight_description,gauge_per_inch,yardage_avg,distances,rank
1973,1080,Sheldon,sheldon,13584,4117,4.428326,1744.0,4.394948,3893,1623.0,toys,False,https://images4-g.ravelrycache.com/uploads/cas...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",slippedstitches,220.0,220.0,2006/12/01 00:00:00 -0500,25.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.25,220.0,0.0,1.0
19279,12514,Purl Critter,purl-critter,188,11,4.125,8.0,4.25,35,8.0,toys,False,https://images4-f.ravelrycache.com/uploads/pur...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,330.0,330.0,2007/02/01 00:00:00 -0500,6.0,1.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.0,330.0,1.309063,2.0
8047,49859,Trudie the Turtle,trudie-the-turtle,1344,146,3.485294,68.0,4.238095,281,63.0,toys,False,https://images4-g.ravelrycache.com/uploads/sne...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",,100.0,70.0,2007/07/01 00:00:00 -0400,24.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.0,85.0,1.365605,3.0
1150,7345,Teddy Bear,teddy-bear-3,14405,1858,3.444134,716.0,4.158537,2230,656.0,toys,False,https://images4-g.ravelrycache.com/uploads/Fre...,"[{'id': 20, 'us': '2½', 'metric': 3.0, 'us_ste...",,274.0,274.0,2006/08/01 00:00:00 -0400,27.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.75,274.0,1.445578,4.0
7916,38123,Celestine,celestine,5566,545,3.714912,228.0,4.350467,1488,214.0,toys,False,https://images4-f.ravelrycache.com/uploads/nor...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,288.0,150.0,2007/12/01 00:00:00 -0500,28.0,4.0,True,True,"['ball', 'toysandhobbies']",Sport (12 wpi),7.0,219.0,1.475837,5.0
112014,192149,Bean Cow,bean-cow,552,19,4.5,8.0,4.571429,98,7.0,toys,False,https://images4-g.ravelrycache.com/flickr/4/8/...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",felted,100.0,50.0,2010/07/01 00:00:00 -0400,6.0,1.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.0,75.0,1.491751,6.0
31238,108492,Sheldon Superhero Outfit,sheldon-superhero-outfit,400,33,4.384615,13.0,3.785714,84,14.0,toys,False,https://images4-g.ravelrycache.com/flickr/3/4/...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded,,,2009/02/01 00:00:00 -0500,25.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.25,,1.492227,7.0
29587,92699,Halloween Witch Doll,halloween-witch-doll,496,40,4.133333,15.0,4.285714,98,14.0,toys,False,https://images4-g.ravelrycache.com/flickr/2/9/...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",amigurumi,,,2008/10/01 00:00:00 -0400,6.5,1.0,True,True,"['doll', 'softies', 'toysandhobbies']",Sport (12 wpi),6.5,,1.500621,8.0
21060,20509,Sweet Sweater: Norwegian Teddy Bear Sweater,sweet-sweater-norwegian-teddy-bear-sweater,254,29,4.916667,12.0,3.6,49,10.0,toys,False,https://images4-f.ravelrycache.com/uploads/phi...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded,220.0,110.0,2006/01/01 00:00:00 -0500,6.25,1.0,True,True,"['other-dollclothes', 'dollclothes', 'toysandh...",Sport (12 wpi),6.25,165.0,1.515607,9.0
128544,655679,Robe printanière Götz,robe-printaniere-gotz,140,17,4.0,4.0,4.666667,13,3.0,toys,False,https://images4-g.ravelrycache.com/uploads/Sop...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",slippedstitches,,,2016/03/01 00:00:00 -0500,24.0,4.0,True,True,"['child-doll', 'dollclothes', 'toysandhobbies']",Sport (12 wpi),6.0,,1.524733,10.0


In [None]:
# recommended_df.photos_url.tolist()

Let's try another:

In [None]:
name_permalink='soldotna-crop'

recommended_df= find_top_eucliedean_recommendations_df(name_permalink)
name_permalink_list, image_url, url, distances = list_top_euclidean_recommendations(recommended_df)
print_top_euclidean_recommendations(name_permalink_list,  image_url,  url, distances)

soldotna-crop,	 https://www.ravelry.com/patterns/library/soldotna-crop, 	 0.0000
salix-alba,	 https://www.ravelry.com/patterns/library/salix-alba, 	 0.3879
trojan-barbros-stjarnor,	 https://www.ravelry.com/patterns/library/trojan-barbros-stjarnor, 	 0.4103
skiing-optional,	 https://www.ravelry.com/patterns/library/skiing-optional, 	 0.4701
glass-ceiling,	 https://www.ravelry.com/patterns/library/glass-ceiling, 	 0.4883
bohus,	 https://www.ravelry.com/patterns/library/bohus, 	 0.5044
vintervannet,	 https://www.ravelry.com/patterns/library/vintervannet, 	 0.5108
altheda-2,	 https://www.ravelry.com/patterns/library/altheda-2, 	 0.5353
petrichor-sweater,	 https://www.ravelry.com/patterns/library/petrichor-sweater, 	 0.5863
snedronningen,	 https://www.ravelry.com/patterns/library/snedronningen, 	 0.6085
bravura-pullover,	 https://www.ravelry.com/patterns/library/bravura-pullover, 	 0.6462
cardoon-2,	 https://www.ravelry.com/patterns/library/cardoon-2, 	 0.6517
fortuna-sweater,	 https://www.



In [None]:
# Recommended metadata
recommended_df.head(10)

Unnamed: 0,pattern_id,name,name_permalink,favorites_count,projects_count,difficulty_average,difficulty_count,rating_average,queued_projects_count,rating_count,pattern_type_names,pattern_type_clothing,photos_url,pattern_needle_sizes,pattern_attributes,yardage_max,yardage,generally_available,gauge,gauge_divisor,free,downloadable,categories,yarn_weight_description,gauge_per_inch,yardage_avg,distances,rank
55703,910492,Soldotna Crop,soldotna-crop,29450,6302,3.73999,1923.0,4.701746,3859,2062.0,pullover,True,https://images4-g.ravelrycache.com/uploads/boy...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded shortrows,1400.0,924.0,2019/03/01 00:00:00 -0500,22.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.5,1162.0,0.0,1.0
59300,1082957,Salix Alba,salix-alba,598,21,4.0,3.0,4.75,60,4.0,pullover,True,https://images4-g.ravelrycache.com/uploads/elk...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded shortrows,2160.0,957.0,2020/11/01 00:00:00 -0400,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1558.5,0.387947,2.0
57032,1134482,Tröjan Barbros stjärnor,trojan-barbros-stjarnor,2393,37,3.785714,14.0,4.733333,266,15.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Maj...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1531.0,875.0,2021/04/01 00:00:00 -0400,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.0,1203.0,0.41027,3.0
67093,903611,Skiing Optional,skiing-optional,735,18,3.428571,7.0,4.571429,71,7.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Uan...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,2277.0,906.0,2019/02/01 00:00:00 -0500,22.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.5,1591.5,0.470114,4.0
62711,886458,Glass Ceiling,glass-ceiling,1927,31,3.9,10.0,4.8,158,10.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Hei...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1850.0,1070.0,2018/11/01 00:00:00 -0400,23.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.75,1460.0,0.488331,5.0
61048,902228,Bohus,bohus,1459,64,4.272727,22.0,4.954545,161,22.0,pullover,True,https://images4-g.ravelrycache.com/uploads/lai...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",stranded shortrows,1540.0,977.0,2019/02/01 00:00:00 -0500,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1258.5,0.504384,6.0
57799,1124763,Vintervannet,vintervannet,1318,23,3.285714,7.0,4.625,115,8.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Jul...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1815.0,913.0,2021/03/01 00:00:00 -0500,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1364.0,0.510774,7.0
55908,932512,Altheda,altheda-2,13011,594,3.625698,179.0,4.826531,1502,196.0,pullover,True,https://images4-g.ravelrycache.com/uploads/lov...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,2070.0,1080.0,2019/05/01 00:00:00 -0400,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.0,1575.0,0.535283,8.0
59913,1093975,Petrichor Sweater,petrichor-sweater,408,11,3.166667,6.0,4.833333,47,6.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Mor...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1680.0,990.0,2020/12/01 00:00:00 -0500,22.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.5,1335.0,0.586275,9.0
55943,1096466,Snedronningen,snedronningen,5675,170,3.315789,57.0,4.84375,555,64.0,pullover,True,https://images4-g.ravelrycache.com/uploads/lil...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1831.0,1027.0,2020/12/01 00:00:00 -0500,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.0,1429.0,0.608499,10.0


In [None]:
recommended_df[0:15].photos_url.tolist()

['https://images4-g.ravelrycache.com/uploads/boylandknitworks/612664255/Attachment-1_2_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/elkmarketyarn/735129884/s-3570_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/Majaakerstrom/781159818/176151098_136630378369726_7061717603221722739_n_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/UandIKnit/606254332/F8A5D695-4EA7-47CE-BF48-E5FAADF59CB4_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/HeidiKdesigns/590452034/fullsizeoutput_a2e2_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/lainemagazine/599429798/laine7_carolfeller_sk-4_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/Jules-Coco/762890271/R5J_0109-min_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/lovewool-knits/656849719/fullsizeoutput_1291_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/Morthunder/748212285/9002E51B-D356-4154-A41D-D5F756B19FCC_square.JPG',
 'https://images4-g.ravelrycache.com/upload

Okay - This prediction didn't do so bad; these patterns have the loose chunky knit feel - which is great, but missing the "fun" cat vibe. 

### b) Cosine Similarity

Similarity can be calculated by cosine of the angle between 2 vectors.  Cosine similarity scale is between 0 and 1, 1 being colinear(identical tastes), and 0 being orthogonal (no similarity, or commonality - they are independent), and -1 opposite tastes.  

In [7]:
print(X.shape)
X_smaller = X[0:60000,:] # Need to take subsection unfortunately (ran out of RAM on the expensive Colab (51gb))
X_smaller.shape

(132843, 59)


(80000, 59)

In [8]:
cosine_sim = cosine_similarity(X_smaller)



In [9]:
# name_permalink_to_use='jasmine-the-giraffe'
name_permalink_to_use='sheldon'
pattern_index = get_index_from_name_permalink(name_permalink_to_use, df)
similar_patterns = list(enumerate(cosine_sim[pattern_index]))

In [10]:
sorted_similar_patterns  = sorted(similar_patterns, key=lambda x:x[1], reverse=True)
closest_indices = sorted_similar_patterns[:20]

In [11]:
# give recomendations for the pattern selected
pattern_ids = []
ordered_indices = []
for i in range(len(closest_indices)):
    ordered_indices.append(closest_indices[i][0])
    if i == 0:
        print('Recommendations for similar patterns to {0} {1}:\n'.format(closest_indices[i][0], df.iloc[closest_indices[i][0]]['name_permalink']))
    print(f"{i}: {df.iloc[closest_indices[i][0]]['name_permalink']} with distance of: {closest_indices[i][1]}")

closest_df = df.iloc[ordered_indices]
closest_df.head(10) 

Recommendations for similar patterns to 1973 sheldon:

0: sheldon with distance of: 1.0
1: purl-critter with distance of: 0.9738903468856216
2: sock-hippo with distance of: 0.967652280964983
3: sock-turtle-11 with distance of: 0.9665124296708574
4: trudie-the-turtle with distance of: 0.966058118263447
5: teddy-bear-3 with distance of: 0.9634228006363145
6: celestine with distance of: 0.9625275775464144
7: sheldon-superhero-outfit with distance of: 0.9586652871698848
8: halloween-witch-doll with distance of: 0.9585840116084504
9: sweet-sweater-norwegian-teddy-bear-sweater with distance of: 0.9581536345769274
10: adam-10 with distance of: 0.9569843758060695
11: lake-park-hat with distance of: 0.9565699306288343
12: spearmint-the-bunny with distance of: 0.9546226935869451
13: soccer-ball with distance of: 0.9544265361825468
14: woolie-the-ewe with distance of: 0.9530004475415338
15: slip-stitch-delight-socks with distance of: 0.949074450661819
16: celestine-sox with distance of: 0.9461419

Unnamed: 0,pattern_id,name,name_permalink,favorites_count,projects_count,difficulty_average,difficulty_count,rating_average,queued_projects_count,rating_count,pattern_type_names,pattern_type_clothing,photos_url,pattern_needle_sizes,pattern_attributes,yardage_max,yardage,generally_available,gauge,gauge_divisor,free,downloadable,categories,yarn_weight_description,gauge_per_inch,yardage_avg
1973,1080,Sheldon,sheldon,13584,4117,4.428326,1744.0,4.394948,3893,1623.0,toys,False,https://images4-g.ravelrycache.com/uploads/cas...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",slippedstitches,220.0,220.0,2006/12/01 00:00:00 -0500,25.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.25,220.0
19279,12514,Purl Critter,purl-critter,188,11,4.125,8.0,4.25,35,8.0,toys,False,https://images4-f.ravelrycache.com/uploads/pur...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,330.0,330.0,2007/02/01 00:00:00 -0500,6.0,1.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.0,330.0
28507,81047,Sock Hippo,sock-hippo,3869,295,4.563636,110.0,4.28866,774,97.0,toys,False,https://images4-f.ravelrycache.com/uploads/msh...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,,,2008/08/20 11:31:54 -0400,6.0,1.0,True,True,"['animal', 'softies', 'toysandhobbies']",Fingering (14 wpi),6.0,
28003,75769,Sock Turtle 1.1,sock-turtle-11,627,58,4.076923,26.0,4.181818,170,22.0,toys,False,https://images4-f.ravelrycache.com/uploads/myb...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,65.0,65.0,2008/01/01 00:00:00 -0500,6.0,1.0,True,True,"['animal', 'softies', 'toysandhobbies']",Fingering (14 wpi),6.0,65.0
8047,49859,Trudie the Turtle,trudie-the-turtle,1344,146,3.485294,68.0,4.238095,281,63.0,toys,False,https://images4-g.ravelrycache.com/uploads/sne...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",,100.0,70.0,2007/07/01 00:00:00 -0400,24.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.0,85.0
1150,7345,Teddy Bear,teddy-bear-3,14405,1858,3.444134,716.0,4.158537,2230,656.0,toys,False,https://images4-g.ravelrycache.com/uploads/Fre...,"[{'id': 20, 'us': '2½', 'metric': 3.0, 'us_ste...",,274.0,274.0,2006/08/01 00:00:00 -0400,27.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.75,274.0
7916,38123,Celestine,celestine,5566,545,3.714912,228.0,4.350467,1488,214.0,toys,False,https://images4-f.ravelrycache.com/uploads/nor...,"[{'id': 2, 'us': '2 ', 'metric': 2.75, 'us_ste...",,288.0,150.0,2007/12/01 00:00:00 -0500,28.0,4.0,True,True,"['ball', 'toysandhobbies']",Sport (12 wpi),7.0,219.0
31238,108492,Sheldon Superhero Outfit,sheldon-superhero-outfit,400,33,4.384615,13.0,3.785714,84,14.0,toys,False,https://images4-g.ravelrycache.com/flickr/3/4/...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded,,,2009/02/01 00:00:00 -0500,25.0,4.0,True,True,"['animal', 'softies', 'toysandhobbies']",Sport (12 wpi),6.25,
29587,92699,Halloween Witch Doll,halloween-witch-doll,496,40,4.133333,15.0,4.285714,98,14.0,toys,False,https://images4-g.ravelrycache.com/flickr/2/9/...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",amigurumi,,,2008/10/01 00:00:00 -0400,6.5,1.0,True,True,"['doll', 'softies', 'toysandhobbies']",Sport (12 wpi),6.5,
21060,20509,Sweet Sweater: Norwegian Teddy Bear Sweater,sweet-sweater-norwegian-teddy-bear-sweater,254,29,4.916667,12.0,3.6,49,10.0,toys,False,https://images4-f.ravelrycache.com/uploads/phi...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded,220.0,110.0,2006/01/01 00:00:00 -0500,6.25,1.0,True,True,"['other-dollclothes', 'dollclothes', 'toysandh...",Sport (12 wpi),6.25,165.0


And try the sweater like above to compare:

In [12]:
closest_df.photos_url.tolist()

['https://images4-g.ravelrycache.com/uploads/casey/153936026/www.knitty.com-sheldonbeauty_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/purlsoho/60108559/hello_foxy_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/mshmom/23662036/IMG_2142_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/mybb/54217142/100_6370_square.JPG',
 'https://images4-g.ravelrycache.com/uploads/sneakysquirrell/523343077/2257115698_38602c575e_b_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/FrenchPressKnits/13455721/IMG_3026_square.JPG',
 'https://images4-f.ravelrycache.com/uploads/norah/298871/celestine_lg_square.jpg',
 'https://images4-g.ravelrycache.com/flickr/3/4/8/3480742799/3480742799_s.jpg',
 'https://images4-g.ravelrycache.com/flickr/2/9/6/2967358865/2967358865_s.jpg',
 'https://images4-f.ravelrycache.com/uploads/phibetakitten/14633968/teddy_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/dabblersupreme/569378958/2173490951_8c197fe3d8_z_square.jpg',
 'https

In [13]:
name_permalink_to_use='soldotna-crop'
pattern_index = get_index_from_name_permalink(name_permalink_to_use, df)
similar_patterns = list(enumerate(cosine_sim[pattern_index]))

In [14]:
sorted_similar_patterns  = sorted(similar_patterns, key=lambda x:x[1], reverse=True)
closest_indices = sorted_similar_patterns[:20]

In [15]:
# give recomendations for the pattern selected
pattern_ids = []
ordered_indices = []
for i in range(len(closest_indices)):
    ordered_indices.append(closest_indices[i][0])
    if i == 0:
        print('Recommendations for similar patterns to {0} {1}:\n'.format(closest_indices[i][0], df.iloc[closest_indices[i][0]]['name_permalink']))
    print(f"{i}: {df.iloc[closest_indices[i][0]]['name_permalink']} with distance of: {closest_indices[i][1]}")

closest_df = df.iloc[ordered_indices]
closest_df.head(10)

Recommendations for similar patterns to 55703 soldotna-crop:

0: soldotna-crop with distance of: 1.0000000000000002
1: trojan-barbros-stjarnor with distance of: 0.9983830594610734
2: salix-alba with distance of: 0.9983778239036563
3: skiing-optional with distance of: 0.9977201721886153
4: glass-ceiling with distance of: 0.9973797929028136
5: vintervannet with distance of: 0.9973366127672527
6: november-17 with distance of: 0.9972879868110587
7: bohus with distance of: 0.9972789870530068
8: koivua with distance of: 0.9970100101164837
9: altheda-2 with distance of: 0.9969036099782964
10: garden-lights-2 with distance of: 0.9967998081494227
11: muttis-blueberries with distance of: 0.9966382126643474
12: manou with distance of: 0.9965577690714461
13: dubula with distance of: 0.9965553955832612
14: petrichor-sweater with distance of: 0.9964980949721506
15: snedronningen with distance of: 0.9962907322349338
16: a-foxy-frolic with distance of: 0.9961200436613021
17: fortuna-sweater with dista

Unnamed: 0,pattern_id,name,name_permalink,favorites_count,projects_count,difficulty_average,difficulty_count,rating_average,queued_projects_count,rating_count,pattern_type_names,pattern_type_clothing,photos_url,pattern_needle_sizes,pattern_attributes,yardage_max,yardage,generally_available,gauge,gauge_divisor,free,downloadable,categories,yarn_weight_description,gauge_per_inch,yardage_avg
55703,910492,Soldotna Crop,soldotna-crop,29450,6302,3.73999,1923.0,4.701746,3859,2062.0,pullover,True,https://images4-g.ravelrycache.com/uploads/boy...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded shortrows,1400.0,924.0,2019/03/01 00:00:00 -0500,22.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.5,1162.0
57032,1134482,Tröjan Barbros stjärnor,trojan-barbros-stjarnor,2393,37,3.785714,14.0,4.733333,266,15.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Maj...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1531.0,875.0,2021/04/01 00:00:00 -0400,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.0,1203.0
59300,1082957,Salix Alba,salix-alba,598,21,4.0,3.0,4.75,60,4.0,pullover,True,https://images4-g.ravelrycache.com/uploads/elk...,"[{'id': 3, 'us': '3 ', 'metric': 3.25, 'us_ste...",stranded shortrows,2160.0,957.0,2020/11/01 00:00:00 -0400,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1558.5
67093,903611,Skiing Optional,skiing-optional,735,18,3.428571,7.0,4.571429,71,7.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Uan...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,2277.0,906.0,2019/02/01 00:00:00 -0500,22.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.5,1591.5
62711,886458,Glass Ceiling,glass-ceiling,1927,31,3.9,10.0,4.8,158,10.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Hei...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1850.0,1070.0,2018/11/01 00:00:00 -0400,23.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.75,1460.0
57799,1124763,Vintervannet,vintervannet,1318,23,3.285714,7.0,4.625,115,8.0,pullover,True,https://images4-g.ravelrycache.com/uploads/Jul...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,1815.0,913.0,2021/03/01 00:00:00 -0500,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1364.0
62945,975358,November,november-17,526,11,4.0,4.0,4.75,40,4.0,pullover,True,https://images4-f.ravelrycache.com/uploads/Jes...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",stranded shortrows,2121.0,1017.0,2019/11/01 00:00:00 -0400,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",Worsted (9 wpi),5.0,1569.0
61048,902228,Bohus,bohus,1459,64,4.272727,22.0,4.954545,161,22.0,pullover,True,https://images4-g.ravelrycache.com/uploads/lai...,"[{'id': 5, 'us': '5 ', 'metric': 3.75, 'us_ste...",stranded shortrows,1540.0,977.0,2019/02/01 00:00:00 -0500,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.25,1258.5
55793,906148,Koivua,koivua,13463,1009,3.950147,341.0,4.853591,1657,362.0,pullover,True,https://images4-f.ravelrycache.com/uploads/boy...,"[{'id': 6, 'us': '6 ', 'metric': 4.0, 'us_stee...",stranded shortrows,2160.0,950.0,2019/02/01 00:00:00 -0500,21.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",Worsted (9 wpi),5.25,1555.0
55908,932512,Altheda,altheda-2,13011,594,3.625698,179.0,4.826531,1502,196.0,pullover,True,https://images4-g.ravelrycache.com/uploads/lov...,"[{'id': 4, 'us': '4 ', 'metric': 3.5, 'us_stee...",stranded shortrows,2070.0,1080.0,2019/05/01 00:00:00 -0400,20.0,4.0,False,True,"['pullover', 'sweater', 'clothing']",DK (11 wpi),5.0,1575.0


In [16]:
closest_df.photos_url.tolist()

['https://images4-g.ravelrycache.com/uploads/boylandknitworks/612664255/Attachment-1_2_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/Majaakerstrom/781159818/176151098_136630378369726_7061717603221722739_n_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/elkmarketyarn/735129884/s-3570_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/UandIKnit/606254332/F8A5D695-4EA7-47CE-BF48-E5FAADF59CB4_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/HeidiKdesigns/590452034/fullsizeoutput_a2e2_square.jpeg',
 'https://images4-g.ravelrycache.com/uploads/Jules-Coco/762890271/R5J_0109-min_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/Jessicamknits/656360737/DSC_0392_square.JPG',
 'https://images4-g.ravelrycache.com/uploads/lainemagazine/599429798/laine7_carolfeller_sk-4_square.jpg',
 'https://images4-f.ravelrycache.com/uploads/boylandknitworks/608179569/IMG_7513_square.jpg',
 'https://images4-g.ravelrycache.com/uploads/lovewool-knits/656849719/fullsize