In [32]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [33]:
df = pd.read_csv("dfhappy.csv")

# Preprocessing

In [34]:
df.head()

Unnamed: 0.1,Unnamed: 0,Overall rank,country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,tavg
0,0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393,7.352894
1,1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41,10.321334
2,2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341,8.104241
3,3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118,5.449112
4,4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298,11.179195


In [35]:
df.columns

Index(['Unnamed: 0', 'Overall rank', 'country', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'tavg'],
      dtype='object')

In [36]:
df.isnull().sum()

Unnamed: 0                      0
Overall rank                    0
country                         0
Score                           0
GDP per capita                  0
Social support                  0
Healthy life expectancy         0
Freedom to make life choices    0
Generosity                      0
Perceptions of corruption       0
tavg                            0
dtype: int64

# Calculate Cosine Similarities

In [37]:
a = df.drop(['Overall rank','country'], axis = 1)

In [38]:
cos_sim = cosine_similarity(a, a)

In [46]:
cos_sim

array([[1.        , 0.98134245, 0.98289959, ..., 0.01582947, 0.13541862,
        0.01403036],
       [0.98134245, 1.        , 0.98830746, ..., 0.08940574, 0.22939893,
        0.08793547],
       [0.98289959, 0.98830746, 1.        , ..., 0.1880407 , 0.31064343,
        0.18638128],
       ...,
       [0.01582947, 0.08940574, 0.1880407 , ..., 1.        , 0.98349571,
        0.99999647],
       [0.13541862, 0.22939893, 0.31064343, ..., 0.98349571, 1.        ,
        0.98349507],
       [0.01403036, 0.08793547, 0.18638128, ..., 0.99999647, 0.98349507,
        1.        ]])

In [47]:
df = df.reset_index(drop=True)

In [48]:
results = {}
for idx, row in df.iterrows():
    similar_indices = cos_sim[idx].argsort()[:-100:-1]
    similar_items = [(cos_sim[idx][i], df['Overall rank'].iloc[[i]].tolist()[0]) 
                     for i in similar_indices]     
    results[row['Overall rank']] = similar_items[1:]

In [49]:
def get_name(a_id):
    return df[df['Overall rank'] == a_id]['country'].tolist()[0].split(' - ')[0] 

In [53]:
get_name(2)

'Denmark'

In [54]:
def recommend(item_id, N):
    print(f"Recommending {N} countries similar to {get_name(item_id)} ...")
    print("---------------------")
    
    recs = results[item_id][:N]
    for rec in recs:
        print(f"\tRecommended with a score {rec[0]}:\t{get_name(rec[1])} ")

In [56]:
##Recommending based on all features.
recommend(2, 5)

Recommending 5 anime similar to Denmark ...
---------------------
	Recommended with a score 0.9883074592422463:	Norway 
	Recommended with a score 0.9813424527186669:	Finland 
	Recommended with a score 0.9773663269071154:	Netherlands 
	Recommended with a score 0.9326926985536053:	New Zealand 
	Recommended with a score 0.9292264461821912:	Iceland 


# Recommend based on Healthy life expectancy and GDP per capita

In [82]:
df_health = df[['Overall rank','country','Healthy life expectancy', 'GDP per capita']]

In [83]:
df_health.head()

Unnamed: 0,Overall rank,country,Healthy life expectancy,GDP per capita
0,1,Finland,0.986,1.34
1,2,Denmark,0.996,1.383
2,3,Norway,1.028,1.488
3,4,Iceland,1.026,1.38
4,5,Netherlands,0.999,1.396


In [84]:
a = df_health.drop(['Overall rank','country'], axis = 1)

In [85]:
cos_sim = cosine_similarity(a, a)

In [86]:
cos_sim

array([[1.        , 0.9999477 , 0.99955604, ..., 0.98617028, 0.76888858,
        0.99120392],
       [0.9999477 , 1.        , 0.99980848, ..., 0.98442372, 0.76230938,
        0.9897986 ],
       [0.99955604, 0.99980848, 1.        , ..., 0.98079444, 0.74949705,
        0.98682075],
       ...,
       [0.98617028, 0.98442372, 0.98079444, ..., 1.        , 0.86422335,
        0.99942982],
       [0.76888858, 0.76230938, 0.74949705, ..., 0.86422335, 1.        ,
        0.84674349],
       [0.99120392, 0.9897986 , 0.98682075, ..., 0.99942982, 0.84674349,
        1.        ]])

In [87]:
df_health = df_health.reset_index(drop=True)

In [88]:
results = {}
for idx, row in df_health.iterrows():
    similar_indices = cos_sim[idx].argsort()[:-100:-1]
    similar_items = [(cos_sim[idx][i], df_health['Overall rank'].iloc[[i]].tolist()[0]) 
                     for i in similar_indices]     
    results[row['Overall rank']] = similar_items[1:]

In [89]:
def get_name(a_id):
    return df_health[df_health['Overall rank'] == a_id]['country'].tolist()[0].split(' - ')[0] 

In [90]:
get_name(2)

'Denmark'

In [91]:
def recommend(item_id, N):
    print(f"Recommending {N} countries similar to {get_name(item_id)} ...")
    print("---------------------")
    
    recs = results[item_id][:N]
    for rec in recs:
        print(f"\tRecommended with a score {rec[0]}:\t{get_name(rec[1])} ")

In [94]:
recommend(100, 5)

Recommending 5 anime similar to Nepal ...
---------------------
	Recommended with a score 0.999965698181197:	Togo 
	Recommended with a score 0.9998163151301659:	Tajikistan 
	Recommended with a score 0.9998154442835437:	Ethiopia 
	Recommended with a score 0.9996184065770634:	Yemen 
	Recommended with a score 0.9991561957832371:	Haiti 


# Recommend based on Generosity, Average Temp., Social support

In [100]:
df_generosity = df[['Overall rank','country','Generosity', 'tavg', 'Social support']]

In [101]:
df_generosity.head()

Unnamed: 0,Overall rank,country,Generosity,tavg,Social support
0,1,Finland,0.153,7.352894,1.587
1,2,Denmark,0.252,10.321334,1.573
2,3,Norway,0.271,8.104241,1.582
3,4,Iceland,0.354,5.449112,1.624
4,5,Netherlands,0.322,11.179195,1.522


In [102]:
a = df_generosity.drop(['Overall rank','country'], axis = 1)

In [103]:
cos_sim = cosine_similarity(a, a)

In [104]:
cos_sim

array([[1.        , 0.99811342, 0.99972656, ..., 0.20766517, 0.97742433,
        0.20574868],
       [0.99811342, 1.        , 0.99910029, ..., 0.15109509, 0.98846349,
        0.15010307],
       [0.99972656, 0.99910029, 1.        , ..., 0.19271322, 0.98118519,
        0.1915353 ],
       ...,
       [0.20766517, 0.15109509, 0.19271322, ..., 1.        , 0.00242452,
        0.99914969],
       [0.97742433, 0.98846349, 0.98118519, ..., 0.00242452, 1.        ,
        0.00274955],
       [0.20574868, 0.15010307, 0.1915353 , ..., 0.99914969, 0.00274955,
        1.        ]])

In [105]:
df_generosity = df_generosity.reset_index(drop=True)

In [106]:
results = {}
for idx, row in df_health.iterrows():
    similar_indices = cos_sim[idx].argsort()[:-100:-1]
    similar_items = [(cos_sim[idx][i], df_generosity['Overall rank'].iloc[[i]].tolist()[0]) 
                     for i in similar_indices]     
    results[row['Overall rank']] = similar_items[1:]

In [107]:
def get_name(a_id):
    return df_generosity[df_generosity['Overall rank'] == a_id]['country'].tolist()[0].split(' - ')[0]

In [108]:
get_name(11)

'Australia'

In [109]:
def recommend(item_id, N):
    print(f"Recommending {N} countries similar to {get_name(item_id)} ...")
    print("---------------------")
    
    recs = results[item_id][:N]
    for rec in recs:
        print(f"\tRecommended with a score {rec[0]}:\t{get_name(rec[1])} ")

In [111]:
recommend(11, 5)

Recommending 5 countries similar to Australia ...
---------------------
	Recommended with a score 0.9999985051384672:	Kosovo 
	Recommended with a score 0.9999970861545224:	Bosnia and Herzegovina 
	Recommended with a score 0.9999931266912492:	Uzbekistan 
	Recommended with a score 0.9999885587273373:	Tajikistan 
	Recommended with a score 0.9999739481548584:	Malaysia 
