In [None]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator

In [None]:
def top_factors(item_sim_df, factor_name, n=10):
    """Print the factors most similar to ``factor_name``, best match first.

    Parameters
    ----------
    item_sim_df : pandas.DataFrame
        Similarity table; expected to be square, with the same labels on the
        index and on the columns.
    factor_name : label
        Column of ``item_sim_df`` to rank the other rows against.
    n : int, default 10
        Maximum number of entries to print.

    Returns
    -------
    str or None
        A "no data" message when ``factor_name`` is not a column of
        ``item_sim_df`` (nothing is printed in that case); otherwise ``None``
        after the ranking has been printed.
    """
    if factor_name not in item_sim_df.columns:
        return('No data available on factor {}'.format(factor_name))

    print('Similar factors to {} include:\n'.format(factor_name))
    # Exclude the factor's own row by label so it is never listed as similar
    # to itself, regardless of how ties are ordered by the sort.
    others = item_sim_df.loc[~item_sim_df.index.isin([factor_name]), factor_name]
    result = others.sort_values(ascending=False).head(n)
    for rank, (item, score) in enumerate(result.items(), start=1):
        print('No. {}: {}({:.2f})'.format(rank, item, score))

In [None]:
def top_crops(user_sim_df, crop, n=10):
    """Print the crops most similar to ``crop``, best match first.

    Parameters
    ----------
    user_sim_df : pandas.DataFrame
        Crop similarity table; expected to be square, with crop labels on
        both the index and the columns.
    crop : label
        Crop to look up; must be a column of ``user_sim_df``.
    n : int, default 10
        Maximum number of similar crops to print.

    Returns
    -------
    str or None
        A "no data" message when ``crop`` is not a column of ``user_sim_df``
        (nothing is printed in that case); otherwise ``None`` after the
        ranking has been printed.
    """
    # Validate against the table that was passed in rather than a notebook
    # global, so the function works no matter which cells have been run.
    if crop not in user_sim_df.columns:
        return('No data available on crop {}'.format(crop))

    print('Most Similar crops:\n')
    # Remove the crop's own row by label. Skipping "the first row after
    # sorting" would drop the wrong entry whenever a tie sorts another crop
    # ahead of the crop itself.
    others = user_sim_df.loc[~user_sim_df.index.isin([crop]), crop]
    result = others.sort_values(ascending=False).head(n)
    for other_crop, sim in result.items():
        print('crop #{0}, Similarity value: {1:.2f}'.format(other_crop, sim))

In [None]:
def similar_user_recs(user_sim_df, user, profile_df=None):
    """Tally the top-valued rows among the columns most similar to ``user``.

    In this notebook the "users" are crop labels (the columns of the
    normalised matrix) and the tallied items are its row labels (factors).

    For each of the 10 labels most similar to ``user`` the 5 highest-valued
    rows of that label's column are collected; the function then counts how
    often each row label was collected.

    Parameters
    ----------
    user_sim_df : pandas.DataFrame
        Similarity table; expected to be square, with the same labels on the
        index and on the columns.
    user : label
        Column to find neighbours for.
    profile_df : pandas.DataFrame, optional
        Matrix whose columns are the labels of ``user_sim_df``. When omitted,
        the notebook-level ``piv_norm`` is used, which keeps existing
        two-argument calls working.

    Returns
    -------
    list of (label, int) or str
        Up to 5 ``(row label, count)`` pairs, highest count first (ties keep
        first-seen order), or a "no data" message when ``user`` is unknown.
    """
    if profile_df is None:
        # Backward-compatible fallback to the matrix built in the main cell.
        profile_df = piv_norm
    if user not in profile_df.columns or user not in user_sim_df.columns:
        return('No data available on user {}'.format(user))

    # Exclude ``user`` by label instead of assuming it sorts first.
    others = user_sim_df.loc[~user_sim_df.index.isin([user]), user]
    sim_users = others.sort_values(ascending=False).index[:10]

    most_common = {}
    for neighbour in sim_users:
        top_items = profile_df.loc[:, neighbour].sort_values(ascending=False).index[:5]
        for item in top_items:
            most_common[item] = most_common.get(item, 0) + 1

    # sorted() is stable, so equal counts stay in first-seen order.
    ranked = sorted(most_common.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:5]


In [None]:
def recommendation_collaborative(dataframe):
    """Build factor-to-factor and crop-to-crop cosine-similarity tables.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'label'`` column naming the crop of each row; every
        other column is treated as a numeric factor.

    Returns
    -------
    item_sim_df : pandas.DataFrame
        Factor-by-factor cosine similarity.
    user_sim_df : pandas.DataFrame
        Crop-by-crop cosine similarity.
    piv_norm : pandas.DataFrame
        Normalised per-crop factor means, with factors as rows and crops as
        columns; crops whose normalised values are all zero are dropped.
    """
    crop = dataframe

    # Select factors by name instead of assuming 'label' is the last column.
    factor_columns = [name for name in crop.columns if name != 'label']

    # One [crop, factor, mean value] record per pair. The rows of each crop
    # are selected once and reused for all of its factors.
    records = []
    for label in crop['label'].unique().tolist():
        crop_rows = crop[crop['label'] == label]
        for factor in factor_columns:
            records.append([label, factor, crop_rows[factor].mean()])

    df = pd.DataFrame(records, columns=['crop', 'factors', 'value'])
    piv = df.pivot_table(index=['crop'], columns=['factors'], values='value')

    # Mean normalisation per crop (row): centre on the row mean and divide by
    # the row's value range.
    piv_norm = piv.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)
    # A zero value range (or a missing mean) yields NaN; treat those entries
    # as zero so the all-zero filter below can remove uninformative crops.
    piv_norm = piv_norm.fillna(0)
    piv_norm = piv_norm.T
    piv_norm = piv_norm.loc[:, (piv_norm != 0).any(axis=0)]

    # The similarities are computed from a CSR representation of the matrix.
    piv_sparse = sp.sparse.csr_matrix(piv_norm.values)

    # Rows of piv_sparse are factors, so the transpose compares crops.
    item_similarity = cosine_similarity(piv_sparse)
    user_similarity = cosine_similarity(piv_sparse.T)

    item_sim_df = pd.DataFrame(item_similarity, index = piv_norm.index, columns = piv_norm.index)
    user_sim_df = pd.DataFrame(user_similarity, index = piv_norm.columns, columns = piv_norm.columns)

    return item_sim_df, user_sim_df, piv_norm

In [None]:
# MAIN: load the crop dataset and build the similarity tables.
# The CSV path is relative to the notebook's working directory.
crop = pd.read_csv('Crop_recommendation.csv')
# factor_sim_df: factor-by-factor similarity; crop_sim_df: crop-by-crop similarity.
# piv_norm stays a notebook-level name because helper functions defined in
# earlier cells look it up as a global.
factor_sim_df, crop_sim_df, piv_norm = recommendation_collaborative(crop)

In [None]:
# Example usage of the helper functions defined in the cells above.
# List the factors most similar to 'rainfall'.
top_factors(factor_sim_df, 'rainfall') # e.g. N, K, P, rainfall
print() # blank line between the two listings
# List the crops most similar to 'apple'.
top_crops(crop_sim_df, 'apple') # e.g. rice, apple, jute
# Optional: uncomment to tally the top factors among the crops most similar to 'coffee'.
# print(similar_user_recs(crop_sim_df, 'coffee')) # e.g. rice, apple, jute