In [23]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

# Display limits for the frames printed further down. Full option names are
# used on purpose: pandas resolves abbreviated names by pattern matching, so an
# abbreviation such as 'display.max_row' can stop resolving uniquely if a
# similarly named option is added in a later release.
pd.set_option('display.max_rows', 22)
pd.set_option('display.max_columns', 8)
pd.set_option('display.width', 200)

In [2]:
def top_factors(item_sim_df, factor_name, top_n=10):
    """Print the factors with the highest similarity to ``factor_name``.

    Parameters
    ----------
    item_sim_df : pandas.DataFrame
        Similarity table; the column ``factor_name`` is read and its index
        labels are reported as the similar items.
    factor_name : label
        Column of ``item_sim_df`` to rank against. The row carrying the same
        label is excluded so the factor is never reported as similar to itself.
    top_n : int, default 10
        Maximum number of entries to print.

    Returns
    -------
    None
        The ranking is written to stdout, one numbered line per entry.
    """
    # NOTE(review): the "shows" wording is kept byte-for-byte so previously
    # saved notebook output stays valid; the items listed are factors.
    print('Similar shows to {} include:\n'.format(factor_name))
    is_other = ~item_sim_df.index.isin([factor_name])
    ranked = item_sim_df.loc[is_other, factor_name].sort_values(ascending=False)
    for rank, (item, score) in enumerate(ranked.iloc[:top_n].items(), start=1):
        print('No. {}: {}({:.2f})'.format(rank, item, score))

In [3]:
def top_crops(user_sim_df, crop, top_n=10):
    """Print the crops with the highest similarity to ``crop``.

    Parameters
    ----------
    user_sim_df : pandas.DataFrame
        Similarity table; the column ``crop`` is read and its index labels are
        reported as the similar crops.
    crop : label
        Column of ``user_sim_df`` to rank against.
    top_n : int, default 10
        Maximum number of crops to print.

    Returns
    -------
    None
        The ranking is written to stdout, one line per crop.
    """
    print('Most Similar crops({}):\n'.format(crop))
    # Exclude the queried crop by label rather than by dropping the first
    # sorted row: with ties (or any value above the self-similarity) the crop
    # itself is not guaranteed to sort first, and a real neighbour would be
    # dropped in its place.
    is_other = ~user_sim_df.index.isin([crop])
    ranked = user_sim_df.loc[is_other, crop].sort_values(ascending=False)
    for other_crop, sim in ranked.iloc[:top_n].items():
        print('crop #{0}, Similarity value: {1:.2f}'.format(other_crop, sim))

In [4]:
def similar_user_recs(user_sim_df, user, ratings=None):
    """Count which items rank highest among the users most similar to ``user``.

    For each of the (up to) 10 users most similar to ``user``, the 5
    highest-valued index labels of that user's column in ``ratings`` are
    collected; the labels are then tallied across all of those users.

    Parameters
    ----------
    user_sim_df : pandas.DataFrame
        Similarity table; the column ``user`` is read and its index labels are
        treated as the candidate similar users.
    user : label
        Column of ``user_sim_df`` to rank against. The row with the same label
        is excluded by label, so ``user`` is never its own neighbour even when
        it does not sort first.
    ratings : pandas.DataFrame, optional
        Table with one column per user. When omitted, the module-level
        ``piv_norm`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    list of (label, int)
        At most 5 ``(item, frequency)`` pairs, most frequent first.
    """
    if ratings is None:
        # Backward-compatible fallback to the notebook-level table.
        ratings = piv_norm

    is_other = ~user_sim_df.index.isin([user])
    neighbours = user_sim_df.loc[is_other, user].sort_values(ascending=False).index[:10]

    frequency = {}
    for neighbour in neighbours:
        favourites = ratings.loc[:, neighbour].sort_values(ascending=False).index[:5]
        for item in favourites:
            frequency[item] = frequency.get(item, 0) + 1

    # sorted() is stable, so equally frequent items keep first-seen order.
    ranked = sorted(frequency.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:5]


In [32]:
def recommendation_collaborative(dataframe, verbose=True):
    """Build factor-to-factor and crop-to-crop cosine-similarity tables.

    Steps:
      1. Average every non-``label`` column per distinct ``label`` value.
      2. Pivot the averages into a crop x factor table.
      3. Scale each factor column with ``RobustScaler``.
      4. Transpose to factor x crop, drop crops whose scaled values are all 0,
         and compute cosine similarity along both axes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``label`` column; every other column is averaged per
        label (assumed numeric - TODO confirm for other datasets).
    verbose : bool, default True
        When True, print the input and each intermediate table, as the
        original implementation always did.

    Returns
    -------
    item_sim_df : pandas.DataFrame
        Factor x factor cosine similarities.
    user_sim_df : pandas.DataFrame
        Crop x crop cosine similarities.
    piv_norm : pandas.DataFrame
        Scaled factor x crop table the similarities were computed from.
    """
    crop = dataframe

    if verbose:
        print(crop)
        print()

    crops_label = crop['label'].unique().tolist()
    # Select feature columns by name instead of assuming 'label' is last.
    sub_columns = [name for name in crop.columns if name != 'label']

    # Long-format records: one (crop, factor, mean) row per combination, in
    # first-appearance order of crops and original column order of factors.
    records = []
    for label in crops_label:
        rows_for_label = crop[crop['label'] == label]  # filter once per crop
        for factor in sub_columns:
            records.append([label, factor, rows_for_label[factor].mean()])

    df = pd.DataFrame(records, columns=['crop', 'factors', 'value'])

    if verbose:
        print(df)
        print()

    piv = df.pivot_table(index=['crop'], columns=['factors'], values='value')

    if verbose:
        print(piv)
        print()

    # Normalize the values into a new frame so the raw pivot printed above is
    # not overwritten.
    scaler = RobustScaler()
    piv_norm = pd.DataFrame(scaler.fit_transform(piv), index=piv.index, columns=piv.columns)

    if verbose:
        print(piv_norm)
        print()

    piv_norm = piv_norm.T
    # Drop crops (columns) whose scaled values are all zero.
    piv_norm = piv_norm.loc[:, (piv_norm != 0).any(axis=0)]

    # Kept in CSR form as in the original computation.
    # NOTE(review): cosine_similarity presumably also accepts the dense array,
    # so this conversion may be unnecessary - confirm before removing.
    piv_sparse = sp.sparse.csr_matrix(piv_norm.values)

    item_similarity = cosine_similarity(piv_sparse)
    user_similarity = cosine_similarity(piv_sparse.T)

    item_sim_df = pd.DataFrame(item_similarity, index=piv_norm.index, columns=piv_norm.index)
    user_sim_df = pd.DataFrame(user_similarity, index=piv_norm.columns, columns=piv_norm.columns)

    if verbose:
        print(item_sim_df)
        print()
        print(user_sim_df)
        print()

    return item_sim_df, user_sim_df, piv_norm

In [33]:
# MAIN: load the crop dataset and build the similarity tables.
# The three results are kept as notebook-level names for the cells below:
#   factor_sim_df - factor x factor similarities
#   crop_sim_df   - crop x crop similarities
#   piv_norm      - scaled factor x crop table (read as a global by similar_user_recs)
crop = pd.read_csv('Crop_recommendation.csv')
factor_sim_df, crop_sim_df, piv_norm = recommendation_collaborative(crop)

        N   P   K  temperature   humidity        ph    rainfall   label
0      90  42  43    20.879744  82.002744  6.502985  202.935536    rice
1      85  58  41    21.770462  80.319644  7.038096  226.655537    rice
2      60  55  44    23.004459  82.320763  7.840207  263.964248    rice
3      74  35  40    26.491096  80.158363  6.980401  242.864034    rice
4      78  42  42    20.130175  81.604873  7.628473  262.717340    rice
...   ...  ..  ..          ...        ...       ...         ...     ...
2195  107  34  32    26.774637  66.413269  6.780064  177.774507  coffee
2196   99  15  27    27.417112  56.636362  6.086922  127.924610  coffee
2197  118  33  30    24.131797  67.225123  6.362608  173.322839  coffee
2198  117  32  34    26.272418  52.127394  6.758793  127.175293  coffee
2199  104  18  30    23.603016  60.396475  6.779833  140.937041  coffee

[2200 rows x 8 columns]

       crop      factors       value
0      rice            N   79.890000
1      rice            P   47.580000

In [34]:
# Function example: print the factors most similar to one factor, then the
# crops most similar to one crop.
top_factors(factor_sim_df, 'K') # other factor names to try: N, K, P, rainfall
print()
top_crops(crop_sim_df, 'rice') # other crop names to try: rice, apple, jute
# Disabled example call, kept for reference:
# print(similar_user_recs(crop_sim_df, 'coffee')) #ex. rice,apple,jute

Similar shows to K include:

No. 1: P(0.76)
No. 2: humidity(0.05)
No. 3: N(0.04)
No. 4: rainfall(-0.03)
No. 5: temperature(-0.18)
No. 6: ph(-0.26)

Most Similar crops(rice):

crop #jute, Similarity value: 0.94
crop #coffee, Similarity value: 0.67
crop #coconut, Similarity value: 0.58
crop #banana, Similarity value: 0.31
crop #papaya, Similarity value: 0.28
crop #pigeonpeas, Similarity value: 0.27
crop #apple, Similarity value: 0.16
crop #pomegranate, Similarity value: 0.16
crop #cotton, Similarity value: 0.15
crop #maize, Similarity value: 0.08
