In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- Load the data ----------------------------------------------------------
# Open the workbook in a context manager so its file handle is closed as soon
# as the first sheet has been read (the handle was previously left open).
with pd.ExcelFile('/content/Cust_segmentation_and_Recomendation_with_Recommendations.xlsx') as xls:
    sheet_names = xls.sheet_names

    # Load the data from the first sheet into a dataframe
    df = pd.read_excel(xls, sheet_names[0])

# --- Build one text "profile" per customer ----------------------------------
# Columns whose values are concatenated into the profile string.
# NOTE: several names carry trailing spaces; they are spelled here exactly as
# they are looked up in the dataframe, so do not "tidy" them.
customer_profile_columns = [
    'txt_profession_desc',
    'nam_custadr_city',
    'nam_custadr_state',
    'txt_holdadr_add3',
    'txt_cust_typ',
    'risk_category',
    'wealth_mng_cust',
    'annual_inc_trnor',
    'resi_type',
    'nam_product',
    'promotional_offers',
    'Bill Payment',
    'Dining ',
    'Loan',
    'Travel & hospitality ',
    'Vehicle Running Expenses',
    'Credit Card Payement ',
    'Gym_Fitness_Centre ',
    'B2B_Payment',
    'High_value_Transactions_Above_25k',
    'Shopping ',
    'Investments ',
    'No_of_transactions',
    'Total Spent ',
    'Month ',
    'Customer_Value',
    'Type_of Investor ',
    'Profession',
    'Frequency_of_Transactions',
    'Type_of_Transactions',
]

# Convert every profile column to text in one step so the values can be joined.
# The columns are overwritten in place, exactly as before.
# NOTE: missing values become the literal text 'nan' and are therefore
# tokenised like any other word by the vectorizer below.
df[customer_profile_columns] = df[customer_profile_columns].astype(str)

# Create a new column 'customer_profile' holding the space-joined profile text
df['customer_profile'] = df[customer_profile_columns].agg(' '.join, axis=1)

# --- Vectorise the profiles and compare customers ---------------------------
# Initialize the TF-IDF vectorizer with its default settings
vectorizer = TfidfVectorizer()

# Fit on the profile text and turn each profile into a TF-IDF vector
tfidf_matrix = vectorizer.fit_transform(df['customer_profile'])

# Pairwise cosine similarity between all profiles: entry [i][j] compares the
# customers at dataframe positions i and j.
cosine_similarities = cosine_similarity(tfidf_matrix)

def recommend_services(row_num, top_n_similar=5):
    """Recommend services for one customer based on its most similar peers.

    The customer's row of the module-level ``cosine_similarities`` matrix is
    ranked from most to least similar, the customer itself is left out, and
    for each recommendation column the value occurring most often among the
    ``top_n_similar`` nearest customers in ``df`` is returned.

    Args:
        row_num: Positional index of the customer in ``df`` /
            ``cosine_similarities``. Negative values count from the end.
        top_n_similar: Number of nearest customers that vote. Values below
            zero are treated as zero.

    Returns:
        A dict mapping each of 'Loan Recommendation',
        'Investment Recommendation', 'Credit Card Recommendation' and
        'Insurance Recommendation' to the most frequent value among the
        selected neighbours, or ``None`` when no neighbour supplies a value.

    Raises:
        IndexError: If ``row_num`` is outside the similarity matrix.
    """
    similarity_row = cosine_similarities[row_num]
    n_customers = len(similarity_row)

    # Map a negative position onto its non-negative equivalent so that the
    # self-exclusion below also works for e.g. ``row_num=-1``.
    own_position = row_num % n_customers

    # Exclude the customer explicitly instead of assuming it sorts first:
    # with a stable sort, an earlier customer whose score ties with the
    # self-score (e.g. an identical profile) would otherwise push the customer
    # into its own neighbour list.
    candidate_positions = (
        pos for pos in range(n_customers) if pos != own_position
    )
    ranked_positions = sorted(
        candidate_positions,
        key=lambda pos: similarity_row[pos],
        reverse=True,
    )
    top_similar_indices = ranked_positions[:max(top_n_similar, 0)]

    # The neighbour rows are the same for every column, so select them once.
    neighbours = df.iloc[top_similar_indices]

    # For each recommendation column, take the most common value among the
    # neighbours (value_counts orders values by descending frequency).
    recommendations = {}
    recommendation_columns = ['Loan Recommendation', 'Investment Recommendation', 'Credit Card Recommendation', 'Insurance Recommendation']
    for column in recommendation_columns:
        top_recommendations = neighbours[column].value_counts().index.tolist()
        recommendations[column] = top_recommendations[0] if top_recommendations else None

    return recommendations

# Example: look up and display the recommendations for the customer in row 0
recommendations = recommend_services(row_num=0)
print(recommendations)


{'Loan Recommendation': 'Medium', 'Investment Recommendation': 'Medium', 'Credit Card Recommendation': 'Platinum', 'Insurance Recommendation': 'Basic'}
