In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [1]:
# Load the datasets
club_skills_df = pd.read_excel('Dataset/Club_Skills_Dataset_Extended.xlsx')
ratings_df = pd.read_excel('Dataset/Ratings_dataset.xlsx')


NameError: name 'pd' is not defined

In [None]:
# Inspect the Club Skills dataset
print("Club Skills Dataset:\n", club_skills_df.head())

# Inspect the Ratings dataset
print("Ratings Dataset:\n", ratings_df.head())


Club Skills Dataset:
    Sr. No                      Club           Skill  \
0       1  AALEKH - Art Elated Club         Drawing   
1       2  AALEKH - Art Elated Club       Sketching   
2       3  AALEKH - Art Elated Club        Painting   
3       4  AALEKH - Art Elated Club     Digital Art   
4       5  AALEKH - Art Elated Club  Graphic Design   

                                           Image url  
0  https://mitaoe.ac.in/assets/images/club/aalekh...  
1  https://mitaoe.ac.in/assets/images/club/aalekh...  
2  https://mitaoe.ac.in/assets/images/club/aalekh...  
3  https://mitaoe.ac.in/assets/images/club/aalekh...  
4  https://mitaoe.ac.in/assets/images/club/aalekh...  
Ratings Dataset:
    Sr. No                                     Club Name   Rating
0       1                       AALEKH - Art Elated Club     480
1       2                                           AERO     495
2       3  TEAM NIYUDRATH RACING (TNR) - AUTOSPORTS CLUB     490
3       4                        CodeCh

In [None]:
# Remove any leading/trailing spaces from column names
club_skills_df.columns = club_skills_df.columns.str.strip()
ratings_df.columns = ratings_df.columns.str.strip()

# Rename columns to align with each other, if needed
club_skills_df.rename(columns={'Club': 'Club Name'}, inplace=True)
ratings_df.rename(columns={'Club Name': 'Club Name'}, inplace=True)  # This line may not be necessary


In [None]:
# Merge datasets on 'Club Name' to combine skills and ratings for each club
merged_df = pd.merge(club_skills_df, ratings_df, on='Club Name', how='inner')
print("Merged Dataset:\n", merged_df.head())


Merged Dataset:
    Sr. No_x                 Club Name           Skill  \
0         1  AALEKH - Art Elated Club         Drawing   
1         2  AALEKH - Art Elated Club       Sketching   
2         3  AALEKH - Art Elated Club        Painting   
3         4  AALEKH - Art Elated Club     Digital Art   
4         5  AALEKH - Art Elated Club  Graphic Design   

                                           Image url  Sr. No_y  Rating  
0  https://mitaoe.ac.in/assets/images/club/aalekh...         1     480  
1  https://mitaoe.ac.in/assets/images/club/aalekh...         1     480  
2  https://mitaoe.ac.in/assets/images/club/aalekh...         1     480  
3  https://mitaoe.ac.in/assets/images/club/aalekh...         1     480  
4  https://mitaoe.ac.in/assets/images/club/aalekh...         1     480  


In [None]:
# Create pivot table with clubs as rows, skills as columns
club_skill_matrix = merged_df.pivot_table(index='Club Name', columns='Skill', aggfunc='size', fill_value=0)
print("Club Skill Matrix:\n", club_skill_matrix.head())


Club Skill Matrix:
 Skill                     3D Modeling  API Development  Accent Training  \
Club Name                                                                 
AALEKH - Art Elated Club            0                0                0   
AERO                                1                0                0   
CodeChef MITAOE Chapter             0                0                0   
Digital Design                      1                0                0   
Drama                               0                0                0   

Skill                     Acting  Actuators  Adaptability  \
Club Name                                                   
AALEKH - Art Elated Club       0          0             0   
AERO                           0          0             0   
CodeChef MITAOE Chapter        0          0             0   
Digital Design                 1          0             0   
Drama                          0          0             0   

Skill                     

In [None]:
# Convert the matrix to a sparse matrix format
sparse_matrix = csr_matrix(club_skill_matrix.values)


In [None]:
# Compute cosine similarity between clubs
similarity_matrix = cosine_similarity(sparse_matrix)
similarity_df = pd.DataFrame(similarity_matrix, index=club_skill_matrix.index, columns=club_skill_matrix.index)


In [None]:
def recommend_clubs(skill, top_n=5):
    if skill not in club_skill_matrix.columns:
        print("Skill not found.")
        return []
    
    # Filter clubs that have the specified skill
    clubs_with_skill = club_skill_matrix[club_skill_matrix[skill] > 0].index
    
    # Calculate the average similarity for clubs with the skill
    recommendations = []
    for club in clubs_with_skill:
        similar_clubs = similarity_df[club].sort_values(ascending=False).index
        recommended_clubs = [c for c in similar_clubs if c != club][:top_n]
        recommendations.extend(recommended_clubs)
    
    # Return unique recommendations
    return list(set(recommendations))[:top_n]


In [None]:
# Example usage
example_skill = 'Thermodynamics'  # replace with any skill to test
recommended_clubs = recommend_clubs(example_skill)
print(f"Clubs recommended for skill '{example_skill}':", recommended_clubs)


Clubs recommended for skill 'Thermodynamics': ['Srujan', 'AERO', 'TEAM NIYUDRATH RACING (TNR) - AUTOSPORTS CLUB', 'Prakruti', 'Robotics']


In [None]:
import pickle

# Save model components (similarity matrix and skill matrix) to a pickle file
with open('club_recommendation_model.pkl', 'wb') as file:
    pickle.dump({'similarity_matrix': similarity_df, 'club_skill_matrix': club_skill_matrix}, file)

print("Model saved as 'club_recommendation_model.pkl'")


Model saved as 'club_recommendation_model.pkl'


In [None]:
# Load the model components from the pickle file
with open('club_recommendation_model.pkl', 'rb') as file:
    model_data = pickle.load(file)

# Extract the loaded data
loaded_similarity_df = model_data['similarity_matrix']
loaded_club_skill_matrix = model_data['club_skill_matrix']

print("Model loaded successfully.")


NameError: name 'pickle' is not defined