<a href="https://colab.research.google.com/github/Abi24-glitch/GenAI-Hackathon-/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Toy ratings table standing in for a real data source: the three lists are
# parallel, so position i describes one (user, product, rating) observation.
data = dict(
    user_id=[1, 1, 1, 2, 2, 3, 3, 4, 4, 5],
    product_id=[101, 102, 103, 101, 104, 102, 105, 106, 107, 108],
    rating=[5, 4, 3, 5, 2, 4, 5, 3, 4, 5],
)
df = pd.DataFrame(data)

# Data quality: number of missing values in each column.
print("Data Quality Check:", df.isna().sum(), sep="\n")

# Data quantity: (number of rows, number of columns).
print("Data Quantity Check:", df.shape, sep="\n")

# Reshape to one row per user and one column per product. Pairs with no
# rating come out of the pivot as NaN and are stored as 0 ("not rated").
user_item_matrix = df.pivot(
    values='rating',
    columns='product_id',
    index='user_id',
).fillna(0)
print("\nUser-Item Matrix:", user_item_matrix, sep="\n")

# Pairwise cosine similarity between the users' rating rows.
user_similarity = cosine_similarity(user_item_matrix)

# Wrap the raw array in a DataFrame labelled by user_id on both axes so that
# similarities can be looked up by user id.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    columns=user_item_matrix.index,
    index=user_item_matrix.index,
)

print("\nUser Similarity Matrix:", user_similarity_df, sep="\n")

# Function to get recommendations
def get_recommendations(user_id, user_item_matrix, user_similarity_df, n_recommendations=3):
    """Recommend products the user has not rated, with a predicted rating each.

    For every product the target user has not rated (an entry that is not
    > 0), the predicted rating is the similarity-weighted mean of the ratings
    given by the other users who did rate it. Using a mean instead of a raw
    sum keeps the returned values on the same scale as the input ratings, so
    they can be compared against true ratings.

    If none of a product's raters has positive similarity to the target user
    there is nothing to weight by; the plain mean of its ratings is used
    instead so that sparse data still yields suggestions. Among equally rated
    products, the one backed by more total similarity ranks first.

    Args:
        user_id: Row label of the target user in ``user_item_matrix``.
        user_item_matrix: DataFrame with one row per user and one column per
            product; values > 0 are ratings, anything else means "not rated".
        user_similarity_df: Square DataFrame of user-to-user similarities;
            the column ``user_id`` is read and looked up by the other users'
            row labels.
        n_recommendations: Maximum number of products to return.

    Returns:
        A float Series indexed by product, sorted by predicted rating in
        descending order, with at most ``n_recommendations`` entries. Empty
        when no other user rated anything the target has not.

    Raises:
        KeyError: If ``user_id`` is not a row of ``user_item_matrix`` or not
            a column of ``user_similarity_df``.
    """
    if user_id not in user_item_matrix.index:
        raise KeyError(f"user_id {user_id!r} is not a row of user_item_matrix")

    # Columns the target user has NOT rated are the only candidates.
    target_has_rated = user_item_matrix.loc[user_id] > 0
    is_candidate = ~target_has_rated.to_numpy()

    # Remove the target by label rather than by position in a sorted list,
    # which would drop the wrong user whenever similarities tie.
    neighbour_ratings = user_item_matrix.drop(index=user_id).loc[:, is_candidate]
    was_rated = neighbour_ratings > 0
    ratings = neighbour_ratings.where(was_rated, 0.0)

    # One weight per neighbour. Missing or negative similarities contribute
    # nothing, so they can neither crash the lookup nor flip a rating's sign.
    weights = (
        user_similarity_df[user_id]
        .reindex(neighbour_ratings.index)
        .fillna(0.0)
        .clip(lower=0.0)
    )

    # Total similarity of the users who actually rated each product.
    weight_total = was_rated.astype(float).mul(weights, axis=0).sum()
    weighted_mean = ratings.mul(weights, axis=0).sum() / weight_total

    rater_count = was_rated.sum()
    plain_mean = ratings.sum() / rater_count

    # Prefer the weighted estimate; fall back where it is undefined (0 / 0).
    predicted = weighted_mean.where(weight_total > 0, plain_mean).astype(float)

    ranked_products = (
        pd.DataFrame({"predicted": predicted, "support": weight_total})
        .loc[(rater_count > 0).to_numpy()]  # products nobody rated are not candidates
        .sort_values(["predicted", "support"], ascending=False)
        .index
    )
    return predicted.loc[ranked_products].head(n_recommendations)

# Demo: ask for the default number of suggestions for the user with id 1.
recommended_products = get_recommendations(
    user_id=1,
    user_item_matrix=user_item_matrix,
    user_similarity_df=user_similarity_df,
)
print("\nRecommended Products for User 1:", recommended_products, sep="\n")

# Function to evaluate recommendations
def evaluate_recommendations(true_ratings, predicted_ratings):
    """Measure how far predicted ratings are from the true ratings.

    Args:
        true_ratings: 1-D sequence of ground-truth ratings.
        predicted_ratings: 1-D sequence of predictions, same length and order
            as ``true_ratings``.

    Returns:
        A ``(mae, rmse)`` tuple: the mean absolute error and the root mean
        squared error between the two sequences.
    """
    mae = mean_absolute_error(true_ratings, predicted_ratings)
    # Take the square root ourselves instead of passing `squared=False`:
    # that keyword was deprecated in scikit-learn 1.4 and later removed, so
    # relying on it raises TypeError on current releases.
    rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
    return mae, rmse

# Example: calculate MAE and RMSE for the recommendations. Real ground truth
# is not available here, so the "true" ratings below are made-up values that
# are matched to the recommended products by position.
simulated_true_ratings = [5, 4, 3]
predicted_ratings = recommended_products.values  # Predicted ratings from the recommendation system

# The recommender can return fewer (or more) items than there are simulated
# ratings; the metrics require equal lengths, so compare only matched pairs.
n_compared = min(len(simulated_true_ratings), len(predicted_ratings))
true_ratings = simulated_true_ratings[:n_compared]
predicted_ratings = predicted_ratings[:n_compared]

if n_compared > 0:
    mae, rmse = evaluate_recommendations(true_ratings, predicted_ratings)
    print(f"\nMean Absolute Error: {mae}")
    print(f"Root Mean Squared Error: {rmse}")
else:
    # Nothing was recommended, so there is nothing to score.
    mae = rmse = float("nan")
    print("\nNo recommendations to evaluate.")

Data Quality Check:
user_id       0
product_id    0
rating        0
dtype: int64
Data Quantity Check:
(10, 3)

User-Item Matrix:
product_id  101  102  103  104  105  106  107  108
user_id                                           
1           5.0  4.0  3.0  0.0  0.0  0.0  0.0  0.0
2           5.0  0.0  0.0  2.0  0.0  0.0  0.0  0.0
3           0.0  4.0  0.0  0.0  5.0  0.0  0.0  0.0
4           0.0  0.0  0.0  0.0  0.0  3.0  4.0  0.0
5           0.0  0.0  0.0  0.0  0.0  0.0  0.0  5.0

User Similarity Matrix:
user_id         1         2         3    4    5
user_id                                        
1        1.000000  0.656532  0.353381  0.0  0.0
2        0.656532  1.000000  0.000000  0.0  0.0
3        0.353381  0.000000  1.000000  0.0  0.0
4        0.000000  0.000000  0.000000  1.0  0.0
5        0.000000  0.000000  0.000000  0.0  1.0

Recommended Products for User 1:
product_id
105    5.0
108    5.0
107    4.0
dtype: float64

Mean Absolute Error: 0.6666666666666666
Root Mean Squared E

