<a href="https://colab.research.google.com/github/Rifthi-tech/recommendation_project/blob/main/Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Setting Up the Environment

In [None]:
# Core data processing
import pandas as pd
import numpy as np

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

# Utilities
from datetime import datetime
import pickle
import os

# 2. Import data

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Recommendation Project/sample.csv")  # Replace with your actual file path
print(df.head())

                            uniq_id  \
0  c2d766ca982eca8304150849735ffef9   
1  7f7036a6d550aaa89d34c77bd39a5e48   
2  f449ec65dcbc041b6ae5e6a32717d01b   
3  0973b37acd0c664e3de26e97e5571454   
4  bc940ea42ee6bef5ac7cea3fb5cfbee7   

                                         product_url  \
0  http://www.flipkart.com/alisha-solid-women-s-c...   
1  http://www.flipkart.com/fabhomedecor-fabric-do...   
2  http://www.flipkart.com/aw-bellies/p/itmeh4grg...   
3  http://www.flipkart.com/alisha-solid-women-s-c...   
4  http://www.flipkart.com/sicons-all-purpose-arn...   

                            product_name  \
0    Alisha Solid Women's Cycling Shorts   
1    FabHomeDecor Fabric Double Sofa Bed   
2                             AW Bellies   
3                            product_url   
4  Sicons All Purpose Arnica Dog Shampoo   

                               product_category_tree  Unnamed: 4  \
0  ["Clothing >> Women's Clothing >> Lingerie, Sl...         NaN   
1  ["Furniture >> Living Ro

# 3. Data Cleaning

In [None]:
def generate_sample_data():
    """Generate sample product, rating, and purchase data"""
    # Sample products
    products = pd.DataFrame({
        'product_id': [101, 102, 103, 104, 105],
        'name': ['Laptop', 'Smartphone', 'Headphones', 'Keyboard', 'Mouse'],
        'price': [999.99, 699.99, '149.99', 79.99, np.nan],  # Intentional issues
        'category': ['Electronics', 'Electronics', None, 'Accessories', 'Accessories'],
        'description': ['High-performance', '', 'Noise-cancelling', None, 'Wireless']
    })

    # Sample ratings (with some invalid entries)
    ratings = pd.DataFrame({
        'user_id': ['U1', 'U2', 'U3', 'U4', 'U5'],
        'product_id': [101, 102, 999, 104, 105],
        'rating': [5, 4, 0, 6, 3],  # 0 and 6 are invalid
        'timestamp': ['2023-01-01', '2023-01-02', '2023-01-03', 'invalid_date', '2023-01-05']
    })

    # Sample purchases
    purchases = pd.DataFrame({
        'purchase_id': [1, 2, 3, 4, 5],
        'user_id': ['U1', 'U2', 'U3', 'U4', 'U5'],
        'product_id': [101, 102, 103, 104, 105],
        'purchase_date': ['2023-01-01 10:00', '2023-01-02 11:30', 'invalid', '2023-01-04', None]
    })

    return products, ratings, purchases

def clean_data(products, ratings, purchases):



    products.fillna({
        'description': 'No description',
        'category': 'uncategorized'
    }, inplace=True)

    products['price'] = pd.to_numeric(products['price'], errors='coerce')
    products['price'].fillna(products['price'].mean(), inplace=True)



    ratings = ratings[ratings['rating'].between(1, 5)]  # Keep only valid ratings
    ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], errors='coerce')
    ratings.dropna(subset=['timestamp'], inplace=True)



    purchases['purchase_date'] = pd.to_datetime(purchases['purchase_date'], errors='coerce')
    purchases.dropna(subset=['purchase_date'], inplace=True)

    print("\n Data cleaning completed successfully!")
    return products, ratings, purchases

def main():


    # 1. Generate sample data

    products, ratings, purchases = generate_sample_data()

    # 2. Clean data
    clean_products, clean_ratings, clean_purchases = clean_data(products.copy(),
                                                              ratings.copy(),
                                                              purchases.copy())

    # 3. Show ALL results


    print("\n CLEANED PRODUCTS (ALL):")
    print(clean_products.to_markdown(tablefmt="grid", numalign="center"))

    print("\n CLEANED RATINGS (ALL):")
    print(clean_ratings.to_markdown(tablefmt="grid", numalign="center"))

    print("\n CLEANED PURCHASES (ALL):")
    print(clean_purchases.to_markdown(tablefmt="grid", numalign="center"))


    print(" CLEANING STATS:")
    print(f"Products: {len(clean_products)} records (Original: {len(products)}) - All kept with fixes")
    print(f"Ratings: {len(clean_ratings)} valid records (Original: {len(ratings)}) - {len(ratings)-len(clean_ratings)} removed")
    print(f"Purchases: {len(clean_purchases)} valid records (Original: {len(purchases)}) - {len(purchases)-len(clean_purchases)} removed")
    print("="*50)

    print("\n SUCCESS: All data has been cleaned and is now ready for analysis!")

if __name__ == "__main__":
    main()


 Data cleaning completed successfully!

 CLEANED PRODUCTS (ALL):
+----+--------------+------------+---------+---------------+------------------+
|    |  product_id  | name       |  price  | category      | description      |
| 0  |     101      | Laptop     | 999.99  | Electronics   | High-performance |
+----+--------------+------------+---------+---------------+------------------+
| 1  |     102      | Smartphone | 699.99  | Electronics   |                  |
+----+--------------+------------+---------+---------------+------------------+
| 2  |     103      | Headphones | 149.99  | uncategorized | Noise-cancelling |
+----+--------------+------------+---------+---------------+------------------+
| 3  |     104      | Keyboard   |  79.99  | Accessories   | No description   |
+----+--------------+------------+---------+---------------+------------------+
| 4  |     105      | Mouse      | 482.49  | Accessories   | Wireless         |
+----+--------------+------------+---------+----------

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  products['price'].fillna(products['price'].mean(), inplace=True)


#4.Feature Extraction

In [None]:
def feature_extraction(products, ratings, purchases):
    try:
        print("Starting feature extraction process...\n")

        # Product features
        print("Step 1/5: Extracting product features using TF-IDF...")
        tfidf = TfidfVectorizer(stop_words='english')
        product_features = tfidf.fit_transform(products['description'] + " " + products['category'])
        print(f" Success! Product features matrix shape: {product_features.shape}\n")

        # User features from ratings
        print("Step 2/5: Extracting user features from ratings data...")
        user_ratings = ratings.groupby('user_id').agg({
            'rating': ['mean', 'count'],
            'product_id': 'nunique'
        }).reset_index()
        user_ratings.columns = ['user_id', 'avg_rating', 'rating_count', 'unique_products_rated']
        print(f" Success! Generated {len(user_ratings)} user rating records")
        print("First 3 user rating features:")
        print(user_ratings.head(3).to_string(), "\n")

        # User features from purchases
        print("Step 3/5: Extracting user features from purchases data...")
        user_purchases = purchases.groupby('user_id').agg({
            'product_id': ['count', 'nunique'],
            'purchase_date': 'max'
        }).reset_index()
        user_purchases.columns = ['user_id', 'total_purchases', 'unique_products_purchased', 'last_purchase_date']
        print(f" Success! Generated {len(user_purchases)} user purchase records")
        print("First 3 user purchase features:")
        print(user_purchases.head(3).to_string(), "\n")

        # Merge user features
        print("Step 4/5: Merging all user features...")
        user_features = pd.merge(user_ratings, user_purchases, on='user_id', how='outer')
        user_features.fillna(0, inplace=True)
        print(f" Success! Merged {len(user_features)} user feature records")
        print("First 3 merged user features:")
        print(user_features.head(3).to_string(), "\n")

        # Normalize features
        print("Step 5/5: Normalizing user features...")
        scaler = MinMaxScaler()
        cols_to_normalize = ['avg_rating', 'rating_count', 'unique_products_rated',
                            'total_purchases', 'unique_products_purchased']
        user_features[cols_to_normalize] = scaler.fit_transform(user_features[cols_to_normalize])
        print(" Successfully normalized user features!")
        print("First 3 normalized user features:")
        print(user_features.head(3).to_string(), "\n")

        print("\n Feature extraction completed successfully! ")
        print(f"Final product features shape: {product_features.shape}")
        print(f"Final user features shape: {user_features.shape}\n")

        return product_features, user_features, tfidf, scaler

    except Exception as e:
        print(f"\n Error in feature extraction: {str(e)}")
        raise

# Example usage:
if __name__ == "__main__":
    # Create sample data to test
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.preprocessing import MinMaxScaler

    products = pd.DataFrame({
        'description': ['phone', 'laptop', 'tablet'],
        'category': ['electronics', 'electronics', 'electronics']
    })

    ratings = pd.DataFrame({
        'user_id': [1, 1, 2, 2, 3],
        'product_id': [1, 2, 1, 3, 2],
        'rating': [5, 4, 3, 5, 4]
    })

    purchases = pd.DataFrame({
        'user_id': [1, 1, 2, 3, 3],
        'product_id': [1, 2, 1, 2, 3],
        'purchase_date': pd.to_datetime(['2023-01-01', '2023-02-01', '2023-01-15', '2023-03-01', '2023-03-15'])
    })

    # Call the function
    product_features, user_features, tfidf, scaler = feature_extraction(products, ratings, purchases)

Starting feature extraction process...

Step 1/5: Extracting product features using TF-IDF...
 Success! Product features matrix shape: (3, 4)

Step 2/5: Extracting user features from ratings data...
 Success! Generated 3 user rating records
First 3 user rating features:
   user_id  avg_rating  rating_count  unique_products_rated
0        1         4.5             2                      2
1        2         4.0             2                      2
2        3         4.0             1                      1 

Step 3/5: Extracting user features from purchases data...
 Success! Generated 3 user purchase records
First 3 user purchase features:
   user_id  total_purchases  unique_products_purchased last_purchase_date
0        1                2                          2         2023-02-01
1        2                1                          1         2023-01-15
2        3                2                          2         2023-03-15 

Step 4/5: Merging all user features...
 Success! Merged

# 5. Model Training

In [None]:
def display_all_model_outputs(results):
    """Displays **all** trained model outputs with clear success messages and formatted data"""

    try:
        print("\n" + "="*50)
        print(" STARTING MODEL OUTPUT DISPLAY".center(50))
        print("="*50)

        # 1. User similarity matrix
        print("\n STEP 1/4: Displaying User Similarity Matrix")
        user_sim_df = pd.DataFrame(results['user_similarity'])
        print(f" Success! User similarity matrix shape: {user_sim_df.shape}")
        print("Top 5x5 portion of User Similarity Matrix:")
        print(user_sim_df.iloc[:5, :5].to_markdown(tablefmt="grid", floatfmt=".2f"))

        # 2. Product similarity matrix
        print("\n STEP 2/4: Displaying Product Similarity Matrix")
        product_sim_df = pd.DataFrame(results['product_similarity'])
        print(f" Success! Product similarity matrix shape: {product_sim_df.shape}")
        print("Top 5x5 portion of Product Similarity Matrix:")
        print(product_sim_df.iloc[:5, :5].to_markdown(tablefmt="grid", floatfmt=".2f"))

        # 3. Purchase matrix
        print("\n STEP 3/4: Displaying Purchase Matrix")
        print(f" Success! Purchase matrix shape: {results['purchase_matrix'].shape}")
        print("Sample of Purchase Matrix (first 5 rows):")
        print(results['purchase_matrix'].head().to_markdown(tablefmt="grid"))

        # 4. Hybrid model weights
        print("\n STEP 4/4: Displaying Hybrid Model Weights")
        weights_df = pd.DataFrame.from_dict(results['hybrid_weights'], orient='index', columns=['Weight'])
        print(" Success! Retrieved hybrid model weights")
        print("Hybrid Model Weights:")
        print(weights_df.to_markdown(tablefmt="grid", floatfmt=".2f"))

        print("\n" + "="*50)
        print(" ALL MODEL OUTPUTS DISPLAYED SUCCESSFULLY!".center(50))
        print("="*50)

    except KeyError as e:
        print(f"\n Error: Missing expected key in results - {str(e)}")
    except Exception as e:
        print(f"\n Unexpected error displaying outputs: {str(e)}")

# Example usage with test data
if __name__ == "__main__":
    import pandas as pd
    import numpy as np

    # Create sample model results
    model_results = {
        'user_similarity': np.random.rand(10, 10),
        'product_similarity': np.random.rand(15, 15),
        'purchase_matrix': pd.DataFrame(np.random.randint(0, 2, (10, 15))),
        'hybrid_weights': {
            'content_weight': 0.4,
            'collab_weight': 0.3,
            'popularity_weight': 0.2,
            'recency_weight': 0.1
        }
    }

    # Call the function
    display_all_model_outputs(model_results)


           STARTING MODEL OUTPUT DISPLAY          

 STEP 1/4: Displaying User Similarity Matrix
 Success! User similarity matrix shape: (10, 10)
Top 5x5 portion of User Similarity Matrix:
+----+------+------+------+------+------+
|    |    0 |    1 |    2 |    3 |    4 |
|  0 | 0.17 | 0.14 | 0.72 | 0.44 | 0.78 |
+----+------+------+------+------+------+
|  1 | 0.85 | 1.00 | 0.49 | 1.00 | 0.57 |
+----+------+------+------+------+------+
|  2 | 0.22 | 0.31 | 0.09 | 0.69 | 0.20 |
+----+------+------+------+------+------+
|  3 | 0.59 | 0.43 | 0.80 | 0.99 | 0.40 |
+----+------+------+------+------+------+
|  4 | 0.42 | 0.62 | 0.12 | 0.42 | 0.01 |
+----+------+------+------+------+------+

 STEP 2/4: Displaying Product Similarity Matrix
 Success! Product similarity matrix shape: (15, 15)
Top 5x5 portion of Product Similarity Matrix:
+----+------+------+------+------+------+
|    |    0 |    1 |    2 |    3 |    4 |
|  0 | 0.95 | 0.33 | 0.09 | 0.02 | 0.82 |
+----+------+------+------+------

# 6. Recommendation Functions

In [None]:
def collaborative_recommendation(user_id, model_data):
    """Generate collaborative filtering recommendations with detailed output"""


    try:
        # Step 1: Find user index
        print(f"\nSTEP 1: Locating user {user_id} in dataset...")
        user_idx = np.where(model_data['user_ids'] == user_id)[0][0]
        print(f" Found user at index {user_idx}")

        # Step 2: Calculate similarity scores
        print("\nSTEP 2: Calculating user similarity scores...")
        user_sim_scores = model_data['user_similarity'][user_idx]
        print(f" Calculated similarity scores for {len(user_sim_scores)} users")

        # Step 3: Get similar users
        print("\nSTEP 3: Identifying top similar users...")
        similar_users = np.argsort(user_sim_scores)[-10:-1]
        print(f" Top similar users found: {similar_users}")

        # Step 4: Get products from similar users
        print("\nSTEP 4: Analyzing products from similar users...")
        similar_users_products = model_data['user_product_matrix'].iloc[similar_users]
        recommended_products = similar_users_products.mean(axis=0).sort_values(ascending=False)
        print(f" Identified {len(recommended_products)} potential products")

        # Step 5: Filter already purchased products
        print("\nSTEP 5: Filtering out already purchased products...")
        user_products = set(model_data['user_product_matrix'].loc[user_id]
                          [model_data['user_product_matrix'].loc[user_id] > 0].index)
        recommended_products = recommended_products[~recommended_products.index.isin(user_products)]
        print(f" Filtered to {len(recommended_products)} new recommendations")

        # Final output

        print(f" COLLABORATIVE RECOMMENDATIONS FOR USER {user_id}".center(60))

        print(f"\nTop 10 Recommendations (of {len(recommended_products)} total):")
        print(recommended_products.head(10).to_frame(name='Recommendation Score').to_markdown(tablefmt="grid", floatfmt=".3f"))

        return recommended_products.index.tolist()

    except Exception as e:
        print("\n ERROR in Collaborative Recommendation:", str(e))
        return []

def content_based_recommendation(product_id, model_data):
    """Generate content-based recommendations with detailed output"""

    print(" STARTING CONTENT-BASED RECOMMENDATION".center(60))


    try:
        # Step 1: Find product index
        print(f"\nSTEP 1: Locating product {product_id} in dataset...")
        product_idx = np.where(model_data['product_ids'] == product_id)[0][0]
        print(f" Found product at index {product_idx}")

        # Step 2: Get similarity scores
        print("\nSTEP 2: Calculating product similarity scores...")
        sim_scores = list(enumerate(model_data['product_similarity'][product_idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:11]  # Get top 10 similar products
        print(f" Calculated similarity scores for {len(sim_scores)} products")

        # Step 3: Prepare recommendations
        print("\nSTEP 3: Preparing recommendations...")
        similar_products = [(model_data['product_ids'][i], score) for i, score in sim_scores]

        # Final output

        print(f" CONTENT-BASED RECOMMENDATIONS FOR PRODUCT {product_id}".center(60))

        print("\nTop 10 Similar Products:")
        print(pd.DataFrame(similar_products, columns=['Product ID', 'Similarity Score'])
                  .to_markdown(tablefmt="grid", floatfmt=".3f"))

        return [product for product, score in similar_products]

    except Exception as e:
        print("\n ERROR in Content-Based Recommendation:", str(e))
        return []

def hybrid_recommendation(user_id, product_id, model_data):
    """Generate hybrid recommendations with detailed output"""

    print(" STARTING HYBRID RECOMMENDATION".center(60))
    print(f"User ID: {user_id} | Product ID: {product_id}")

    try:
        # Step 1: Get collaborative recommendations
        print("\nPHASE 1: Running Collaborative Filtering...")
        collab_rec = collaborative_recommendation(user_id, model_data)
        print(f" Got {len(collab_rec)} collaborative recommendations")

        # Step 2: Get content-based recommendations
        print("\nPHASE 2: Running Content-Based Filtering...")
        content_rec = content_based_recommendation(product_id, model_data)
        print(f" Got {len(content_rec)} content-based recommendations")

        # Step 3: Combine recommendations
        print("\nPHASE 3: Combining recommendations with weights...")
        print(f"Weights: Collaborative={model_data['hybrid_weights']['collaborative']}, "
              f"Content={model_data['hybrid_weights']['content']}")

        recommendations = {}
        for product in collab_rec:
            recommendations[product] = model_data['hybrid_weights']['collaborative']

        for product in content_rec:
            recommendations[product] = recommendations.get(product, 0) + model_data['hybrid_weights']['content']

        # Step 4: Sort and finalize
        sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
        final_recommendations = [product for product, score in sorted_recommendations]

        # Final output

        print(f" HYBRID RECOMMENDATIONS FOR USER {user_id}".center(60))

        print("\nTop 10 Combined Recommendations:")
        print(pd.DataFrame(sorted_recommendations[:10], columns=['Product ID', 'Combined Score'])
                  .to_markdown(tablefmt="grid", floatfmt=".3f"))
        print(f"\nTotal unique recommendations: {len(final_recommendations)}")

        return final_recommendations

    except Exception as e:
        print("\n ERROR in Hybrid Recommendation:", str(e))
        return []

# Test execution
if __name__ == "__main__":
    import pandas as pd
    import numpy as np



    model_data = {
        'user_ids': np.array([1, 2, 3, 4, 5]),
        'product_ids': np.array(['P1', 'P2', 'P3', 'P4', 'P5']),
        'user_similarity': np.random.rand(5, 5),
        'product_similarity': np.random.rand(5, 5),
        'user_product_matrix': pd.DataFrame(np.random.randint(0, 2, (5, 5))),
        'hybrid_weights': {
            'collaborative': 0.6,
            'content': 0.4
        }
  }

    # Set test parameters
    test_user = 3
    test_product = "P3"

    # Run hybrid recommendation

    print(" TESTING HYBRID RECOMMENDATION SYSTEM".center(60))


    final_recommendations = hybrid_recommendation(test_user, test_product, model_data)


    print(f" FINAL RECOMMENDATIONS FOR USER {test_user}".center(60))
    print(f"\nReturned {len(final_recommendations)} recommendations:")
    print(final_recommendations)

            TESTING HYBRID RECOMMENDATION SYSTEM            
               STARTING HYBRID RECOMMENDATION               
User ID: 3 | Product ID: P3

PHASE 1: Running Collaborative Filtering...

STEP 1: Locating user 3 in dataset...
 Found user at index 2

STEP 2: Calculating user similarity scores...
 Calculated similarity scores for 5 users

STEP 3: Identifying top similar users...
 Top similar users found: [3 0 2 1]

STEP 4: Analyzing products from similar users...
 Identified 5 potential products

STEP 5: Filtering out already purchased products...
 Filtered to 2 new recommendations
          COLLABORATIVE RECOMMENDATIONS FOR USER 3          

Top 10 Recommendations (of 2 total):
+----+------------------------+
|    |   Recommendation Score |
|  2 |                  0.500 |
+----+------------------------+
|  0 |                  0.250 |
+----+------------------------+
 Got 2 collaborative recommendations

PHASE 2: Running Content-Based Filtering...
            STARTING CONTENT-BAS

# 7.Export data

In [3]:
import pandas as pd
import numpy as np
import os

def export_model_data_separately(model_data, directory="/content/drive/MyDrive/Recommendation Project/"):
    """
    Export trained model components into separate CSV files.

    Args:
        model_data (dict): Dictionary containing all trained model components.
        directory (str): Path where CSV files will be saved.

    Returns:
        bool: True if export succeeded, False otherwise.
    """
    try:
        # Ensure directory exists
        os.makedirs(directory, exist_ok=True)
        print(f"\nStarting export of model data to {directory}...\n")

        # Validate required keys in model_data
        required_keys = ['user_ids', 'product_ids', 'user_similarity', 'product_similarity', 'purchase_matrix', 'hybrid_weights']
        for key in required_keys:
            if key not in model_data:
                raise KeyError(f"Missing required model data key: {key}")

        # Prepare user similarity data
        user_sim_df = pd.DataFrame(
            model_data['user_similarity'],
            columns=[f"user_sim_{i}" for i in range(model_data['user_similarity'].shape[1])],
            index=model_data['user_ids']
        ).reset_index().rename(columns={'index': 'user_id'})
        user_sim_df.to_csv(os.path.join(directory, "user_similarity.csv"), index=False)
        print(f"User similarity matrix saved!")

        # Prepare product similarity data
        product_sim_df = pd.DataFrame(
            model_data['product_similarity'],
            columns=[f"product_sim_{i}" for i in range(model_data['product_similarity'].shape[1])],
            index=model_data['product_ids']
        ).reset_index().rename(columns={'index': 'product_id'})
        product_sim_df.to_csv(os.path.join(directory, "product_similarity.csv"), index=False)
        print(f"Product similarity matrix saved!")

        # Prepare purchase matrix
        purchase_matrix = model_data['purchase_matrix']
        purchase_matrix.index.name = 'user_id'
        purchase_matrix.columns.name = 'product_id'
        purchase_df = purchase_matrix.reset_index().melt(
            id_vars='user_id',
            var_name='product_id',
            value_name='purchase_value'
        )
        purchase_df.to_csv(os.path.join(directory, "purchase_data.csv"), index=False)
        print(f"Purchase records saved!")

        # Prepare hybrid weights
        weights_df = pd.DataFrame.from_dict(
            model_data['hybrid_weights'],
            orient='index',
            columns=['weight_value']
        ).reset_index().rename(columns={'index': 'weight_type'})
        weights_df.to_csv(os.path.join(directory, "hybrid_weights.csv"), index=False)
        print(f"Model weights saved!")

        print("\nSuccessfully exported all model data to separate files.")
        return True

    except KeyError as ke:
        print(f"\nKeyError: {str(ke)} - Please check model_data keys.")
        return False
    except Exception as e:
        print(f"\nError exporting model data: {str(e)}")
        return False

# TESTING THE FUNCTION WITH SAMPLE DATA
if __name__ == "__main__":
    model_data = {
        'user_ids': np.array(['U1', 'U2', 'U3']),
        'product_ids': np.array(['P101', 'P102', 'P103', 'P104']),
        'user_similarity': np.random.rand(3, 3),
        'product_similarity': np.random.rand(4, 4),
        'purchase_matrix': pd.DataFrame(
            np.random.randint(0, 2, (3, 4)),
            index=['U1', 'U2', 'U3'],
            columns=['P101', 'P102', 'P103', 'P104']
        ),
        'hybrid_weights': {
            'content_weight': 0.4,
            'collab_weight': 0.3,
            'popularity_weight': 0.2,
            'recency_weight': 0.1
        }
    }

    export_success = export_model_data_separately(model_data)

    if export_success:
        print("\nAll trained data successfully exported to separate CSV files!")
    else:
        print("\nExport failed - check error messages.")


Starting export of model data to /content/drive/MyDrive/Recommendation Project/...

User similarity matrix saved!
Product similarity matrix saved!
Purchase records saved!
Model weights saved!

Successfully exported all model data to separate files.

All trained data successfully exported to separate CSV files!
