In [14]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [15]:
# Load datasets
file_paths = [
    "Steel_Product_List_200_Modified.csv",
    "Iron_Product_List_200_Modified.csv",
    "Wood_Product_List_200_Modified.csv",
    "Concrete_Product_List_200_Modified.csv",
    "Cement_Product_List_200_Modified.csv",
    "aggregate_product_list_200.csv",
    "Bricks_products_list_200.csv"
]
dataframes = [pd.read_csv(path) for path in file_paths]
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.head()


Unnamed: 0,Name,Material Type,Application,Strength,Water Resistance,Cost,Durability,Eco-Friendly
0,TATA Steel Structura,Steel,Building Framework,High,Medium,Low,High,Yes
1,JSW Steel 600D,Steel,Reinforcement Bars,High,High,Low,High,Yes
2,SAIL Structural Beams,Steel,Heavy Construction,Low,Low,Low,Low,Yes
3,Essar Steel Plates,Steel,Industrial Infrastructure,Low,Low,Low,Low,No
4,Jindal Panther Steel Bars,Steel,Reinforced Buildings,High,Medium,Low,High,No


In [16]:
# Define input and output columns
input_features = ['Material Type', 'Cost', 'Durability', 'Strength', 'Water Resistance']
output_columns = ['Name', 'Application']

# Clean and preprocess
filtered_df = combined_df.dropna(subset=input_features + output_columns).reset_index(drop=True)

# Set up encoder
categorical_features = input_features
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)]
)

# Fit and transform
X = preprocessor.fit_transform(filtered_df[input_features])


In [19]:
def recommend_products(material_type, cost, durability, strength, water_resistance, top_n=3):
    import numpy as np
    # Build a single-row DataFrame from user input
    user_input_df = pd.DataFrame([{
        'Material Type': material_type,
        'Cost': cost,
        'Durability': durability,
        'Strength': strength,
        'Water Resistance': water_resistance
    }])

    # Transform the input
    user_vector = preprocessor.transform(user_input_df)

    # Compute similarity
    similarities = cosine_similarity(user_vector, X).flatten()

    # Get top N similar product indices
    top_indices = similarities.argsort()[::-1][:top_n]

    # Return recommended products
    return filtered_df.loc[top_indices, ['Name', 'Application']]


In [20]:
# Example input
recommend_products(
    material_type='Concrete',
    cost='Medium',
    durability='Medium',
    strength='Medium',
    water_resistance='Medium'
)


Unnamed: 0,Name,Application
377,Jaypee Strong OPC Concrete,Large Buildings & Infrastructure
367,Reliance Ultra-Strength Concrete,Heavy Industrial Use
75,Bhilai Advanced Construction Steel,Buildings & Bridges
