In [5]:
#made by TOlulope Ladele


import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
def load_dataset(file_path):
    return pd.read_csv('/Users/pro/Desktop/hackathon/dummy_sustain_data.csv')

# Preprocess the dataset
def preprocess_data(data):
    # Define bio-fabricated materials
    bio_fabricated_materials = ['Mycelium', 'Algae Fabric', 'Mushroom Leather', 'Bacterial Cellulose']
    
    # Create a binary flag for bio-fabricated materials
    data['bio_material'] = data['material'].apply(lambda x: 1 if x in bio_fabricated_materials else 0)
    
    # Drop columns that are not needed for model training
    X = data.drop(['product_id', 'material', 'bio_material'], axis=1, errors='ignore')
    y = data['bio_material']
    
    # Encode categorical features
    X = pd.get_dummies(X, drop_first=True)
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return pd.DataFrame(X_scaled, columns=X.columns), y, X.columns, scaler

data = load_dataset('sustains.csv')
X, y, feature_columns, scaler = preprocess_data(data)


In [7]:
def train_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))
    
    return model

model = train_model(X, y)


              precision    recall  f1-score   support

           0       0.61      0.73      0.67       188
           1       0.34      0.23      0.28       112

    accuracy                           0.54       300
   macro avg       0.48      0.48      0.47       300
weighted avg       0.51      0.54      0.52       300



In [8]:
def suggest_alternative_materials(data, product_name, current_material, model, scaler, feature_columns):
    # Filter the dataset to find the specific product
    product = data[(data['product_name'] == product_name) & (data['material'] == current_material)]
    
    if product.empty:
        print(f"No products found with name '{product_name}' and material '{current_material}'.")
        return []
    
    # Remove the current product from the dataset to search for alternatives
    alternative_products = data[(data['product_name'] == product_name) & (data['material'] != current_material)]
    
    if alternative_products.empty:
        print(f"No alternative materials found for the product '{product_name}' with material '{current_material}'.")
        return []

    # Predict bio-fabricated materials for alternative products
    alternative_products_X = alternative_products.drop(['product_id', 'material', 'bio_material'], axis=1, errors='ignore')
    alternative_products_X = pd.get_dummies(alternative_products_X, drop_first=True)
    
    # Add missing columns with default values
    for col in feature_columns:
        if col not in alternative_products_X.columns:
            alternative_products_X[col] = 0
    alternative_products_X = alternative_products_X[feature_columns]
    
    # Scale features
    alternative_products_X_scaled = scaler.transform(alternative_products_X)
    
    # Predict
    predictions = model.predict(alternative_products_X_scaled)
    alternative_products['is_bio_material'] = predictions
    
    # Filter out bio-fabricated materials
    suggested_alternatives = alternative_products[alternative_products['is_bio_material'] == 1]
    
    return suggested_alternatives


In [9]:
def main():
    # Load dataset
    file_path = 'sustains.csv'  # Path to your dataset
    data = load_dataset(file_path)
    
    # Preprocess data and train model
    X, y, feature_columns, scaler = preprocess_data(data)
    model = train_model(X, y)
    
    # User input
    product_name = input("Enter the product name: ")
    current_material = input("Enter the current material: ")
    
    # Suggest alternative materials
    suggested_alternatives = suggest_alternative_materials(data, product_name, current_material, model, scaler, feature_columns)
    
    if not suggested_alternatives.empty:
        print(f"Suggested Alternative Materials for {product_name} with material {current_material}:")
        print(suggested_alternatives[['product_name', 'material', 'co2_emissions', 'water_usage', 'energy_consumption', 'recycled_content', 'env_score']])
    else:
        print(f"No alternative materials found for the product '{product_name}' with material '{current_material}'.")

if __name__ == "__main__":
    main()


              precision    recall  f1-score   support

           0       0.61      0.73      0.67       188
           1       0.34      0.23      0.28       112

    accuracy                           0.54       300
   macro avg       0.48      0.48      0.47       300
weighted avg       0.51      0.54      0.52       300

Suggested Alternative Materials for Shoes with material Nylon:
    product_name             material  co2_emissions  water_usage  \
14         Shoes  Bacterial Cellulose     101.534071    26.097865   
19         Shoes     Mushroom Leather      60.489741    33.739431   
28         Shoes         Algae Fabric     106.771464    15.197197   
31         Shoes             Mycelium     116.952853    32.872367   
42         Shoes     Mushroom Leather      50.185502    43.769948   
..           ...                  ...            ...          ...   
880        Shoes         Algae Fabric     156.632708    47.949776   
889        Shoes  Bacterial Cellulose     132.398372    35

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  alternative_products['is_bio_material'] = predictions
