In [1]:
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [2]:
# Read the e-waste dataset from a CSV file (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='updated_e_waste_dataset.csv')

In [16]:
# Distinct values of the 'Item Name' column in the full dataset
data['Item Name'].unique()

array(['Toshiba Fire TV', 'LG Sound Bar', 'Nikon D850', 'Amazon Echo',
       'JBL Charge 4', 'Surface Laptop 4', 'LG OLED TV',
       'Panasonic Viera', 'Garmin Forerunner', 'Harman Kardon Speaker',
       'Fitbit Versa 3', 'Nintendo Switch', 'Galaxy S21',
       'Apple Watch Series 6', 'Sony Alpha A7', 'DJI Mavic Air 2',
       'Xbox Series X', 'Sony Bravia TV', 'iPad Pro', 'Lenovo Yoga',
       'Galaxy Note 20', 'MacBook Air', 'Roku Ultra', 'iPhone 11',
       'iPhone 12', 'Pixel 5', 'ThinkPad X1', 'Samsung QLED TV', 'PS5',
       'Dell XPS 13', 'Denon AV Receiver', 'GoPro Hero 9', 'Google Home',
       'Samsung Blu-ray Player', 'Yamaha Receiver', 'Ring Doorbell',
       'Pioneer AV Receiver', 'HP Spectre', 'Bose SoundLink',
       'MacBook Pro', 'Apple TV 4K', 'Samsung Galaxy Watch 3',
       'Surface Pro 7', 'Fire TV Stick', 'Nest Thermostat',
       'Canon EOS R5', 'Oculus Quest 2', 'Sony WH-1000XM4',
       'Galaxy Tab S7'], dtype=object)

In [3]:
# Target: the 'Profit' column.
y = data['Profit']
# Features: every other column except 'Item Name' and 'Current Metal Value ($)'.
# NOTE(review): the evaluation further down reports R-squared = 1.0, which
# suggests 'Profit' may be an exact linear function of columns kept here
# (possible target leakage) -- TODO confirm against the dataset definition.
X = data.drop(columns=['Item Name', 'Profit', 'Current Metal Value ($)'])

In [4]:
# Preprocessing: standardise the int64/float64 columns of X and one-hot encode
# its object-dtype columns. handle_unknown='ignore' keeps prediction from
# failing on categories that were not seen during fitting.
numeric_columns = X.select_dtypes(include=['int64', 'float64']).columns
categorical_columns = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
    ]
)


In [5]:
# Chain the preprocessing step and a linear-regression estimator so both are
# fitted and applied together
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('model', LinearRegression()),
]
pipeline = Pipeline(steps=pipeline_steps)

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit the preprocessing step and the regression model on the training split
pipeline.fit(X=X_train, y=y_train)

In [10]:
# Feature columns present in the training split (i.e. what the pipeline was fitted on)
X_train.columns

Index(['Category', 'Brand Name', 'Device Age', 'Device Condition',
       'Material Recovery Rate', 'Device Type', 'Year of Manufacture',
       'Market Value of Metals', 'Cost of Recovery', 'Gold (g)',
       'Aluminum (g)', 'Silver (g)', 'Carbon (g)', 'Platinum (g)',
       'Rhodium (g)', 'Nickel (g)', 'Tin (g)', 'Lithium (g)',
       'Recycling Score'],
      dtype='object')

In [12]:
# Distinct 'Brand Name' values in the training split
X_train['Brand Name'].unique()

array(['Dell', 'Panasonic', 'Sony', 'Samsung', 'Philips', 'Toshiba', 'HP',
       'Lenovo', 'LG', 'Apple'], dtype=object)

In [None]:
# Number of distinct values in each text (object-dtype) column of the dataset.
# Replaces a placeholder lookup with an empty column name, which raised
# KeyError because the dataset has no column called ''.
data.select_dtypes(include=['object']).nunique()

In [13]:
# Distinct 'Device Condition' values in the training split
X_train['Device Condition'].unique()

array(['Broken', 'Average', 'Good'], dtype=object)

In [9]:
# Distinct 'Category' values in the training split
X_train['Category'].unique()

array(['Cat3', 'Cat4', 'Cat2', 'Cat1'], dtype=object)

In [None]:
# Predict the target for the held-out test rows with the fitted pipeline
y_pred = pipeline.predict(X=X_test)


In [None]:
# Score the test-set predictions with four regression metrics
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed on the line above

In [None]:
# Report the evaluation metrics, one per line
print(
    f'Mean Squared Error: {mse}',
    f'R-squared Score: {r2}',
    f'Mean Absolute Error: {mae}',
    f'Root Mean Squared Error: {rmse}',
    sep='\n',
)

Mean Squared Error: 2.2480995942311844e-26
R-squared Score: 1.0
Mean Absolute Error: 1.1529976973179146e-13
Root Mean Squared Error: 1.499366397593058e-13


In [None]:
# Persist the fitted pipeline (preprocessing + model) to disk with pickle.
# `pickle` is imported in the notebook's import cell at the top rather than
# here, so all dependencies are visible in one place.
filename = 'e_waste_pipeline.pkl'
with open(filename, 'wb') as file:
    pickle.dump(pipeline, file)

print(f"Pipeline saved to {filename}")

Pipeline saved to e_waste_pipeline.pkl


In [None]:

# Reload the pipeline from the file written by the save cell and predict on the
# test split as a round-trip check.
# `filename` is reused instead of an environment-specific absolute path so the
# cell reads exactly the file that was just saved, wherever the notebook runs;
# the `with` block guarantees the file handle is closed.
# NOTE: pickle.load can execute arbitrary code -- only load files you created
# yourself or otherwise trust.
with open(filename, 'rb') as file:
    loaded_pipeline = pickle.load(file)
prediction = loaded_pipeline.predict(X_test)


In [None]:
# Display the reloaded pipeline's prediction for the first test row
first_prediction = prediction[0]
print(f"First prediction: {first_prediction}")

First prediction: 326.2100000000002
