# In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import pickle

# ---------------------------------------------------------------------------
# Industrial copper modeling: clean the raw sales data, train a selling-price
# regressor and an order-status classifier, persist the fitted artifacts, and
# expose both models through a small Streamlit form.
#
# NOTE(review): Streamlit re-executes the whole script on every widget
# interaction, so the models below are retrained on each rerun. Consider
# moving training into a separate script (or a cached function) and only
# loading the pickled artifacts in the app.
# ---------------------------------------------------------------------------

# --- Load and clean ---------------------------------------------------------
data = pd.read_csv('/content/Copper_Set.csv')
print(f"Initial dataset shape: {data.shape}")

# '00000' is handled as a "no reference" placeholder, i.e. a missing value.
data['material_ref'] = data['material_ref'].replace('00000', np.nan)
print(f"Dataset shape after replacing '00000' in material_ref: {data.shape}")

numeric_columns = ['thickness', 'width', 'selling_price']
for col in numeric_columns:
    # Coerce first so stray non-numeric strings become NaN instead of
    # breaking median(); then assign the result back. The former
    # `data[col].fillna(..., inplace=True)` is a chained in-place update,
    # which is deprecated and a no-op under pandas Copy-on-Write.
    data[col] = pd.to_numeric(data[col], errors='coerce')
    data[col] = data[col].fillna(data[col].median())
print(f"Dataset shape after filling missing values for {numeric_columns}: {data.shape}")

categorical_columns = ['material_ref', 'status', 'item type']
for col in categorical_columns:
    # '-1' marks "missing"; astype(str) keeps every label the same type so
    # LabelEncoder can sort them.
    data[col] = data[col].fillna('-1').astype(str)
print(f"Dataset shape after filling missing values for {categorical_columns}: {data.shape}")

regression_features = ['quantity tons', 'thickness', 'width', 'material_ref', 'item type']
categorical_features = ['material_ref', 'item type']
numeric_features = [col for col in regression_features if col not in categorical_features]

# Only the genuinely numeric features are coerced. Coercing the categorical
# ones as well would turn every text label into NaN and the dropna() below
# would then discard those rows.
data[numeric_features] = data[numeric_features].apply(pd.to_numeric, errors='coerce')
print("NaN values per column before dropping rows:", data[regression_features].isna().sum())
data.dropna(subset=regression_features, inplace=True)
print(f"Dataset shape after dropping rows with NaN in {regression_features}: {data.shape}")
if data.empty:
    raise ValueError("Dataset is empty after cleaning. Ensure valid entries in all required columns.")

# Categorical features become integer codes (sufficient for tree ensembles).
# The fitted encoders are kept so user input can be mapped the same way.
feature_encoders = {}
for col in categorical_features:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    feature_encoders[col] = encoder
print(f"Dataset shape after converting {regression_features} to numeric: {data.shape}")

label_encoder = LabelEncoder()
data['status'] = label_encoder.fit_transform(data['status'])
print(f"Dataset shape after encoding 'status': {data.shape}")

# --- Split, scale, train ----------------------------------------------------
classification_features = regression_features
X_reg = data[regression_features]
y_reg = data['selling_price']
X_clf = data[classification_features]
y_clf = data['status']

# Both tasks share the same feature matrix, so one joint split keeps the
# train/test rows identical for the regressor and the classifier.
X_train, X_test, y_train_reg, y_test_reg, y_train_clf, y_test_clf = train_test_split(
    X_reg, y_reg, y_clf, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only so that test-set statistics do
# not leak into preprocessing.
scaler = StandardScaler()
X_train_reg = X_train_clf = pd.DataFrame(
    scaler.fit_transform(X_train), columns=regression_features, index=X_train.index
)
X_test_reg = X_test_clf = pd.DataFrame(
    scaler.transform(X_test), columns=regression_features, index=X_test.index
)
print(f"Training feature matrix shape after scaling: {X_train_reg.shape}")

regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train_reg, y_train_reg)
reg_preds = regressor.predict(X_test_reg)

classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_clf, y_train_clf)
clf_preds = classifier.predict(X_test_clf)

print("Regression RMSE:", np.sqrt(mean_squared_error(y_test_reg, reg_preds)))
print("Classification Accuracy:", accuracy_score(y_test_clf, clf_preds))
print("Classification Report:\n", classification_report(y_test_clf, clf_preds))

# --- Persist artifacts ------------------------------------------------------
# The encoders are saved next to the models: without them the pickled models
# cannot encode categorical inputs or decode predicted statuses.
artifacts = {
    'regressor.pkl': regressor,
    'classifier.pkl': classifier,
    'scaler.pkl': scaler,
    'label_encoder.pkl': label_encoder,
    'feature_encoders.pkl': feature_encoders,
}
for filename, artifact in artifacts.items():
    with open(filename, 'wb') as f:
        pickle.dump(artifact, f)


# --- Streamlit UI -----------------------------------------------------------
def _encode_category(column, raw_value):
    """Map a user-entered category to the integer code learned in training.

    Blank input falls back to the '-1' placeholder used for missing values.
    Returns None when the label was never seen by the encoder for *column*.
    """
    encoder = feature_encoders[column]
    label = str(raw_value).strip() or '-1'
    if label not in set(encoder.classes_):
        return None
    return int(encoder.transform([label])[0])


def _collect_scaled_inputs():
    """Render the feature widgets and return a scaled one-row DataFrame.

    Returns None, after showing an error in the app, when a categorical
    value is not among the labels seen during training.
    """
    item_types = feature_encoders['item type'].classes_.tolist()
    raw = {
        "quantity tons": st.number_input("Quantity (Tons)"),
        "thickness": st.number_input("Thickness"),
        "width": st.number_input("Width"),
        "material_ref": st.text_input("Material Reference"),
        # A select box restricts the choice to labels the encoder knows.
        "item type": st.selectbox("Item Type", item_types),
    }
    row = dict(raw)
    for column in categorical_features:
        code = _encode_category(column, raw[column])
        if code is None:
            st.error(f"Unknown value {raw[column]!r} for '{column}'.")
            return None
        row[column] = code
    frame = pd.DataFrame([row], columns=regression_features)
    return pd.DataFrame(scaler.transform(frame), columns=regression_features)


st.title("Industrial Copper Modeling")
task = st.selectbox("Select Task:", ["Regression", "Classification"])
if task == "Regression":
    st.header("Predict Selling Price")
    input_df = _collect_scaled_inputs()
    if input_df is not None:
        reg_prediction = regressor.predict(input_df)
        st.write("Predicted Selling Price:", reg_prediction[0])
elif task == "Classification":
    st.header("Predict Status")
    input_df = _collect_scaled_inputs()
    if input_df is not None:
        clf_prediction = classifier.predict(input_df)
        # Decode through the fitted encoder: class codes follow sorted label
        # order, so code 1 is not guaranteed to mean "Won".
        predicted_status = label_encoder.inverse_transform(clf_prediction)[0]
        st.write("Predicted Status:", str(predicted_status).upper())

# Cell output: ModuleNotFoundError: No module named 'streamlit'
# (install the package first, e.g. `pip install streamlit`).