In [18]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, roc_auc_score
from sklearn.preprocessing import LabelEncoder

In [12]:
def train_model(X_train, y_train, model_type):
    if model_type == "Logistic Regression":
        model = LogisticRegression()
    elif model_type == "Random Forest":
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    elif model_type == "XGBoost":
        model = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
    elif model_type == "SVM":
        model = SVC(kernel='linear', random_state=42, probability=True)
    
    model.fit(X_train, y_train)
    return model

In [16]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)
    
    return accuracy, conf_matrix, class_report, roc_auc

In [19]:
def analysis_and_model_page():
    st.title("Анализ данных и модель")
    
    # Загрузка данных
    uploaded_file = st.file_uploader("data/predictive_maintenance.csv", type="csv")
    
    if uploaded_file is not None:
        data = pd.read_csv(uploaded_file)
    
    # Предобработка данных
    data = data.drop(columns=['UDI', 'Product ID', 'TWF', 'HDF', 'PWF', 'OSF','RNF'])
    data['Type'] = LabelEncoder().fit_transform(data['Type'])
    print(data.isnull().sum())
    
    
    # Разделение данных
    X = data.drop(columns=['Machine failure'])
    y = data['Machine failure']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Выбор модели
    model_type = st.selectbox("Выберите модель", 
                            ["Logistic Regression", "Random Forest", "XGBoost", "SVM"])
    
    # Обучение модели
    if st.button("Обучить модель"):
        model = train_model(X_train, y_train, model_type)
        
        # Оценка модели
        accuracy, conf_matrix, class_report, roc_auc = evaluate_model(model, X_test, y_test)
        
        # Вывод результатов
        st.header("Результаты обучения модели")
        st.write(f"Accuracy: {accuracy:.2f}")
        st.write(f"ROC-AUC: {roc_auc:.2f}")
         
        st.subheader("Confusion Matrix")
        fig, ax = plt.subplots()
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
        st.pyplot(fig)
        
        st.subheader("Classification Report")
        st.text(class_report)
        
        # ROC-кривая
        st.subheader("ROC-кривая")
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr, label=f"{model_type} (AUC = {roc_auc:.2f})")
        ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Random Guess')
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('ROC-кривая')
        ax.legend()
        st.pyplot(fig)
        
        # Сохранение модели в session_state
        st.session_state.model = model
    
    # Предсказание на новых данных
    st.header("Предсказание на новых данных")
    with st.form("prediction_form"):
        st.write("Введите значения признаков для предсказания:")
        productID = st.selectbox("productID", ["L", "M", "H"])
        air_temp = st.number_input("air temperature [K]")
        process_temp = st.number_input("process temperature [K]")
        rotational_speed = st.number_input("rotational speed [rpm]")
        torque = st.number_input("torque [Nm]")
        tool_wear = st.number_input("tool wear [min]")
        
        submit_button = st.form_submit_button("Предсказать")
        
        if submit_button and 'model' in st.session_state:
            # Преобразование введенных данных
            type_map = {"L": 0, "M": 1, "H": 2}
            input_data = pd.DataFrame({
                'Type': [type_map[type_]],
                'Air temperature [K]': [air_temp],
                'Process temperature [K]': [process_temp],
                'Rotational speed [rpm]': [rotational_speed],
                'Torque [Nm]': [torque],
                'Tool wear [min]': [tool_wear]
            })
            
            scaler = StandardScaler()
            numerical_features = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque', 'Tool wear']
            input_data[numerical_features] = scaler.fit_transform(data[numerical_features])
            
            
            # Предсказание
            prediction = st.session_state.model.predict(input_data)
            prediction_proba = st.session_state.model.predict_proba(input_data)[:, 1]
            
            st.subheader("Результат предсказания")
            if prediction[0] == 1:
                st.error(f"Прогнозируется отказ оборудования (вероятность: {prediction_proba[0]:.2%})")
            else:
                st.success(f"Оборудование в норме (вероятность отказа: {prediction_proba[0]:.2%})")

if __name__ == "__main__":
    analysis_and_model_page()

2025-05-16 20:15:24.320 
  command:

    streamlit run /Users/donndiia/opt/anaconda3/lib/python3.9/site-packages/ipykernel_launcher.py [ARGUMENTS]


UnboundLocalError: local variable 'data' referenced before assignment