In [8]:
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm



# Read one named sheet of the Excel workbook into the working DataFrame.
# NOTE(review): the path is absolute and machine-specific; consider making the
# data directory configurable so the notebook can run elsewhere.
data_path = r'C:\Users\LOQ\OneDrive\Pose Health Care\MA (PPop)\สถิติ Pose Repairman.xlsx'
df = pd.read_excel(io=data_path, sheet_name="ข้อมูลการใช้นำยา")


In [9]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")


In [10]:
# Show the frame after the missing-value rows were dropped (notebook rich display).
df

Unnamed: 0,แผนก,หมายเลขเครื่อง,ปัญหา,ระยะเวลาในใช้น้ำยา /แบต (วัน)
0,WARD 5C,2,น้ำยาหมด,23
1,WARD 5C,3,น้ำยาหมด,28
2,WARD 5C,3,น้ำยาหมด,30
3,WARD 5C,2,น้ำยาหมด,40
4,WARD 5C,3,น้ำยาหมด,25
...,...,...,...,...
1962,KIDNEY(ศูนย์โรคไต),1473,แบตเตอร์รี่หมด,153
1963,KIDNEY(ศูนย์โรคไต),1474,แบตเตอร์รี่หมด,169
1964,KIDNEY(ศูนย์โรคไต),1475,แบตเตอร์รี่หมด,156
1965,KIDNEY(ศูนย์โรคไต),1476,แบตเตอร์รี่หมด,157


In [11]:
# Debug aid: write a caption followed by the DataFrame's column index so the
# exact column names used below can be checked against the loaded sheet.
st.write("DataFrame Columns:")
st.write(df.columns)

2024-07-16 11:26:09.718 
  command:

    streamlit run c:\Users\LOQ\OneDrive\streamlit\.venv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [12]:
# Names of the sheet columns used throughout the notebook.
# The three feature columns: department, machine id, and reported issue.
department_column, machine_id_column, issue_column = (
    'แผนก',
    'หมายเลขเครื่อง',
    'ปัญหา',
)
# Target column: number of days recorded for the solution / battery.
maintenance_duration_column = 'ระยะเวลาในใช้น้ำยา /แบต (วัน)'


In [13]:
# Validate that the target column exists, then force it to a numeric dtype.
if maintenance_duration_column not in df.columns:
    # Without the target column nothing downstream can work: report and halt the app.
    st.error(f"Column '{maintenance_duration_column}' not found in the data.")
    st.stop()
else:
    # Values that cannot be parsed as numbers become NaN ...
    df[maintenance_duration_column] = pd.to_numeric(
        df[maintenance_duration_column],
        errors='coerce',
    )
    # ... and the rows holding those NaNs are removed from df in place.
    df.dropna(subset=[maintenance_duration_column], inplace=True)

In [14]:
# Page title of the Streamlit application.
st.title(body="Machinery Maintenance Information and Prediction")

DeltaGenerator()

In [15]:
# Render the full set of maintenance records as a static table under its own header.
st.header(body="Maintenance Records")
st.table(data=df)

DeltaGenerator()

In [16]:

# One-hot encode the two categorical columns (department and issue).
# sparse_output=False makes the encoder return a dense array.
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(df[[department_column, issue_column]])
encoded_features = encoder.transform(df[[department_column, issue_column]])



In [17]:

# Create a DataFrame with the encoded features.
# The encoder returns a plain dense array whose rows follow df's row order, so
# the new frame must reuse df's index. The earlier dropna() calls can leave gaps
# in df.index; with a default RangeIndex the later pd.concat(axis=1) would align
# on mismatched labels, shifting rows and introducing NaNs.
encoded_feature_names = encoder.get_feature_names_out([department_column, issue_column])
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoded_feature_names,
    index=df.index,
)


In [18]:

# Feature matrix: the numeric machine-id column side by side with the one-hot columns.
# Target vector: the duration column.
y = df[maintenance_duration_column]
X = pd.concat(
    [df[[machine_id_column]], encoded_df],
    axis=1,
)


In [19]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [20]:
# Fit an ordinary least-squares regressor (scikit-learn) on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [21]:
# Fit the same regression with statsmodels to obtain a full statistical summary.
# statsmodels does not add an intercept on its own, hence the explicit constant column.
X_train_sm = sm.add_constant(X_train)
ols_model = sm.OLS(endog=y_train, exog=X_train_sm).fit()


In [22]:
# Score the held-out rows with the scikit-learn model.
y_pred = model.predict(X=X_test)

In [23]:
# Error metrics of the scikit-learn model on the held-out rows.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # same unit as the target (days)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [27]:
# Display model performance for both fitted models.
st.header("Model Performance")
st.write("**Scikit-learn Linear Regression Model**")
st.write(f"Mean Absolute Error (MAE): {mae}")
st.write(f"Mean Squared Error (MSE): {mse}")
st.write(f"Root Mean Squared Error (RMSE): {rmse}")

# The statsmodels metrics are computed here, right before they are displayed.
# Previously this cell read mae_sm / mse_sm / rmse_sm although they were only
# defined in cells placed further down, so a top-to-bottom run raised NameError.
# has_constant='add' forces the intercept column, and the column selection keeps
# exactly the regressors (and their order) the OLS model was fitted with.
X_test_sm = sm.add_constant(X_test, has_constant='add')[ols_model.model.exog_names]
y_pred_sm = ols_model.predict(X_test_sm)
mae_sm = mean_absolute_error(y_test, y_pred_sm)
mse_sm = mean_squared_error(y_test, y_pred_sm)
rmse_sm = np.sqrt(mse_sm)

st.write("**Statsmodels OLS Regression Model**")
st.write(f"Mean Absolute Error (MAE): {mae_sm}")
st.write(f"Mean Squared Error (MSE): {mse_sm}")
st.write(f"Root Mean Squared Error (RMSE): {rmse_sm}")


In [25]:
# Score the held-out rows with the statsmodels OLS model.
# The test matrix needs the same constant column that was added for training.
X_test_sm = sm.add_constant(X_test)
y_pred_sm = ols_model.predict(exog=X_test_sm)


In [26]:
# Error metrics of the statsmodels OLS model on the held-out rows.
mse_sm = mean_squared_error(y_true=y_test, y_pred=y_pred_sm)
rmse_sm = np.sqrt(mse_sm)  # same unit as the target (days)
mae_sm = mean_absolute_error(y_true=y_test, y_pred=y_pred_sm)

In [28]:
# Build the regression summary once, show it in the Streamlit app as
# preformatted text, and leave it as the cell's last expression so the
# notebook renders it as well.
ols_summary = ols_model.summary()
st.write("Statsmodels OLS Regression Summary")
st.text(ols_summary)
ols_summary

0,1,2,3
Dep. Variable:,ระยะเวลาในใช้น้ำยา /แบต (วัน),R-squared:,0.92
Model:,OLS,Adj. R-squared:,0.916
Method:,Least Squares,F-statistic:,249.2
Date:,"Tue, 16 Jul 2024",Prob (F-statistic):,0.0
Time:,11:28:23,Log-Likelihood:,-6224.3
No. Observations:,1573,AIC:,12590.0
Df Residuals:,1503,BIC:,12960.0
Df Model:,69,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,65.5625,1.484,44.175,0.000,62.651,68.474
หมายเลขเครื่อง,-0.0018,0.003,-0.597,0.551,-0.008,0.004
แผนก_7C พักแพทย์,12.7195,2.613,4.867,0.000,7.593,17.846
แผนก_BIH_ICU6,-5.8938,3.032,-1.944,0.052,-11.842,0.054
แผนก_BIH_ICU7,-7.8726,3.181,-2.475,0.013,-14.113,-1.633
แผนก_BIH_ICU8,-6.7385,3.276,-2.057,0.040,-13.165,-0.312
แผนก_BIH_Ward4S1,-6.4754,3.735,-1.734,0.083,-13.802,0.851
แผนก_BIH_Ward4S2,-7.6642,3.008,-2.548,0.011,-13.564,-1.764
แผนก_BIH_เวรเปล,-6.9171,4.188,-1.651,0.099,-15.133,1.299

0,1,2,3
Omnibus:,84.045,Durbin-Watson:,1.977
Prob(Omnibus):,0.0,Jarque-Bera (JB):,280.241
Skew:,0.15,Prob(JB):,1.3999999999999999e-61
Kurtosis:,5.046,Cond. No.,5.46e+18


In [29]:
# Input widgets for a single prediction.
st.header("Predict Time Until Maintenance Issue")

# The machine id is restricted to the range of ids present in the data.
machine_ids = df[machine_id_column]
machine_id = st.number_input(
    "Enter Machine ID:",
    min_value=int(machine_ids.min()),
    max_value=int(machine_ids.max()),
)

# Department and issue can only take values that occur in the data.
selected_department = st.selectbox("Select Department", df[department_column].unique())
selected_issue = st.selectbox("Select Issue", df[issue_column].unique())

2024-07-16 11:28:40.330 Session state does not function when running a script without `streamlit run`


In [30]:
# Turn the widget values into a one-row feature frame laid out like X.
input_data = pd.DataFrame(
    {
        machine_id_column: [machine_id],
        department_column: [selected_department],
        issue_column: [selected_issue],
    }
)
# Reuse the fitted encoder so the one-hot columns match the training columns.
input_encoded = encoder.transform(input_data[[department_column, issue_column]])
input_encoded_df = pd.DataFrame(input_encoded, columns=encoded_feature_names)
input_final = pd.concat(
    [input_data[[machine_id_column]], input_encoded_df],
    axis=1,
)


In [33]:
# Debug aid: print dtypes and non-null counts of the single-row prediction input.
input_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   หมายเลขเครื่อง  1 non-null      int64 
 1   แผนก            1 non-null      object
 2   ปัญหา           1 non-null      object
dtypes: int64(1), object(2)
memory usage: 156.0+ bytes


In [31]:

if st.button("Predict"):
    # Prediction using sklearn model
    prediction = model.predict(input_final)
    st.write(f"Predicted Time Until Maintenance Issue (sklearn): {prediction[0]:.2f} days")

    # Prediction using statsmodels model.
    # The OLS model was fitted on a design matrix that includes a constant
    # column, so the input row needs one too. has_constant='add' is required
    # here: with a single row every non-zero column looks "constant" and the
    # default ('skip') would silently add nothing. Selecting exog_names keeps
    # exactly the fitted regressors in the fitted order.
    input_final_sm = sm.add_constant(input_final, has_constant='add')
    input_final_sm = input_final_sm[ols_model.model.exog_names]
    prediction_sm = ols_model.predict(input_final_sm)
    # predict() returns a Series for DataFrame input; take the first value positionally.
    st.write(f"Predicted Time Until Maintenance Issue (statsmodels): {prediction_sm.iloc[0]:.2f} days")


In [32]:
# Sidebar filter on the department column.
# The widget is populated from (and the rows are filtered by) department_column,
# so the user-facing labels say "Department"; they previously said
# "Machine Type", which did not match the values offered.
# Variable names are kept unchanged in case other cells reference them.
st.sidebar.header("Filter by Department")
selected_machine_type = st.sidebar.selectbox("Select Department", df[department_column].unique())
filtered_data = df[df[department_column] == selected_machine_type]
st.header(f"Records for Department: {selected_machine_type}")
st.table(filtered_data)

DeltaGenerator()

In [None]:
# Statsmodels prediction for the prepared input row, printed in the notebook
# (same computation as the "Predict" button, without Streamlit).
# The dangling `input = ` assignment was removed: it was a syntax error and
# would also have shadowed the built-in input().
# The fitted design matrix has a constant column, so one is forced onto the
# single-row input (the default has_constant='skip' would add nothing for a
# one-row frame) and the columns are put in the fitted order.
input_final_sm = sm.add_constant(input_final, has_constant='add')
input_final_sm = input_final_sm[ols_model.model.exog_names]
prediction_sm = ols_model.predict(input_final_sm)
print(f"Predicted Time Until Maintenance Issue (statsmodels): {prediction_sm.iloc[0]:.2f} days")
