<a href="https://colab.research.google.com/github/1vanl0pez/DataScienceSantiagoIvan/blob/streamLit/DSPRO1_HRAnalytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [92]:
!pip install streamlit pyngrok
!pip install millify



In [93]:
import pandas as pd
from google.colab import drive

# Mount Google Drive so the source workbook is reachable from the Colab VM
drive.mount('/content/drive')

# Read the Excel workbook stored in Drive
file_path = '/content/drive/MyDrive/general_data.xlsx'
df = pd.read_excel(file_path)

# Export a CSV copy to local disk; the Streamlit script loads this file
df.to_csv('/content/general_data.csv', index=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [111]:
%%writefile app.py
# Import libraries
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from millify import millify

# Load the data from the CSV file saved by the notebook
file_path = '/content/general_data.csv'
df = pd.read_csv(file_path)

# Data cleaning step: every column that has missing values is filled with its
# own median, then categorical columns are one-hot encoded (first level dropped).
columns_with_na = [name for name in df.columns if df[name].isnull().sum() > 0]
df = df.fillna({name: df[name].median() for name in columns_with_na})
df = pd.get_dummies(df, drop_first=True)

def suggest_optimized_adjustments(input_data, model, step=0.5, target_increase=0.5, max_satisfaction=4.0, max_iterations=500,
                                  feature_names=None, reference_data=None):
    """Search for feature adjustments that raise the model's predicted satisfaction.

    Starting from ``input_data``, each iteration tries moving every feature by
    ``step`` in the direction of its correlation with ``JobSatisfaction`` in the
    reference data. Moves whose trial prediction differs from the initial
    prediction are applied together, and the search stops as soon as the
    prediction reaches ``min(initial prediction + target_increase, max_satisfaction)``.

    Parameters
    ----------
    input_data : array-like of shape (1, n_features)
        Starting feature values, ordered like ``feature_names``. Not modified.
    model : object
        Fitted estimator whose ``predict`` accepts a DataFrame with the feature columns.
    step : float
        Size of one adjustment per feature and iteration.
    target_increase : float
        Desired improvement over the initial prediction.
    max_satisfaction : float
        Upper bound applied to the target prediction.
    max_iterations : int
        Maximum number of adjustment rounds.
    feature_names : sequence of str, optional
        Column names of the model inputs. Defaults to the columns of the
        module-level ``top_5_features``.
    reference_data : pandas.DataFrame, optional
        Data holding the feature columns and ``JobSatisfaction``, used for the
        correlation signs. Defaults to the module-level ``data_stayed``.

    Returns
    -------
    dict
        On success: ``"Adjusted Inputs"``, ``"New Prediction"`` and ``"Iterations"``.
        Otherwise: ``"Message"``, ``"Adjusted Inputs"`` and ``"Final Prediction"``.
    """
    # Cap on PercentSalaryHike and the MonthlyIncome increment used by the special case below.
    max_salary_hike = 30
    income_increment = 100

    # Work with a plain list of names. The original indexed the feature DataFrame
    # with integers (a column-label lookup, hence KeyError) and sized loops with
    # len(DataFrame), which counts rows rather than features.
    if feature_names is None:
        feature_names = top_5_features.columns
    feature_names = list(feature_names)
    if reference_data is None:
        reference_data = data_stayed

    def predict(values):
        # Attach the column names so the model receives a named DataFrame.
        return model.predict(pd.DataFrame(values, columns=feature_names))[0]

    def as_result(prediction_key, prediction, **extra):
        result = {"Adjusted Inputs": dict(zip(feature_names, adjusted_data[0])), prediction_key: prediction}
        result.update(extra)
        return result

    base_prediction = predict(input_data)
    target_prediction = min(base_prediction + target_increase, max_satisfaction)
    # Float copy: the caller's data is left untouched and fractional steps can be stored.
    adjusted_data = np.array(input_data, dtype=float)
    correlations = {
        feature: reference_data[feature].corr(reference_data["JobSatisfaction"])
        for feature in feature_names
    }
    hike_idx = feature_names.index("PercentSalaryHike") if "PercentSalaryHike" in feature_names else None
    # Defined up front so the final return works even when max_iterations is 0.
    new_prediction = base_prediction

    for iteration in range(max_iterations):
        total_adjustments = []
        adjustments_applied = False
        for idx, feature in enumerate(feature_names):
            if feature == "MonthlyIncome" and hike_idx is not None:
                # NOTE(review): as written this condition cannot hold, because
                # PercentSalaryHike is itself one of the non-income features and
                # would have to be > 30 and <= 0 at once. The predicate is kept
                # unchanged (minus the crash) until the intended rule is confirmed.
                others_exhausted = all(
                    adjusted_data[0, i] <= 0
                    for i, name in enumerate(feature_names)
                    if name != "MonthlyIncome"
                )
                if adjusted_data[0, hike_idx] > max_salary_hike and others_exhausted:
                    adjusted_data[0, idx] += income_increment
                    new_prediction = predict(adjusted_data)
                    if new_prediction >= target_prediction:
                        return as_result("New Prediction", new_prediction, Iterations=iteration + 1)
                    adjustments_applied = True
                    continue

            relationship_direction = 1 if correlations[feature] > 0 else -1
            proposed_value = adjusted_data[0, idx] + step * relationship_direction
            # Feature values must stay strictly positive.
            if proposed_value <= 0:
                continue
            if feature == "PercentSalaryHike" and proposed_value > max_salary_hike:
                continue

            input_modified = adjusted_data.copy()
            input_modified[0, idx] = proposed_value
            new_prediction = predict(input_modified)
            # NOTE(review): impact is measured against the initial prediction, not the
            # prediction for the current adjusted inputs; kept to preserve behaviour.
            impact_per_unit = (new_prediction - base_prediction) / step
            if abs(impact_per_unit) > 0:
                total_adjustments.append((idx, step * relationship_direction))
                adjustments_applied = True

        # Apply all accepted moves of this round at once, then re-evaluate.
        for idx, adjustment_step in total_adjustments:
            adjusted_data[0, idx] += adjustment_step

        new_prediction = predict(adjusted_data)
        if new_prediction >= target_prediction:
            return as_result("New Prediction", new_prediction, Iterations=iteration + 1)
        if not adjustments_applied:
            return as_result(
                "Final Prediction", new_prediction,
                Message="No further optimization possible while maintaining positive values.",
            )

    return as_result(
        "Final Prediction", new_prediction,
        Message="Max iterations reached without achieving target increase.",
    )

# Define the features and train the model.
# Only rows with Attrition_Yes == 0 (the "stayed" subset) are used.
stayed_mask = df['Attrition_Yes'] == 0
data_stayed = df[stayed_mask]
y_stayed = data_stayed["JobSatisfaction"]
X_stayed = data_stayed.drop(columns=["JobSatisfaction"])

# Despite its name, top_5_features is a DataFrame (a column subset of X_stayed);
# the rest of the script reads the feature names from its `.columns`.
selected_columns = [
    "MonthlyIncome",
    "DistanceFromHome",
    "PercentSalaryHike",
    "YearsSinceLastPromotion",
    "TrainingTimesLastYear",
]
top_5_features = X_stayed[selected_columns]
X_important_stayed = data_stayed[top_5_features.columns]

# Fixed random_state so repeated runs fit the same forest
final_model = RandomForestRegressor(n_estimators=100, random_state=24)
final_model.fit(X_important_stayed, y_stayed)

# Streamlit UI
def _traffic_light(score):
    """Return the (label, image file) pair for a satisfaction score.

    Scores up to 2 are red, scores above 2 and up to 3 are yellow, anything else is green.
    """
    if score <= 2:
        return "Red", "red.png"
    if score <= 3:
        return "Yellow", "yellow.png"
    return "Green", "green.png"


def _render_score_card(title, score, image, footnote=None):
    """Draw a bordered card showing ``score`` next to its traffic-light image.

    ``footnote``, when given, is written underneath the image.
    """
    card_cols = st.columns([1, 1, 1, 1])
    with card_cols[1]:
        st.markdown(f"""
            <div style="display: flex; align-items: center; justify-content: space-between; height: 135px; padding: 10px; border: 1px solid #ddd; border-radius: 10px; margin: 10px 0;">
                <div style="text-align: center;">
                    <div style="font-size: 20px; font-weight: bold; color: #a8b0b2;">{title}</div>
                    <div style="font-size: 40px; font-weight: bold; color: #0671e3;">{score:.2f}</div>
                </div>
            </div>
        """, unsafe_allow_html=True)
    with card_cols[2]:
        st.image(image, width=75)
        if footnote is not None:
            st.write(footnote)


# Page header: logo next to the title
col1, col2 = st.columns([1, 3])
with col1:
    st.image("satislab_logo.png", width=200)
with col2:
    st.title("Job Satisfaction Prediction App")
st.header("Enter the following data to determine job satisfaction:")

# Show the observed range of every feature as guidance for the inputs
st.subheader("Features range :")
for feature in top_5_features.columns:
    min_val = data_stayed[feature].min()
    max_val = data_stayed[feature].max()
    mean_val = data_stayed[feature].mean()
    st.write(f"{feature} - Min: {min_val} - Max: {max_val} - Avg: {mean_val:.2f}")

# One numeric input per feature, collected as a single-row array in model column order
input_data = []
for feature in top_5_features.columns:
    value = st.number_input(feature, min_value=0.0)
    input_data.append(value)

input_data = np.array([input_data])

if st.button('Predict Satisfaction'):
    satisfaction_prediction = final_model.predict(pd.DataFrame(input_data, columns=top_5_features.columns))[0]
    traffic_light, traffic_image = _traffic_light(satisfaction_prediction)
    _render_score_card("Predicted satisfaction", satisfaction_prediction, traffic_image)

    if traffic_light != "Green":
        st.write("Optimizing adjustments to improve satisfaction...")
        optimization_result = suggest_optimized_adjustments(input_data, final_model, step=0.5, target_increase=0.5, max_satisfaction=4.0)

        # Every result carries "Adjusted Inputs", so the message has to be checked
        # on its own; otherwise an unsuccessful search is never reported.
        if "Message" in optimization_result:
            st.write(optimization_result["Message"])

        if "Adjusted Inputs" in optimization_result:
            original_values = dict(zip(top_5_features.columns, input_data[0]))
            adjusted_values = optimization_result["Adjusted Inputs"]
            # Successful searches report "New Prediction", unsuccessful ones "Final Prediction".
            final_prediction = optimization_result.get(
                "New Prediction",
                optimization_result.get("Final Prediction", satisfaction_prediction),
            )
            # Only successful searches report an iteration count.
            iterations = optimization_result.get("Iterations")

            st.subheader("Optimization results:")
            # One metric column per feature, with the change relative to the user's input
            columns = st.columns(len(top_5_features.columns))
            for idx, (feature, adjusted_value) in enumerate(adjusted_values.items()):
                delta = adjusted_value - original_values[feature]
                with columns[idx]:
                    if feature == "MonthlyIncome":
                        shown_value = millify(adjusted_value, precision=5)
                    else:
                        shown_value = f"{adjusted_value:.2f}"
                    st.metric(label=feature, value=shown_value, delta=f"{delta:.2f}")

            # Final prediction card, with the iteration count when it is known
            _, final_image = _traffic_light(final_prediction)
            footnote = f"Iterations: {iterations}" if iterations is not None else None
            _render_score_card("Final job satisfaction", final_prediction, final_image, footnote=footnote)
    else:
        st.write("Satisfaction is already at green level. No adjustments needed.")

Overwriting app.py


In [103]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store the ngrok authtoken in the notebook. The token that was
# hardcoded in this cell has been exposed and should be revoked and replaced.
# The token is taken from the NGROK_AUTHTOKEN environment variable when set,
# otherwise it is requested interactively without being echoed.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [104]:
# Stop any ngrok process that is still running before a new tunnel is opened
!killall ngrok


In [105]:
from pyngrok import ngrok

# Open an ngrok tunnel to the local port the Streamlit app is served on
# and show the tunnel that was created.
STREAMLIT_PORT = 8501
public_url = ngrok.connect(STREAMLIT_PORT)
print(f"Streamlit app URL: {public_url}")


Streamlit app URL: NgrokTunnel: "https://9375-34-86-198-240.ngrok-free.app" -> "http://localhost:8501"


In [106]:
# Start the Streamlit app in the background with its output sent to /dev/null,
# so the cell returns while the app keeps running
!streamlit run app.py &>/dev/null&

