In [None]:
# Colab-only step: ask the user to upload the dataset through the browser.
# The recorded output of this cell shows the CSV being saved under its own
# name in the runtime's working directory, which is where the next cell reads it.
from google.colab import files
# NOTE(review): `uploaded` is presumably a mapping of file name -> contents
# (per the Colab API) — it is not referenced again in the visible notebook.
uploaded = files.upload()

Saving complex_employee_data.csv to complex_employee_data.csv


In [None]:
# Load and preprocess for regression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load dataset and drop the identifier column the analysis treats as irrelevant.
# The drop is chained (not in-place) so the cell is a pure function of the CSV
# and can be re-run safely.
df = pd.read_csv("complex_employee_data.csv").drop(columns=["EmployeeID"])

# Encode categorical columns.
# Each fitted LabelEncoder is kept so the app can offer the original labels
# and map a selected label back to the integer code used in training.
encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Features and regression target
X = df.drop(columns="Salary")
y = df["Salary"]

# Train-test split FIRST, on the unscaled features, so that the scaler below
# never sees the held-out rows. With the same random_state the row assignment
# is the same as when splitting the already-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: fit on the training rows only, then apply the same
# transformation to the test rows (avoids leaking test statistics into training
# and into the scaler that is later persisted for the app).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any cell that still reads X_scaled.
X_scaled = scaler.transform(X)


In [None]:
# Train regression models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import joblib

# Linear Regression (deterministic given the training data)
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Random Forest Regressor
# Seeded so that re-running the notebook produces the same forest, and hence
# the same saved model and the same predictions in the app. 42 matches the
# seed already used for the train/test split.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)


In [None]:
# Persist every artifact that app.py loads at startup.
# The (object, file name) pairs are written in the same order as before.
artifacts = [
    (linreg, "linreg_model.pkl"),
    (rf, "rf_model.pkl"),
    (scaler, "scaler.pkl"),
    (encoders, "encoders.pkl"),
]
for artifact, target_path in artifacts:
    joblib.dump(artifact, target_path)

# The feature order is stored as a plain list of column names. This stays the
# cell's final expression so the notebook still echoes the written file name.
joblib.dump(X.columns.tolist(), "feature_columns.pkl")


['feature_columns.pkl']

In [None]:
%%writefile app.py
import streamlit as st
import numpy as np
import pandas as pd
import joblib
import plotly.express as px
import plotly.graph_objects as go
from fpdf import FPDF
import base64
from datetime import datetime
import matplotlib.pyplot as plt

# Load the trained models and preprocessing objects saved by the notebook.
# joblib.load unpickles these files, so they must come from a trusted source.
linreg, rf, scaler, encoders, feature_columns = (
    joblib.load(artifact_path)
    for artifact_path in (
        "linreg_model.pkl",
        "rf_model.pkl",
        "scaler.pkl",
        "encoders.pkl",
        "feature_columns.pkl",
    )
)

# Page chrome: configuration, title and the prompt shown above the form.
st.set_page_config(
    page_title="Employee Salary Prediction",
    layout="centered",
)
st.title("💼 Employee Salary Predictor")
st.markdown("### Enter employee details to predict exact salary")

# PDF generator
def generate_pdf(input_data, prediction, model_choice):
    """Build a one-page PDF report and return it as an HTML download link.

    The PDF is rendered in memory rather than through a shared
    ``salary_report.pdf`` file on disk, so concurrent sessions cannot
    overwrite each other's report and no file is left behind.

    Args:
        input_data: Mapping of feature name to the value the user entered;
            each pair becomes one "name: value" line in the report.
        prediction: Numeric predicted salary; rounded to 2 decimals in the report.
        model_choice: Display name of the model that produced ``prediction``.

    Returns:
        An ``<a>`` tag (as a string) whose ``href`` is a base64 ``data:`` URI
        holding the PDF and whose ``download`` attribute is ``salary_report.pdf``.
    """
    def _latin1(text):
        # The built-in PDF fonts only cover Latin-1; replace anything else with
        # "?" instead of letting the report generation raise on user-supplied text.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="Employee Salary Prediction Report", ln=True, align="C")
    pdf.cell(200, 10, txt=f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True, align="C")
    pdf.ln(10)

    for k, v in input_data.items():
        pdf.cell(200, 10, txt=_latin1(f"{k}: {v}"), ln=True)

    pdf.ln(10)
    pdf.cell(200, 10, txt=_latin1(f"Model Used: {model_choice}"), ln=True)
    pdf.cell(0, 10, f"Predicted Salary: INR {round(prediction, 2)}", ln=True)

    filename = "salary_report.pdf"

    # dest="S" returns the document instead of writing it to disk. Classic
    # PyFPDF returns a latin-1 str, newer forks return bytes/bytearray, so
    # normalise both to bytes before encoding.
    raw = pdf.output(dest="S")
    pdf_bytes = raw.encode("latin-1") if isinstance(raw, str) else bytes(raw)

    b64 = base64.b64encode(pdf_bytes).decode()
    return f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">📎 Download PDF Report</a>'

# UI input: one widget per model feature, rendered in training-column order.
input_data = {}
with st.form("form"):
    for feature in feature_columns:
        if feature not in encoders:
            # No encoder was stored for this feature: collect it as a number.
            input_data[feature] = st.number_input(f"{feature}", value=0.0)
            continue

        # A stored encoder marks a categorical feature: offer its known labels.
        choices = encoders[feature].classes_.tolist()
        input_data[feature] = st.selectbox(f"{feature}", choices)

    model_choice = st.selectbox(
        "Choose Model",
        ["Linear Regression", "Random Forest"],
    )
    submitted = st.form_submit_button("Predict Salary")

# Preprocess user input
def preprocess_input(user_input):
    """Turn one form submission into the scaled feature row the models expect.

    Args:
        user_input: Mapping of feature name to raw value (labels for
            categorical features, numbers otherwise).

    Returns:
        The result of ``scaler.transform`` on a single-row frame whose columns
        follow ``feature_columns``.

    Raises:
        KeyError: If a feature listed in ``feature_columns`` is absent from
            ``user_input``.
    """
    missing = [name for name in feature_columns if name not in user_input]
    if missing:
        raise KeyError(f"Missing input features: {missing}")

    # Build the row in the exact training column order instead of relying on
    # the insertion order of the incoming dict; extra keys are dropped.
    df = pd.DataFrame([user_input], columns=list(feature_columns))

    for col, le in encoders.items():
        # An encoder may exist for a column that is not a model input
        # (e.g. a categorical non-feature column); skip those rather than fail.
        if col in df.columns:
            df[col] = le.transform(df[col])
    return scaler.transform(df)

# On Submit
if submitted:
    X_input = preprocess_input(input_data)

    # Both models are evaluated regardless of the selection, because the
    # comparison chart at the bottom shows the two predictions side by side.
    linreg_pred = linreg.predict(X_input)[0]
    rf_pred = rf.predict(X_input)[0]
    prediction = linreg_pred if model_choice == "Linear Regression" else rf_pred

    st.success(f"🎯 **Predicted Salary: INR {round(prediction, 2)}**")

    # PDF
    st.markdown(generate_pdf(input_data, prediction, model_choice), unsafe_allow_html=True)

    # Display DataFrame: the submitted inputs plus the rounded prediction.
    # (Previously this frame was built but never rendered.)
    display_df = pd.DataFrame([input_data])
    display_df["Predicted Salary"] = round(prediction, 2)
    st.dataframe(display_df)

    # Radar Chart
    st.subheader("📊 Employee Input Overview (Radar Plot)")
    # Features without a stored encoder are the numeric ones.
    numeric_cols = [col for col in feature_columns if col not in encoders]
    if numeric_cols:
        radar_fig = go.Figure()
        radar_fig.add_trace(go.Scatterpolar(
            r=[input_data[col] for col in numeric_cols],
            theta=numeric_cols,
            fill='toself',
            name='Employee Profile'
        ))
        radar_fig.update_layout(
            polar=dict(radialaxis=dict(visible=True)),
            showlegend=False,
            title="📌 Employee Feature Radar View"
        )
        st.plotly_chart(radar_fig)

    # Bar Chart (Filtered numeric + predicted)
    st.subheader("📋 Feature Values")
    # Reuse the encoder-derived numeric column list instead of a hard-coded
    # set of categorical names, so the chart stays numeric-only whatever
    # categorical columns the dataset happens to contain.
    filtered_display = {col: input_data[col] for col in numeric_cols}
    filtered_display["Predicted Salary"] = prediction
    filtered_df = pd.DataFrame([filtered_display])

    melted = filtered_df.melt(id_vars=["Predicted Salary"])
    bar_fig = px.bar(
        melted,
        x="variable",
        y="value",
        color="variable",
        text="value",
        title="📊 Filtered Feature Breakdown"
    )
    st.plotly_chart(bar_fig)

    # Matplotlib Bar Chart for Both Model Outputs
    st.subheader("🤖 Model Salary Predictions")
    models = ['Linear Regression', 'Random Forest']
    predictions = [linreg_pred, rf_pred]

    fig, ax = plt.subplots()
    ax.bar(models, predictions, color=['#4c72b0', '#55a868'])
    ax.set_title('Model Salary Predictions for Employee')
    ax.set_ylabel('Predicted Salary')
    # Keep 20% headroom, but always include zero and never pass an inverted or
    # empty range: a regression model can output zero or negative values.
    y_low = min(0, min(predictions) * 1.2)
    y_high = max(0, max(predictions) * 1.2)
    if y_low < y_high:
        ax.set_ylim(y_low, y_high)
    ax.grid(axis='y')

    st.pyplot(fig)
    # Release the figure so repeated submissions do not accumulate open
    # Matplotlib figures in the server process.
    plt.close(fig)


Writing app.py


In [None]:
!pip install streamlit plotly joblib fpdf --quiet
!pip install pyngrok --quiet

  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.3/44.3 kB 2.6 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.9/9.9 MB 73.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 85.8 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.1/79.1 kB 5.3 MB/s eta 0:00:00
  Building wheel for fpdf (setup.py) ... done


In [None]:
# Register the ngrok authtoken without typing it into the notebook itself.
# The token is taken from the NGROK_AUTH_TOKEN environment variable when set;
# otherwise the user is prompted and the input is not echoed or stored in a cell.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# Launch Streamlit app with Ngrok tunnel
import os
from pyngrok import ngrok

# Kill any existing tunnels
ngrok.kill()

port = 8501

# Start ngrok tunnel
public_url = ngrok.connect(port)
print("🔗 Ngrok URL:", public_url)

# Run Streamlit in background
!streamlit run app.py --server.port 8501 > /dev/null 2>&1 &


🔗 Ngrok URL: NgrokTunnel: "https://7d008c649626.ngrok-free.app" -> "http://localhost:8501"
