# Employee Salary Prediction – Internship Project
By: Abhinav Mathur

In [None]:
!pip install streamlit pyngrok scikit-learn pandas numpy

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# Build a reproducible 100-row synthetic dataset. The categorical columns are
# fixed patterns repeated to fill the frame; the numeric columns are random.
np.random.seed(42)
n_rows = 100

# The three draws consume the seeded global RNG in this order
# (Experience, Age, Salary); reordering them would change the data.
experience_years = np.random.randint(1, 15, n_rows)
ages = np.random.randint(22, 45, n_rows)
salaries = np.random.randint(40000, 150000, n_rows)

job_cycle = ['Data Scientist', 'HR Manager', 'Software Engineer', 'Analyst', 'Project Manager']
education_cycle = ['Masters', 'Bachelors', 'Masters', 'PhD', 'Bachelors']
gender_cycle = ['Male', 'Female']
location_cycle = ['Delhi', 'Mumbai', 'Bangalore', 'Hyderabad', 'Chennai']

# Dict insertion order fixes the column order of the frame.
df = pd.DataFrame({
    'Job Title': job_cycle * (n_rows // len(job_cycle)),
    'Education Level': education_cycle * (n_rows // len(education_cycle)),
    'Experience': experience_years,
    'Gender': gender_cycle * (n_rows // len(gender_cycle)),
    'Location': location_cycle * (n_rows // len(location_cycle)),
    'Age': ages,
    'Salary': salaries,
})
df.head()

In [None]:
# Integer label encodings for the categorical columns. The Streamlit app
# carries its own copy of these maps, so the two must be kept in sync.
job_map = {
    'Analyst': 0,
    'Data Scientist': 1,
    'HR Manager': 2,
    'Project Manager': 3,
    'Software Engineer': 4,
}
edu_map = {'Bachelors': 0, 'Masters': 1, 'PhD': 2}
gender_map = {'Male': 1, 'Female': 0}
loc_map = {
    'Bangalore': 0,
    'Chennai': 1,
    'Delhi': 2,
    'Hyderabad': 3,
    'Mumbai': 4,
}

# Replace each categorical column with its integer code, in place.
for column, mapping in (
    ('Job Title', job_map),
    ('Education Level', edu_map),
    ('Gender', gender_map),
    ('Location', loc_map),
):
    df[column] = df[column].map(mapping)

# Salary is the target; every remaining column is a feature.
y = df['Salary']
X = df.drop(columns='Salary')

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train the regressor. random_state is pinned so the fitted forest (and the
# saved model.pkl) is the same on every run, even when this cell is re-run on
# its own and the global NumPy RNG is no longer in its freshly seeded state.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Save model so the Streamlit app can load it from model.pkl.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)

# Evaluation on the held-out split.
# NOTE(review): Salary in the synthetic data is drawn independently of the
# features, so a score near or below 0 is expected here - confirm once real
# data is used.
y_pred = model.predict(X_test)
print("R2 Score:", r2_score(y_test, y_pred))

## 📦 Streamlit App + pyngrok with CSV Column Check

In [None]:
from pyngrok import ngrok
import os
import threading
import time

# Source text of the Streamlit app (written to app.py further down).
# The app validates uploaded CSVs (required columns, known category values,
# numeric Experience/Age) and offers a single-employee form that collects
# every feature the model was trained on.
app_code = '''
import pickle

import pandas as pd
import streamlit as st

# Feature columns, in the order the model was trained on.
REQUIRED_COLS = ['Job Title', 'Education Level', 'Experience', 'Gender', 'Location', 'Age']

# Label encodings; these must match the maps used when the model was trained.
CATEGORY_MAPS = {
    'Job Title': {'Analyst': 0, 'Data Scientist': 1, 'HR Manager': 2, 'Project Manager': 3, 'Software Engineer': 4},
    'Education Level': {'Bachelors': 0, 'Masters': 1, 'PhD': 2},
    'Gender': {'Male': 1, 'Female': 0},
    'Location': {'Bangalore': 0, 'Chennai': 1, 'Delhi': 2, 'Hyderabad': 3, 'Mumbai': 4},
}


def encode_features(frame):
    """Return (features, problems) for a frame that has every REQUIRED_COLS column.

    features is a DataFrame in training column order with categorical columns
    label-encoded and the remaining columns converted to numbers. problems is
    a list of "column: values" strings naming values that could not be
    encoded; it is empty when every value is usable.
    """
    features = frame[REQUIRED_COLS].copy()
    problems = []
    for col in REQUIRED_COLS:
        if col in CATEGORY_MAPS:
            features[col] = frame[col].map(CATEGORY_MAPS[col])
        else:
            features[col] = pd.to_numeric(frame[col], errors="coerce")
        # Anything that became NaN was missing, unknown, or non-numeric.
        bad_values = frame.loc[features[col].isna(), col]
        if not bad_values.empty:
            shown = ", ".join(sorted({str(value) for value in bad_values}))
            problems.append(f"{col}: {shown}")
    return features, problems


# pickle can execute arbitrary code on load: only use a model.pkl you created yourself.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

st.sidebar.title("Employee Salary Prediction")
st.sidebar.markdown("**Internship Project**")
st.sidebar.markdown("Created by: Abhinav Mathur")
st.sidebar.markdown("---")
st.sidebar.markdown("🔹 Fill form or upload CSV")
st.sidebar.markdown("🔹 Click Predict")
# A relative Markdown link is not served by Streamlit, so the sample is
# generated here and offered as a real download.
sample_csv = pd.DataFrame(
    [
        ['Data Scientist', 'Masters', 5, 'Male', 'Delhi', 30],
        ['HR Manager', 'Bachelors', 8, 'Female', 'Mumbai', 35],
    ],
    columns=REQUIRED_COLS,
).to_csv(index=False)
st.sidebar.download_button("Download Sample CSV", sample_csv, "sample_input_data.csv", "text/csv")

st.title("Salary Prediction App")

uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the uploaded CSV: {exc}")
        st.stop()
    st.write("Uploaded Data:")
    st.dataframe(df)

    missing_cols = [col for col in REQUIRED_COLS if col not in df.columns]
    if missing_cols:
        st.error(f"❌ Uploaded CSV is missing required columns: {', '.join(missing_cols)}")
    elif df.empty:
        st.warning("Uploaded CSV has no data rows.")
    else:
        input_data, problems = encode_features(df)
        if problems:
            # Refuse to predict rather than feed NaN-encoded rows to the model.
            st.error("❌ Uploaded CSV has values the model cannot use: " + "; ".join(problems))
        else:
            df['Predicted Salary'] = model.predict(input_data)

            st.write("Predictions:")
            st.dataframe(df)
            st.download_button("Download Predictions", df.to_csv(index=False), "predicted_salaries.csv", "text/csv")
else:
    st.subheader("Enter Employee Details")
    job = st.selectbox("Job Role", ["Data Scientist", "HR Manager", "Software Engineer", "Analyst", "Project Manager"])
    education = st.selectbox("Education Level", ["Bachelors", "Masters", "PhD"])
    experience = st.slider("Years of Experience", 0, 20, 3)
    # Gender and Location are model features, so the form has to collect them.
    gender = st.selectbox("Gender", ["Male", "Female"])
    location = st.selectbox("Location", ["Delhi", "Mumbai", "Bangalore", "Hyderabad", "Chennai"])
    age = st.number_input("Age", min_value=18, max_value=65, value=30)

    if st.button("Predict Salary"):
        # A one-row frame with the training column names keeps the input
        # aligned with what the model was fitted on.
        row = pd.DataFrame(
            [[job, education, experience, gender, location, age]],
            columns=REQUIRED_COLS,
        )
        input_data, _ = encode_features(row)
        prediction = model.predict(input_data)[0]
        st.success(f"Estimated Salary: ₹ {int(prediction):,}")
'''

# Write the app source to app.py. The text contains non-ASCII characters
# (emoji, the rupee sign), so the encoding is set explicitly instead of
# relying on the platform default, which is not UTF-8 everywhere.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# Setup ngrok and launch
# The ngrok auth token is a credential and must not live in the notebook:
# it is read from the NGROK_AUTH_TOKEN environment variable instead.
# NOTE: any token that was previously committed in this file should be
# revoked in the ngrok dashboard.
STREAMLIT_PORT = 8501  # single source of truth for the server and the tunnel

ngrok_token = os.environ.get("NGROK_AUTH_TOKEN")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)
else:
    # Fall back to whatever token ngrok already has configured, if any.
    print("NGROK_AUTH_TOKEN is not set; relying on ngrok's existing configuration.")


def run():
    """Run the Streamlit server for app.py; blocks until the server exits."""
    # The port is passed explicitly so it always matches the tunnel below;
    # headless mode skips the browser launch and first-run prompt.
    os.system(f"streamlit run app.py --server.port {STREAMLIT_PORT} --server.headless true")


# The server blocks, so it runs in a background thread.
threading.Thread(target=run).start()
time.sleep(5)  # give the server a moment to start listening before tunnelling
public_url = ngrok.connect(STREAMLIT_PORT)
print("🔗 Public Streamlit URL:", public_url)
