<a href="https://colab.research.google.com/github/Jyothika-12/Pulse-Detection-Using-OpenCV/blob/main/Salary_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Load the raw salary dataset from the current working directory.
df = pd.read_csv("Salary Data.csv")

# Gender is intentionally left out of the model. errors="ignore" keeps the
# script working on files that no longer carry the column, matching the
# tolerance already given to the optional "Skills" column below.
df = df.drop(columns=["Gender"], errors="ignore")

# Rows without a target value cannot be used for supervised training.
df = df.dropna(subset=["Salary"])

# Add a placeholder Skills column if missing so the feature schema is the
# same whether or not the dataset provides one.
if "Skills" not in df.columns:
    df["Skills"] = "General"

# Column groups are declared once and reused for both feature selection and
# the ColumnTransformer, so the two can never drift apart.
numerical_cols = ["Years of Experience", "Age"]
categorical_cols = ["Skills", "Education Level", "Job Title"]

# Features and target (numerical columns first, then categorical ones).
X = df[numerical_cols + categorical_cols]
y = df["Salary"]

# Preprocessor: categorical columns are imputed with their most frequent
# value and one-hot encoded; numerical columns are mean-imputed and passed
# through unscaled.
# NOTE(review): combining drop="first" with handle_unknown="ignore" is
# presumably only accepted by newer scikit-learn releases, and an unseen
# category is then encoded exactly like the dropped baseline category --
# confirm against the installed scikit-learn version.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", Pipeline(steps=[
            ("imputer", SimpleImputer(strategy="most_frequent")),
            ("encoder", OneHotEncoder(drop="first", handle_unknown="ignore")),
        ]), categorical_cols),
        ("num", Pipeline(steps=[
            ("imputer", SimpleImputer(strategy="mean")),
        ]), numerical_cols),
    ]
)

# Full model: preprocessing followed by an ordinary least-squares regressor.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression()),
])

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
# NOTE: X_test / y_test are not used anywhere in the code shown here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the preprocessing steps and the regressor on the training portion only.
model.fit(X_train, y_train)

# Salary prediction function
def predict_salary(years_experience, age, skills, education, job_title):
    """Predict the salary for a single candidate profile.

    The arguments are packed into a one-row DataFrame whose column names
    match the training features, and that frame is passed to the
    module-level ``model``.

    Args:
        years_experience: Value for the "Years of Experience" column.
        age: Value for the "Age" column.
        skills: Value for the "Skills" column.
        education: Value for the "Education Level" column.
        job_title: Value for the "Job Title" column.

    Returns:
        The first (and only) element of ``model.predict`` for that row.
    """
    profile = {
        "Years of Experience": years_experience,
        "Age": age,
        "Skills": skills,
        "Education Level": education,
        "Job Title": job_title,
    }
    # A list holding one record yields a single-row frame with the same
    # column order as the dict above.
    single_row = pd.DataFrame([profile])
    predictions = model.predict(single_row)
    return predictions[0]

# Example usage: score one hand-written profile and print the result
# formatted as dollars with thousands separators and two decimals.
predicted = predict_salary(
    years_experience=7,
    age=35,
    skills="Python, Machine Learning",
    education="Master's",
    job_title="Data Scientist",
)
print(f"Predicted Salary: ${predicted:,.2f}")

Predicted Salary: $94,268.30


