In [1]:
import os

import pandas as pd

# Location of the raw CSV. The original machine-specific path is kept as the
# default so existing runs behave the same; set the SALARY_DATA_PATH
# environment variable to point the notebook at the file on another machine.
DATA_PATH = os.environ.get(
    'SALARY_DATA_PATH',
    r'C:\Users\FINE LAPTOP\Desktop\AI Projects\Salary prediction\salary prediction.csv',
)

# Columns that must be present in every row: the eight model inputs selected
# in the train/test-split cell plus the SALARY target. PAST_EXP and SALARY are
# included so that a missing value in either can never reach the scaler or
# the loss as NaN.
REQUIRED_COLUMNS = [
    'AGE', 'SEX', 'UNIT', 'DESIGNATION',
    'LEAVES_USED', 'LEAVES_REMAINING', 'RATINGS',
    'PAST_EXP', 'SALARY',
]

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Normalise column names: trim surrounding whitespace, upper-case, and turn
# inner spaces into underscores (e.g. ' Leaves Used ' -> 'LEAVES_USED').
df.columns = df.columns.str.strip().str.upper().str.replace(' ', '_')

# Drop rows that lack any value the model needs
df = df.dropna(subset=REQUIRED_COLUMNS)

# Renumber rows 0..n-1 after the drop so positional and label indexing agree
df = df.reset_index(drop=True)

# View first few rows
print(df.head())


  FIRST_NAME LAST_NAME SEX         DOJ CURRENT_DATE DESIGNATION   AGE  SALARY  \
0     TOMASA     ARMEN   F   5-18-2014   01-07-2016     Analyst  21.0   44570   
1      OLIVE      ANCY   F   7-28-2014   01-07-2016     Analyst  21.0   40955   
2     CHERRY   AQUILAR   F  04-03-2013   01-07-2016     Analyst  22.0   45550   
3   VICTORIA       NaN   F   2-19-2013   01-07-2016     Analyst  22.0   48736   
4     ELLIOT    AGULAR   M  09-02-2013   01-07-2016     Analyst  22.0   40339   

        UNIT  LEAVES_USED  LEAVES_REMAINING  RATINGS  PAST_EXP  
0    Finance         24.0               6.0      2.0         0  
1    Finance         23.0               7.0      3.0         0  
2         IT         22.0               8.0      3.0         0  
3  Marketing         20.0              10.0      4.0         0  
4  Marketing         19.0              11.0      5.0         0  


In [2]:
from sklearn.preprocessing import LabelEncoder
import pickle

# One encoder per categorical column. The module-level names are kept so that
# other cells can keep referring to them directly.
sex_encoder = LabelEncoder()
unit_encoder = LabelEncoder()
designation_encoder = LabelEncoder()

# column name -> (encoder instance, file the fitted encoder is pickled to)
categorical_encoders = {
    'SEX': (sex_encoder, 'sex_encoder.pkl'),
    'UNIT': (unit_encoder, 'unit_encoder.pkl'),
    'DESIGNATION': (designation_encoder, 'designation_encoder.pkl'),
}

# This cell overwrites the df columns with integer codes. Running it a second
# time without reloading df would fit the encoders on those integers and
# silently pickle a meaningless int -> int mapping, so refuse to continue
# before anything is mutated or written.
already_encoded = [
    column for column in categorical_encoders
    if pd.api.types.is_numeric_dtype(df[column])
]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already numeric; re-run the data "
        "loading cell before encoding again."
    )

# Fit each encoder on the raw labels and replace the column with its codes
for column, (encoder, _) in categorical_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# Save encoders so the same label -> code mapping can be reused at inference
for encoder, pickle_path in categorical_encoders.values():
    with open(pickle_path, 'wb') as f:
        pickle.dump(encoder, f)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Model inputs, in the exact column order the scaler and the network see them
feature_columns = [
    'AGE',
    'SEX',
    'UNIT',
    'LEAVES_USED',
    'LEAVES_REMAINING',
    'RATINGS',
    'PAST_EXP',
    'DESIGNATION',
]

# Feature matrix and regression target
X = df[feature_columns]
y = df['SALARY']

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Learn mean/variance from the training rows only, then apply to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the notebook
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable from run to run.
set_random_seed(42)

# Take the input width from the data rather than hard-coding it, so the
# network stays in sync with the feature list chosen in the split cell.
n_features = X_train_scaled.shape[1]

# Build model: two hidden ReLU layers and a single linear output unit.
# An explicit Input layer replaces `input_shape=` on the first Dense layer,
# which Keras warns about when used inside a Sequential model.
model = Sequential()
model.add(Input(shape=(n_features,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # Regression output

# Compile. The target is the unscaled SALARY column, so the MSE values
# reported during training are correspondingly large.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train. validation_split holds back the last 20% of the training rows for
# per-epoch validation.
model.fit(X_train_scaled, y_train, epochs=100, batch_size=16, validation_split=0.2)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 4871080960.0000 - val_loss: 4183396352.0000
Epoch 2/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 4722276352.0000 - val_loss: 4171821568.0000
Epoch 3/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 4683449344.0000 - val_loss: 4139965184.0000
Epoch 4/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 4549009920.0000 - val_loss: 4075951872.0000
Epoch 5/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 4532289536.0000 - val_loss: 3972641536.0000
Epoch 6/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 4321214976.0000 - val_loss: 3824964864.0000
Epoch 7/100
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 4332344320.0000 - val_loss: 3634718208.0000
Epoch 8/100
[1m106/106

<keras.src.callbacks.history.History at 0x21b0cfd9090>

In [None]:
# Write the model built in the training cell to 'salary_model.keras'.
# The path is relative, so the file is created in the current working directory.
model.save('salary_model.keras')