In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression


In [4]:
df = pd.read_csv("hiring.csv")
print(df)


  experience  test_score(out of 10)  interview_score(out of 10)  salary($)
0        NaN                    8.0                           9      50000
1        NaN                    8.0                           6      45000
2       five                    6.0                           7      60000
3        two                   10.0                          10      65000
4      seven                    9.0                           6      70000
5      three                    7.0                          10      62000
6        ten                    NaN                           7      72000
7     eleven                    7.0                           8      80000


In [5]:
print(df.isnull().sum())


experience                    2
test_score(out of 10)         1
interview_score(out of 10)    0
salary($)                     0
dtype: int64


In [8]:
df['test_score(out of 10)'] = df['test_score(out of 10)'].fillna(df['test_score(out of 10)'].mean())

In [12]:
word_to_int = {
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
    'eleven': 11, 'twelve': 12
}

df['experience'] = df['experience'].fillna(0)

df['experience'] = df['experience'].apply(lambda x: word_to_int.get(str(x).lower(), x))

df['experience'] = pd.to_numeric(df['experience'])

X = df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']]
y = df['salary($)']

model = LinearRegression()
model.fit(X, y)

salary1 = model.predict([[2, 9, 6]])
salary2 = model.predict([[12, 10, 10]])

print("Predicted salary (2 yr, 9 test, 6 interview):", salary1[0])
print("Predicted salary (12 yr, 10 test, 10 interview):", salary2[0])

print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

Predicted salary (2 yr, 9 test, 6 interview): 53290.892559447646
Predicted salary (12 yr, 10 test, 10 interview): 92268.07227783566
Intercept: 17237.330313727172
Coefficients: [2827.63404314 1912.93803053 2196.9753141 ]




In [14]:
df1 = pd.read_csv("1000_Companies.csv")
print(df.head())


   experience  test_score(out of 10)  interview_score(out of 10)  salary($)
0           0                    8.0                           9      50000
1           0                    8.0                           6      45000
2           5                    6.0                           7      60000
3           2                   10.0                          10      65000
4           7                    9.0                           6      70000


In [15]:
df1 = pd.get_dummies(df1, columns=['State'], drop_first=True)


In [16]:
X = df1.drop('Profit', axis=1)
y = df1['Profit']

model = LinearRegression()
model.fit(X, y)

input_data = pd.DataFrame({
    'R&D Spend': [91694.48],
    'Administration': [515841.3],
    'Marketing Spend': [11931.24],
    'State_Florida': [1],
    'State_New York': [0]
})

predicted_profit = model.predict(input_data)

print("Predicted Profit:", predicted_profit[0])

print(X.columns)


Predicted Profit: 510570.9926108309
Index(['R&D Spend', 'Administration', 'Marketing Spend', 'State_Florida',
       'State_New York'],
      dtype='object')
