In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv('/content/sample_data/hiring.csv')

print(df.head())

# Fill missing values if any
df.fillna(0, inplace=True)

# Map word-form experience to numerical values
word_to_num = {
    'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
    'eleven': 11, 'twelve': 12
}

df['experience'] = df['experience'].apply(lambda x: word_to_num.get(str(x).lower(), x))

# Ensure 'experience' column is numeric (in case some values were already numbers or converted to 0 by fillna)
df['experience'] = pd.to_numeric(df['experience'])

X = df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']]
y = df['salary($)']

model = LinearRegression()
model.fit(X, y)

# Predictions
pred1 = model.predict([[2,9,6]])
pred2 = model.predict([[12,10,10]])

print("Predicted salary (2yr,9,6):", pred1[0])
print("Predicted salary (12yr,10,10):", pred2[0])

  experience  test_score(out of 10)  interview_score(out of 10)  salary($)
0        NaN                    8.0                           9      50000
1        NaN                    8.0                           6      45000
2       five                    6.0                           7      60000
3        two                   10.0                          10      65000
4      seven                    9.0                           6      70000
Predicted salary (2yr,9,6): 52688.252227972924
Predicted salary (12yr,10,10): 92940.63332618751




In [9]:
df = pd.read_csv('/content/sample_data/1000_Companies.csv')

print(df.head())

# Encode categorical variable (State)
df = pd.get_dummies(df, drop_first=True)

X = df.drop('Profit', axis=1)
y = df['Profit']

model = LinearRegression()
model.fit(X, y)

# Prepare input (Florida encoded)
input_data = [[91694.48, 15841.3, 11931.24, 1, 0]]

profit_pred = model.predict(input_data)

print("Predicted Profit:", profit_pred[0])


   R&D Spend  Administration  Marketing Spend       State     Profit
0  165349.20       136897.80        471784.10    New York  192261.83
1  162597.70       151377.59        443898.53  California  191792.06
2  153441.51       101145.55        407934.54     Florida  191050.39
3  144372.41       118671.85        383199.62    New York  182901.99
4  142107.34        91391.77        366168.42     Florida  166187.94
Predicted Profit: -2552.322172547676


