In [11]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Example features and target.
# Each feature is written as its own vector (one entry per observation). The
# column order follows the labelled copy of this data used with pandas below:
# experience, education, weekly working hours, management experience.
experience_years = [2, 5, 3, 10, 7, 1, 8, 4, 6, 12]
education_years = [15, 16, 16, 18, 17, 14, 16, 15, 15, 19]
weekly_hours = [40, 45, 40, 50, 45, 35, 45, 40, 42, 55]
management_years = [0, 1, 0, 5, 3, 0, 4, 1, 2, 8]

# Place the vectors side by side to form the (10, 4) feature matrix.
X = np.column_stack(
    [experience_years, education_years, weekly_hours, management_years]
)
y = np.array([15, 25, 18, 45, 35, 12, 38, 22, 30, 60])

# Fit the model on all ten rows; fit() hands back the estimator itself,
# so construction and training can be chained.
model = LinearRegression().fit(X, y)

# Get coefficients: the intercept is rounded to two decimals, while the
# coefficient vector is shown with NumPy's default array formatting.
print(f"Intercept (β₀): {model.intercept_:.2f}")
print(f"Coefficients (β₁, β₂, β₃, β₄): {model.coef_}")

Intercept (β₀): 1.72
Coefficients (β₁, β₂, β₃, β₄): [1.97320904 0.11806371 0.20019818 2.57642763]


In [13]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Create a DataFrame from our data.
# The target label is named once so the column list and the X/y split below
# cannot drift apart.
target_column = 'Monthly salary (thousands ILS)'
columns = [
    'Experience (years)',
    'Education (years)',
    'Weekly working hours',
    'Management experience (years)',
    target_column,
]

# One row per worker; values appear in the same order as `columns`.
data = [
    [2, 15, 40, 0, 15],
    [5, 16, 45, 1, 25],
    [3, 16, 40, 0, 18],
    [10, 18, 50, 5, 45],
    [7, 17, 45, 3, 35],
    [1, 14, 35, 0, 12],
    [8, 16, 45, 4, 38],
    [4, 15, 40, 1, 22],
    [6, 15, 42, 2, 30],
    [12, 19, 55, 8, 60],
]
df = pd.DataFrame(data=data, columns=columns)

# Prepare features (X) and target (y): X keeps every column except the
# salary, y is the salary column on its own.
X = df.drop(columns=[target_column])
y = df[target_column]

# Split data into training and testing sets.
# test_size=0.2 on ten rows holds out two rows; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create and train the model in one step (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Print model coefficients, pairing each feature label with its fitted
# weight and numbering them from β1.
print(f"Intercept (β₀): {model.intercept_:.2f}")
for position, (feature, weight) in enumerate(zip(X.columns, model.coef_), start=1):
    print(f"Coefficient for {feature} (β{position}): {weight:.2f}")

# Make predictions and evaluate the model, one split at a time.
# Training split: how well the model reproduces the rows it was fitted on.
y_pred_train = model.predict(X_train)
train_mse = mean_squared_error(y_train, y_pred_train)
train_r2 = r2_score(y_train, y_pred_train)

# Testing split: the held-out rows. With test_size=0.2 on ten rows this is
# only two observations, so these scores are very noisy.
y_pred_test = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred_test)
test_r2 = r2_score(y_test, y_pred_test)

# Report errors first, then R², training before testing in each pair.
print(f"\nTraining MSE: {train_mse:.2f}")
print(f"Testing MSE: {test_mse:.2f}")
print(f"Training R²: {train_r2:.2f}")
print(f"Testing R²: {test_r2:.2f}")

# Calculate adjusted R² on the training split.
# n = number of training rows, p = number of predictors; the correction
# penalises R² for the number of predictors relative to the sample size.
n, p = X_train.shape
adj_r2 = 1 - (1 - train_r2) * (n - 1) / (n - p - 1)
print(f"Adjusted R²: {adj_r2:.2f}")

# Make a prediction for a new salary.
# Values are in training-column order: experience, education, weekly working
# hours, management experience.
new_worker = np.array([[6, 16, 43, 2]])

# The model was fitted on a DataFrame, so give it a frame with the same column
# labels. Passing the bare array makes scikit-learn emit a feature-name
# mismatch warning (hidden in this notebook by warnings.filterwarnings('ignore'))
# and silently depends on the positional order being right.
new_worker_features = pd.DataFrame(new_worker, columns=X_train.columns)
predicted_salary = model.predict(new_worker_features)[0]
print(f"\nPredicted salary for a new worker: {predicted_salary:.2f} thousand ILS")

Intercept (β₀): -4.33
Coefficient for Experience (years) (β1): 1.55
Coefficient for Education (years) (β2): 1.06
Coefficient for Weekly working hours (β3): 0.01
Coefficient for Management experience (years) (β4): 3.06

Training MSE: 0.36
Testing MSE: 3.42
Training R²: 1.00
Testing R²: 0.45
Adjusted R²: 1.00

Predicted salary for a new worker: 28.63 thousand ILS


#### Holding the other features constant, each additional year of experience increases the monthly salary by about 1,550 ILS
#### Each additional year of education increases the monthly salary by about 1,060 ILS
#### Each additional weekly working hour increases the monthly salary by about 10 ILS
#### Each additional year of management experience increases the monthly salary by about 3,060 ILS
#### Based on the fitted coefficients above, we can conclude that management experience and work experience have the greatest influence on salary (per additional unit of each feature)