In [None]:
# 1. Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# 2. Load the dataset
# salary_data.csv is read relative to the current working directory.
dataset = pd.read_csv('salary_data.csv')
# Every column except the last becomes the (2-D) feature matrix; the last
# column becomes the (1-D) target vector.
X = dataset.iloc[:, :-1].to_numpy()  # Independent variable (YearsExperience)
y = dataset.iloc[:, -1].to_numpy()  # Dependent variable (Salary)

# 3. Split the dataset into a training and testing set
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state keeps the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Create and train the linear regression model
# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
regressor = LinearRegression().fit(X_train, y_train)

# 5. Make predictions on the test set and evaluate them
y_pred = regressor.predict(X_test)

# Score the model on the held-out data: without this step the predictions
# above go unused and the train/test split serves no purpose.
test_r2 = regressor.score(X_test, y_test)  # coefficient of determination (R^2)
test_mae = np.mean(np.abs(y_test - y_pred))  # mean absolute error, same units as y
print(f"Test R^2: {test_r2:.2f}")
print(f"Test MAE: {test_mae:,.2f}\n")

# 6. Print the model's coefficients
# The fitted line is y = b0 + b1 * x. coef_ is indexed per feature, so
# entry 0 is the slope for the first (here assumed only) feature column.
intercept_b0 = regressor.intercept_
slope_b1 = regressor.coef_[0]
print(f"Intercept (b0): {intercept_b0:.2f}")
print(f"Coefficient (b1): {slope_b1:.2f}\n")

# 7. Visualize the training set results
# Draw on an explicit Figure/Axes pair instead of pyplot's implicit
# "current axes"; the resulting figure is the same.
train_fig, train_ax = plt.subplots(figsize=(10, 6))
# Observed training points.
train_ax.scatter(X_train, y_train, color='red', label='Training data')
# Model predictions at the training inputs trace out the fitted line.
train_ax.plot(
    X_train,
    regressor.predict(X_train),
    color='blue',
    label='Regression line',
)
train_ax.set_title('Salary vs. Years of Experience (Training Set)')
train_ax.set_xlabel('Years of Experience')
train_ax.set_ylabel('Salary')
train_ax.legend()
plt.show()

# 8. Visualize the testing set results
# Draw on an explicit Figure/Axes pair instead of pyplot's implicit
# "current axes"; the resulting figure is the same.
test_fig, test_ax = plt.subplots(figsize=(10, 6))
# Held-out points the model did not see during fitting.
test_ax.scatter(X_test, y_test, color='red', label='Testing data')
# The line is still drawn from the training inputs: it is the one fitted
# model, shown here against the test points.
test_ax.plot(
    X_train,
    regressor.predict(X_train),
    color='blue',
    label='Regression line (from training)',
)
test_ax.set_title('Salary vs. Years of Experience (Test Set)')
test_ax.set_xlabel('Years of Experience')
test_ax.set_ylabel('Salary')
test_ax.legend()
plt.show()

# 9. Predict a new value (optional)
# Keep the query value in one place so the model input and the printed
# message cannot drift apart when it is edited.
years_of_experience = 12
new_experience = [[years_of_experience]]  # Must be 2D: one row, one feature
predicted_salary = regressor.predict(new_experience)
print(
    f"Predicted salary for {years_of_experience} years of experience: "
    f"${predicted_salary[0]:,.2f}"
)
