In [None]:
# 📊 Simple Linear Regression on Salary Dataset
# This notebook demonstrates how to build a Simple Linear Regression model
# to predict salary based on years of experience using the given dataset.

# -----------------------
# Import Libraries
# -----------------------
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [None]:
# -----------------------
# Load Dataset
# -----------------------
# Read the raw CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("Salary_dataset.csv")

# "Unnamed: 0" is the label pandas assigns to a first column with an empty
# header (presumably a row index written out when the file was exported --
# confirm against the CSV). It is not used as a feature, so discard it.
# errors="ignore" keeps this cell from raising KeyError when the file was
# saved without that column.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [None]:
# Preview the top of the frame (first five rows) to sanity-check the load
df.head(n=5)

In [None]:
# Dataset Info: print a structural summary of the frame (columns, dtypes,
# non-null counts) so missing values or wrong types show up before modeling.
df.info()

In [None]:
# -----------------------
# Split Dataset
# -----------------------
# Feature matrix: selected with a list so it stays a 2-D, single-column frame.
X = df.loc[:, ["YearsExperience"]]
# Target vector: the 1-D salary column.
y = df.loc[:, "Salary"]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# -----------------------
# Train Model
# -----------------------
# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
model = LinearRegression().fit(X_train, y_train)

# Predict salaries for the held-out test rows
y_pred = model.predict(X_test)

In [None]:
# -----------------------
# Evaluation
# -----------------------
# Score the test-set predictions. RMSE is derived from MSE by taking the
# square root.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Fitted line parameters first, then the error metrics, one per output line.
report = [
    ("Intercept:", model.intercept_),
    ("Coefficient:", model.coef_[0]),  # single feature -> single slope
    ("MSE:", mse),
    ("RMSE:", rmse),
    ("R² Score:", r2),
]
for label, value in report:
    print(label, value)

In [None]:
# -----------------------
# Visualization
# -----------------------
# Work on the current axes through the object-oriented API instead of the
# pyplot state machine.
ax = plt.gca()

# Held-out observations as points, model predictions for the same inputs
# as a line.
ax.scatter(X_test, y_test, color="blue", label="Actual")
ax.plot(X_test, y_pred, color="red", linewidth=2, label="Predicted")

ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary")
ax.set_title("Simple Linear Regression (Salary vs Experience)")
ax.legend()

plt.show()

In [None]:
# -----------------------
# Save Predictions to CSV (Optional)
# -----------------------
# Put each test sample's input, true salary and predicted salary side by side.
experience = X_test.to_numpy().ravel()  # collapse the single-column frame to 1-D
actual = y_test.to_numpy()

results = pd.DataFrame(
    {
        "YearsExperience": experience,
        "Actual Salary": actual,
        "Predicted Salary": y_pred,
    }
)

# index=False leaves the DataFrame's row index out of the written file.
results.to_csv("Salary_Predictions.csv", index=False)
results.head()