# Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load Dataset

In [None]:
# Read the salary dataset; the path is relative to the working directory.
dataset_path = "Dataset/extended_salary_data.csv"
data = pd.read_csv(dataset_path)

# Explore Dataset

In [None]:
# Preview the first rows (pandas' default for head() is 5) to check the load.
data.head()

In [None]:
# Print a concise summary: column names, dtypes, non-null counts, memory use.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
data.describe()

In [None]:
# Summary statistics table: one row per numeric column, one column per statistic.
# numeric_only=True restricts each reduction to numeric columns; without it,
# pandas >= 2.0 raises TypeError if the dataset has any non-numeric column.
# var() and std() use pandas' default ddof=1 (sample variance / deviation).
pd.DataFrame({"Mean": data.mean(numeric_only=True),
              "Median": data.median(numeric_only=True),
              "Variance": data.var(numeric_only=True),
              "Standard Deviation": data.std(numeric_only=True),
              })

# Tasks

## Task 1

In [None]:
# Scatter plot of salary against years of experience, drawn through the
# explicit Axes object rather than the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(data["YearsExperience"], data["Salary"])
ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary (USD)")
ax.set_title("Salary vs Years of Experience")
ax.grid(True)
plt.show()

In [None]:
# Summary statistics table: one row per numeric column, one column per statistic.
# numeric_only=True restricts each reduction to numeric columns; without it,
# pandas >= 2.0 raises TypeError if the dataset has any non-numeric column.
# var() and std() use pandas' default ddof=1 (sample variance / deviation).
pd.DataFrame({"Mean": data.mean(numeric_only=True),
              "Median": data.median(numeric_only=True),
              "Variance": data.var(numeric_only=True),
              "Standard Deviation": data.std(numeric_only=True),
              })

## Task 2

In [None]:
# Feature matrix: selecting with a list keeps X as a one-column DataFrame (2-D).
feature_columns = ["YearsExperience"]
# Target: selecting with a single label yields a Series.
target_column = "Salary"

X = data[feature_columns]
y = data[target_column]

In [None]:
# Hold out 20% of the rows as a test set. A fixed random_state seeds the
# shuffle so the split -- and therefore the fitted coefficients, error
# metrics and plots below -- are reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Create a linear regression estimator and fit it on the training split only;
# the test split is kept aside for evaluation.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# coef_ holds one weight per feature; X has a single column, so unpack the
# lone coefficient as the slope of the fitted line.
(slope,) = model.coef_
intercept = model.intercept_

print(
    f"Slope (Coefficient): {slope}",
    f"Intercept: {intercept}",
    sep="\n",
)

## Task 3

In [None]:
# Predict salaries for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

In [None]:
# Compare test-set predictions with the true salaries.
# MAE is in the target's units; MSE is in squared units.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    sep="\n",
)

# Visualisation of Regression Line

In [None]:
# Overlay the model's predictions (red line) on the actual test points,
# drawn through the explicit Axes object rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X_test, y_test, label="Actual Data")
ax.plot(X_test, y_pred, color="red", label="Regression Line")
ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary (USD)")
ax.set_title("Linear Regression on Test Data")
ax.legend()
ax.grid(True)
plt.show()