In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the salary table from the dataset directory
path = "C:/dataset"
training_set = pd.read_csv(f'{path}/Salary_dataset.csv')

# Preview the top rows to sanity-check the load
print(training_set.head())

# Pull the model input and the target out as NumPy arrays:
#   y_train -> the last column (plotted later as "Salary")
#   x_train -> the single column at position 1 (plotted later as "Years of Experience")
y_train = training_set.iloc[:, -1].values
x_train = training_set.iloc[:, 1].values

# Echo the extracted arrays
print("\n Dataset Info:", "Features (X_train):", sep="\n")
print(x_train)
print("\nTarget variable (y_train):")
print(y_train)

In [None]:
# Scatter plot of the raw training points, drawn on an explicit Axes object
fig, ax = plt.subplots()
ax.scatter(x_train, y_train, color='blue')
ax.set_title('Salary vs Years of Experience')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
plt.show()

In [None]:
# Cost function implementation
def cost_function(x, y, w, b):
    """Return the halved mean squared error of the line ``w * x + b``.

    Evaluates ``sum((w * x[i] + b - y[i]) ** 2) / (2 * n)`` over all ``n``
    samples with one vectorized NumPy expression instead of a Python loop.

    Parameters
    ----------
    x : array-like of numbers
        Input values. Flattened to 1-D before use.
    y : array-like of numbers
        Target values. Flattened to 1-D; must contain as many elements as ``x``.
    w : float
        Slope of the line.
    b : float
        Intercept of the line.

    Returns
    -------
    float
        The cost for the given ``w`` and ``b``.

    Raises
    ------
    ValueError
        If ``x`` is empty, or if ``x`` and ``y`` have different lengths.
    """
    x_arr = np.asarray(x, dtype=float).ravel()
    y_arr = np.asarray(y, dtype=float).ravel()

    n = x_arr.size
    if n == 0:
        # Without this guard the division below would fail with an opaque error.
        raise ValueError("cost_function requires at least one sample")
    if y_arr.size != n:
        # A longer y must not be silently truncated to len(x).
        raise ValueError(
            f"x and y must have the same length (got {n} and {y_arr.size})"
        )

    residuals = w * x_arr + b - y_arr
    # dot(r, r) is the sum of squared residuals.
    return float(np.dot(residuals, residuals) / (2 * n))


In [None]:
# Gradient function implementation
def gradian_function(x, y, w, b):
    """Return the gradients of the halved mean squared error w.r.t. ``w`` and ``b``.

    For the line ``f(x) = w * x + b`` this computes

    * ``dc_dw = (1 / n) * sum((f(x[i]) - y[i]) * x[i])``
    * ``dc_db = (1 / n) * sum(f(x[i]) - y[i])``

    using vectorized NumPy operations instead of a Python loop. The function
    name (a misspelling of "gradient") is kept as-is because
    ``gradient_descent`` calls it by this name.

    Parameters
    ----------
    x : array-like of numbers
        Input values. Flattened to 1-D before use.
    y : array-like of numbers
        Target values. Flattened to 1-D; must contain as many elements as ``x``.
    w : float
        Current slope.
    b : float
        Current intercept.

    Returns
    -------
    tuple of (float, float)
        ``(dc_dw, dc_db)``.

    Raises
    ------
    ValueError
        If ``x`` is empty, or if ``x`` and ``y`` have different lengths.
    """
    x_arr = np.asarray(x, dtype=float).ravel()
    y_arr = np.asarray(y, dtype=float).ravel()

    n = x_arr.size
    if n == 0:
        # Without this guard the 1/n averaging below would fail with an opaque error.
        raise ValueError("gradian_function requires at least one sample")
    if y_arr.size != n:
        # A longer y must not be silently truncated to len(x).
        raise ValueError(
            f"x and y must have the same length (got {n} and {y_arr.size})"
        )

    residuals = w * x_arr + b - y_arr
    dc_dw = float(np.dot(residuals, x_arr) / n)
    dc_db = float(residuals.sum() / n)

    return dc_dw, dc_db

In [None]:
# Gradient Descent implementation
def gradient_descent(x, y, alpha, iterations, log_every=None):
    """Fit the slope and intercept of ``w * x + b`` by batch gradient descent.

    Starting from ``w = 0`` and ``b = 0``, every iteration asks
    ``gradian_function`` for the current gradients and moves both parameters
    by ``alpha`` times the gradient in the opposite direction.

    Parameters
    ----------
    x : array-like of numbers
        Input values, passed through to ``gradian_function`` / ``cost_function``.
    y : array-like of numbers
        Target values, passed through likewise.
    alpha : float
        Learning rate (step size).
    iterations : int
        Number of update steps to perform.
    log_every : int or None, optional
        Print the cost every ``log_every`` iterations; the final iteration is
        always printed as well. ``None`` (default) picks an interval that
        yields roughly ten progress lines. Pass ``1`` to print on every
        iteration, or ``0`` / a negative value to print nothing.

    Returns
    -------
    tuple
        ``(w, b)`` after the last update.
    """
    if log_every is None:
        # Printing (and recomputing the cost) on every step floods the output
        # and roughly doubles the work; default to about ten progress lines.
        log_every = max(1, iterations // 10)

    w = 0
    b = 0
    last_step = iterations - 1

    for i in range(iterations):
        # Both gradients are taken at the same (w, b) before either is updated.
        dc_dw, dc_db = gradian_function(x, y, w, b)

        w = w - alpha * dc_dw
        b = b - alpha * dc_db

        # The cost is only evaluated on iterations that are actually reported.
        if log_every > 0 and (i % log_every == 0 or i == last_step):
            print(f"Iteration {i}: Cost: {cost_function(x, y, w, b)}")

    return w, b

In [None]:
# Training configuration: number of update steps and the learning rate
iterations = 10_000
alpha = 0.01

# Fit the line on the training data, then report the learned parameters
final_w, final_b = gradient_descent(
    x_train,
    y_train,
    alpha=alpha,
    iterations=iterations,
)
print(f"Final weight (w): {final_w}, Final bias (b): {final_b}")

In [None]:
# Overlay the fitted line on the training scatter, using an explicit Axes object
fig, ax = plt.subplots()
ax.scatter(x_train, y_train, color='blue', label='Training data')

# Evaluate the fitted line at 100 evenly spaced points spanning the observed x range
x_vals = np.linspace(min(x_train), max(x_train), 100)
y_vals = final_w * x_vals + final_b
ax.plot(x_vals, y_vals, color='red', label='Regression line')

ax.set_title('Linear Regression Fit')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
ax.legend()
plt.show()

In [None]:
# Evaluate the fitted line at one chosen experience level and report the result
years_of_experience = 5.5
predicted_salary = final_b + final_w * years_of_experience
print(f"Predicted salary for {years_of_experience} years of experience: {predicted_salary:.2f}")