In [None]:
# Upload files through the Colab file picker (presumably the CSV read by the
# loading cell below). The google.colab package only exists inside Colab, so
# when it cannot be imported the upload is skipped and the notebook keeps
# running against files already present in the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

# Outside Colab nothing is uploaded, so fall back to an empty mapping.
uploaded = files.upload() if files is not None else {}

In [None]:
import numpy as np

In [None]:
# Load the CSV into a NumPy array, skipping the header row
data = np.genfromtxt("Salary_dataset.csv", skip_header=1, delimiter=",")

print(data)

In [None]:
# Column 1 holds the feature (YearsExperience), column 2 the target (Salary)
X, y = data[:, 1], data[:, 2]

# X is kept 1-D here; the reshape(-1, 1) variant stays disabled
print(X, y)

In [None]:
# Seaborn is off-limits for this exercise, so plot the NumPy arrays directly with matplotlib
import matplotlib.pyplot as plt

plt.scatter(X, y);
plt.xlim(0, 12)
plt.ylim(-5000, 140000);

In [None]:
# Let's create a function that displays the data points together with the model's line.
def visualize(theta, X, y, xlim=(0, 12), ylim=(-5000, 140000)):
    """Scatter the data and overlay the straight line described by ``theta``.

    Args:
        theta: Indexable pair; ``theta[0]`` is the intercept and ``theta[1]``
            the slope of the line to draw.
        X: Values plotted on the horizontal axis.
        y: Values plotted on the vertical axis.
        xlim: ``(low, high)`` bounds of the horizontal axis. The line is drawn
            across this whole range. Defaults to ``(0, 12)``, the limits that
            used to be hard-coded.
        ylim: ``(low, high)`` bounds of the vertical axis. Defaults to
            ``(-5000, 140000)``, the limits that used to be hard-coded.
    """
    plt.scatter(X, y)

    plt.ylim(*ylim)
    plt.xlim(*xlim)

    # Evaluate the line on 100 evenly spaced points spanning the visible x-range.
    x_line = np.linspace(xlim[0], xlim[1], 100)
    y_line = theta[0] + theta[1] * x_line
    plt.plot(x_line, y_line)
    plt.show()

In [None]:
# Quick check of visualize: with both coefficients at zero the line is drawn along y = 0

theta = np.zeros(shape=2)
visualize(theta, X, y)

In [None]:
# Create a function that multiplies each element of X by the slope of the model (theta[1])
# and then adds the intercept of the model (theta[0]), producing the predictions of the simple linear regression model.

def predict(X, theta):
    """Evaluate the linear model ``X * theta[1] + theta[0]``.

    Args:
        X: Feature value(s); a scalar or an array.
        theta: Indexable pair; ``theta[0]`` is the intercept and ``theta[1]``
            the slope.

    Returns:
        A one-element list whose single item is the model output for ``X``.
    """
    scaled = X * theta[1]
    shifted = scaled + theta[0]
    return [shifted]

In [None]:
def fit(X, y, theta, alpha, num_iters):
    """Fit the simple linear model to ``(X, y)`` with batch gradient descent.

    Args:
        X: Feature array; ``X.shape[0]`` is taken as the number of samples.
        y: Target values matching ``X``.
        theta: Starting parameters ``[intercept, slope]``. It is copied, so the
            object passed in is never modified.
        alpha: Learning rate applied to each gradient step.
        num_iters: Number of gradient-descent updates to perform.

    Returns:
        A new float array ``[intercept, slope]`` holding the parameters after
        ``num_iters`` updates (the starting values when ``num_iters`` is 0).
    """
    m = X.shape[0]

    # Work on a float copy: updating the argument in place would silently
    # change the caller's starting theta, and an integer array would truncate
    # every update.
    theta = np.array(theta, dtype=float)

    for _ in range(num_iters):
        # Perform one iteration of gradient descent (i.e., update theta once).
        predictions = np.asarray(predict(X, theta))
        error = predictions - y

        # Both updates use the error computed before theta changed, so the two
        # coefficients are updated simultaneously.
        theta[0] -= alpha * (1/m) * np.sum(error)
        theta[1] -= alpha * (1/m) * np.sum(X * error)
    return theta

In [None]:
# First run: start from theta = [0, 0] and train with alpha = 0.01 for 1000 iterations

theta = np.zeros(shape=2)
finetuned_theta = fit(X, y, theta, alpha=0.01, num_iters=1000)
print(finetuned_theta)

# Expected output, approximately: [21912.58918422329, 9880.814004608217]

In [None]:
# Watch the fitted line evolve: redraw it after 0, 1, 2, 3, 4 and finally 1000
# gradient-descent iterations, restarting from theta = [0, 0] for every plot.

for n_iters in (0, 1, 2, 3, 4, 1000):
    theta = np.zeros(2)
    visualize(fit(X, y, theta, 0.01, n_iters), X, y)

In [None]:
def cost(X, y, theta):
    """Return the halved mean squared error of the linear model on ``(X, y)``.

    Args:
        X: Feature values passed to ``predict``.
        y: Target values matching ``X``.
        theta: Indexable pair ``[intercept, slope]``.

    Returns:
        ``sum((y - prediction) ** 2) / (2 * n)``, where ``n`` is the number of
        residuals.
    """
    # Difference between the actual target values and the model predictions.
    predictions = np.asarray(predict(X, theta))
    error = np.asarray(y) - predictions

    # Count the residuals directly rather than reading error.shape[1]: that
    # second axis only exists because predict() wraps its result in a list.
    n_residuals = error.size

    # Squared sum of the residuals, scaled by 1 / (2 * number of samples).
    return np.sum(error ** 2) * (1 / (2 * n_residuals))


In [None]:
# Check the cost with theta = [0, 0]: the printed value should be roughly 3251553638.

cost_for_theta_zero = cost(X, y, theta=[0, 0])
print(cost_for_theta_zero)

In [None]:
def fit_with_cost(X, y, theta, alpha, num_iters):
    """Run batch gradient descent and record the cost at every iteration.

    Args:
        X: Feature array; ``X.shape[0]`` is taken as the number of samples.
        y: Target values matching ``X``.
        theta: Starting parameters ``[intercept, slope]``. It is copied, so the
            object passed in is never modified.
        alpha: Learning rate applied to each gradient step.
        num_iters: Number of gradient-descent updates to perform.

    Returns:
        A tuple ``(theta, J_history)``: the parameters after the last update as
        a new float array, and a list with one cost value per iteration, each
        measured just before that iteration's update.
    """
    m = X.shape[0]  # Number of training examples
    J_history = []  # Cost value recorded at each iteration

    # Work on a float copy so the caller's starting theta is left untouched.
    theta = np.array(theta, dtype=float)

    for _ in range(num_iters):
        # Residuals of the current model (actual minus predicted).
        predictions = np.asarray(predict(X, theta))
        error = np.asarray(y) - predictions

        # Halved mean squared error for the current theta. error.size counts
        # the residuals without relying on the extra leading axis that the
        # list returned by predict() adds.
        J_history.append(np.sum(error ** 2) * (1 / (2 * error.size)))

        # error is (actual - predicted), so adding these terms moves theta
        # down the gradient. Both updates use the same pre-update error.
        theta[0] += alpha * (1/m) * np.sum(error)
        theta[1] += alpha * (1/m) * np.sum(X * error)

    # Final theta values and the list of cost values over iterations.
    return (theta, J_history)

In [None]:
# Reset both parameters to zero before training
theta = np.zeros(shape=2)

# Train with the cost-tracking variant: alpha = 0.001 for 100 iterations
theta, J_history = fit_with_cost(X, y, theta, alpha=0.001, num_iters=100)

print(theta, J_history)

In [None]:
# Graph to reproduce: the cost recorded at each gradient-descent iteration
plt.plot(J_history)
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.title("Cost Value Over Iterations")
plt.show()

In [None]:
# Experience, in years, of the person whose salary we want to estimate
years_experience = 10

# predict() hands back a one-element list, so unpack its single value
[predicted_salary] = predict(years_experience, theta)

# Report the estimate
print("Predicted salary for {} years of experience {}".format(years_experience, predicted_salary))