In [5]:
import numpy as np
from sklearn.model_selection import train_test_split

# Read the comma-separated dataset into a 2-D float array
data = np.loadtxt('File_Name.csv', delimiter=',')
# Column 0 holds the feature (X); column 1 holds the target variable (y)
X, y = data[:, 0], data[:, 1]

# Hold out 30% of the samples for testing; the fixed random_state keeps
# the split reproducible from run to run
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Define a function to fit the linear regression model
def fit(x_train, y_train):
    """Fit the least-squares line ``y = m * x + c`` for a single feature.

    Args:
        x_train: Array-like of feature values.
        y_train: Array-like of target values, one per feature value.

    Returns:
        Tuple ``(m, c)`` with the slope and intercept of the fitted line.

    Raises:
        ValueError: If either input is empty, or if every value in
            ``x_train`` is identical (the slope is then undefined).
    """
    # Work in float64 so lists and integer arrays are accepted and the
    # squared terms cannot overflow an integer dtype.
    x = np.asarray(x_train, dtype=float)
    y = np.asarray(y_train, dtype=float)
    if x.size == 0 or y.size == 0:
        raise ValueError("fit() requires at least one training sample")

    x_mean = x.mean()
    y_mean = y.mean()

    # Centre the data before multiplying. The algebraically equivalent
    # mean(x*y) - mean(x)*mean(y) subtracts two nearly equal large numbers
    # when the mean is large compared to the spread, which destroys
    # precision; the centred sums do not have that problem.
    dx = x - x_mean
    num = (dx * (y - y_mean)).sum()
    den = (dx ** 2).sum()
    if den == 0:
        raise ValueError(
            "fit() cannot determine a slope: all x_train values are identical"
        )

    # Slope (m) from the least squares solution
    m = num / den
    # Intercept (c): the fitted line passes through the point of means
    c = y_mean - m * x_mean
    return m, c

# Define a function to predict the target variable
def predict(x, m, c):
    """Evaluate the line with slope ``m`` and intercept ``c`` at ``x``.

    Args:
        x: Value(s) at which to evaluate the line.
        m: Slope of the line.
        c: Intercept of the line.

    Returns:
        ``m * x + c``.
    """
    scaled = m * x
    return scaled + c

# Define a function to calculate the coefficient of determination (R^2 score)
def score(y_truth, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Args:
        y_truth: Array-like of observed target values.
        y_pred: Array-like of predicted target values, one per observation.

    Returns:
        ``1 - u / v`` where ``u`` is the residual sum of squares and ``v``
        is the total sum of squares. When ``y_truth`` is constant
        (``v == 0``) the ratio is undefined; in that case 1.0 is returned
        for a perfect prediction and 0.0 otherwise.

    Raises:
        ValueError: If ``y_truth`` is empty.
    """
    # Accept lists as well as arrays, and avoid integer wrap-around in the
    # subtraction and squaring below.
    y_truth = np.asarray(y_truth, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_truth.size == 0:
        raise ValueError("score() requires at least one sample")

    # Residual sum of squares (u) and total sum of squares (v)
    u = ((y_truth - y_pred) ** 2).sum()
    v = ((y_truth - y_truth.mean()) ** 2).sum()

    if v == 0:
        # Constant target: u / v would be nan or inf. Return a finite
        # value instead, using the same convention as scikit-learn's
        # r2_score default.
        return 1.0 if u == 0 else 0.0

    # Calculate R^2 score
    return 1 - u / v

# Define a function to calculate the mean squared error (cost function)
def cost(x, y, m, c):
    """Return the mean squared error of the line ``m * x + c`` against ``y``.

    Args:
        x: Array of feature values.
        y: Array of observed target values.
        m: Slope of the line.
        c: Intercept of the line.

    Returns:
        The mean of the squared residuals ``y - m * x - c``.
    """
    residual = y - m * x - c
    squared_error = residual ** 2
    return squared_error.mean()

# Estimate the slope and intercept from the training split
m, c = fit(X_train, Y_train)

# Evaluate the fitted line on the training and testing inputs
y_train_pred = predict(X_train, m, c)
y_test_pred = predict(X_test, m, c)

# Report, one value per line and rounded to 2 decimals:
# testing R^2, training R^2, then the training mean squared error
print(
    round(score(Y_test, y_test_pred), 2),
    round(score(Y_train, y_train_pred), 2),
    round(cost(X_train, Y_train, m, c), 2),
    sep='\n',
)


0.922
0.904
