In [None]:
from tkinter import *
from tkinter import filedialog
import cv2 as cv

# A function to allow the user to select the image they wish to analyse. 
# Function inputs args: None. 
# Function output 1: The file path of that which was selected by the user. 
def file_selection_dialog():
    """Ask the user to pick a file through a Tk open-file dialog.

    A Tk root window is created to host the dialog and is destroyed once
    the dialog has returned.

    Returns:
        The value returned by ``filedialog.askopenfilename`` for the
        user's selection.
    """
    window = Tk()
    window.title('Please select the file in question')

    # The dialog starts at the filesystem root and applies no file-type filter.
    chosen_path = filedialog.askopenfilename(
        initialdir="/",
        title="Select A File",
        filetypes=[("All files", "*.*")],
    )

    # Tear the host window down before handing the path back to the caller.
    window.destroy()
    return chosen_path

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import csv 
import pandas as pd

# Function inputs args 1: file_path --> Input as string. The file path for the data in question.
# Function output 1: A tuple (m, c) holding the fitted gradient and y-intercept.
def linear_regression(file_path, learning_rate=0.0001, iterations=1000):
    """Fit a straight line to two-column CSV data and plot the result.

    The first CSV column is used as X and the second as Y. The line
    ``y = m*x + c`` is fitted by batch gradient descent on the mean squared
    error, then drawn in red over a scatter plot of the data.

    Args:
        file_path: Path of the CSV file, passed to ``pandas.read_csv``.
        learning_rate: Step size applied to every gradient update. If it is
            too large, the updates may diverge instead of converging on the
            best-fit values of m and c.
        iterations: Number of gradient-descent updates to perform.

    Returns:
        Tuple ``(m, c)``: the fitted gradient and y-intercept.

    Raises:
        ValueError: If the CSV file contains no data rows.
    """
    # Import the csv file.
    data = pd.read_csv(file_path)
    X = data.iloc[:, 0]
    Y = data.iloc[:, 1]

    # Without this guard an empty file fails later with an opaque
    # ZeroDivisionError when the gradients are averaged over zero points.
    if len(X) == 0:
        raise ValueError("The CSV file contains no data rows to fit.")

    m, c = _gradient_descent_fit(
        X.to_numpy(dtype=float),
        Y.to_numpy(dtype=float),
        learning_rate,
        iterations,
    )

    # Evaluate the fitted line at the two x extremes. Pairing min(X)/max(X)
    # with the min/max of the predictions would draw a rising line even when
    # the fitted gradient is negative.
    x_ends = [X.min(), X.max()]
    y_ends = [m * x_end + c for x_end in x_ends]

    # Plot the original data with the new linear regression line.
    plt.scatter(X, Y)
    plt.plot(x_ends, y_ends, color='red')
    plt.rcParams.update({'font.size': 15})
    # The labelpad argument alters the distance of the axis label from the axis itself.
    plt.ylabel('Y data', labelpad=10)
    plt.xlabel('X data', labelpad=10)
    plt.title('Training data and linear line of best fit', pad=15)
    plt.show()

    return (m, c)


def _gradient_descent_fit(x, y, learning_rate, iterations):
    """Return ``(m, c)`` fitted to ``y = m*x + c`` by batch gradient descent."""
    m = 0.0  # 'm' represents the gradient.
    c = 0.0  # 'c' represents the y intercept.

    for _ in range(iterations):
        residuals = y - (m * x + c)

        # Both derivatives are taken from the same (m, c) pair before either
        # value is updated.
        derivative_m = -2 * np.mean(x * residuals)
        derivative_c = -2 * np.mean(residuals)

        m -= learning_rate * derivative_m
        c -= learning_rate * derivative_c

    return m, c