# **Simple Linear Regression (Code from Scratch)**

In [29]:
class MyLR:
    """Simple (single-feature) linear regression fitted by least squares.

    The model is the straight line ``y = m * x + b``.

    Attributes:
        m: The slope of the line (``None`` until ``fit`` has been called).
        b: The y-intercept of the line (``None`` until ``fit`` has been
            called).
    """

    def __init__(self):
        """Create an unfitted model; ``fit`` sets ``m`` and ``b``."""
        self.m = None
        self.b = None

    @staticmethod
    def _pad_after_sample_axis(arr, rank):
        """Insert length-1 axes after axis 0 until ``arr`` has ``rank`` dims.

        This lets row ``i`` of a lower-rank array broadcast against row ``i``
        of a higher-rank one, e.g. ``y`` of shape ``(n,)`` against ``X`` of
        shape ``(n, 1)``.
        """
        extra = (1,) * (rank - arr.ndim)
        return arr.reshape(arr.shape[:1] + extra + arr.shape[1:])

    def fit(self, X_train, y_train):
        """Fit the line to the training data using the closed-form solution.

        The estimates are::

            m = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x)) ** 2)
            b = mean(y) - m * mean(x)

        The fitted slope and intercept are stored on ``self.m`` / ``self.b``
        and also printed (slope first, then intercept).

        Args:
            X_train: The training data features. Anything ``numpy.asarray``
                accepts; the first axis indexes samples.
            y_train: The training data labels, with the same number of
                samples as ``X_train``.

        Raises:
            ValueError: If the training data is empty, if ``X_train`` and
                ``y_train`` have different numbers of samples, or if
                ``X_train`` has zero variance (the slope is undefined).
        """
        # Local import: in the notebook this class is defined in a cell that
        # precedes the cell importing numpy.
        import numpy as np

        # Coerce to arrays so that lists and pandas Series (whose [] indexing
        # is label-based, not positional) are handled uniformly.
        features = np.asarray(X_train)
        labels = np.asarray(y_train)

        n_samples = features.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on empty training data.")
        if labels.shape[0] != n_samples:
            raise ValueError(
                "X_train and y_train must have the same number of samples "
                f"(got {n_samples} and {labels.shape[0]})."
            )

        # Compute each mean once instead of once per sample.
        x_mean = features.mean()
        y_mean = labels.mean()

        rank = max(features.ndim, labels.ndim)
        x_dev = self._pad_after_sample_axis(features - x_mean, rank)
        y_dev = self._pad_after_sample_axis(labels - y_mean, rank)

        # Numerator and denominator of the slope equation, summed over the
        # sample axis.
        num = (x_dev * y_dev).sum(axis=0)
        den = (x_dev * x_dev).sum(axis=0)
        if np.any(den == 0):
            raise ValueError(
                "X_train has zero variance; the slope is undefined."
            )

        # Calculate the slope and y-intercept.
        self.m = num / den
        self.b = y_mean - (self.m * x_mean)

        # Print the slope and y-intercept.
        print(self.m)
        print(self.b)

    def predict(self, X_test):
        """Predict labels for the given features using ``m * x + b``.

        Args:
            X_test: The test data features (a scalar or an array).

        Returns:
            The predicted labels, with the shape produced by broadcasting
            ``X_test`` against the fitted slope and intercept.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.m is None or self.b is None:
            raise RuntimeError("Model is not fitted yet; call fit() first.")

        return self.m * X_test + self.b

In [30]:
# pip install numpy
# pip install pandas

# import numpy and pandas
import numpy as np
import pandas as pd

In [31]:
# Load the placement dataset from a CSV file into a DataFrame.
# NOTE(review): the '/content/...' path looks like a Google Colab location —
# adjust it when running the notebook elsewhere.
df = pd.read_csv('/content/placement.csv')
# Last expression in the cell, so the notebook displays the first rows of df.
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [32]:
# Divide the data into X (input) and y (output) by column position:
# column 0 becomes the feature and column 1 becomes the target.
# NOTE(review): presumably these are the cgpa and package columns — confirm
# against the column order of the loaded CSV.
X = df.iloc[:, 0].values # .values extracts the column as a NumPy array
y = df.iloc[:, 1].values

In [33]:
# pip install scikit-learn

# Split the data into train and test sets.
from sklearn.model_selection import train_test_split

# test_size=0.2 holds out 20% of the samples for testing; the fixed
# random_state makes the shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [34]:
# Create an unfitted MyLR model; its slope (m) and intercept (b) are None
# until fit() is called.
lr = MyLR()

In [35]:
# Train the model on the training split. fit() stores the slope and
# intercept on lr.m / lr.b and prints them (slope first, then intercept).
lr.fit(X_train, y_train)

In [26]:
# Predict the target for the first test sample (m * x + b). As the last
# expression in the cell, the result is displayed by the notebook.
lr.predict(X_test[0])

3.891116009744203