# Import Dataset and libraries

In [25]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
df = pd.read_csv('placement.csv')
df.sample(4)

Unnamed: 0,cgpa,package
97,5.98,2.84
191,7.28,3.48
2,7.82,3.25
38,8.62,4.36


In [None]:
# see how our data looks like 

fig = px.scatter(
    df,
    x= df['cgpa'],
    y= df['package'], 
)

fig.show()

# Train Test Split

In [9]:
X = df.iloc[:, 0:1]
y = df.iloc[:, -1]

In [None]:
# X -> input
# y -> target/ output
# This is simple so this is simple linear regression 😆
print(X.shape, y.shape)

(200, 1) (200,)


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [11]:
print(X_train.shape, X_test.shape)

(160, 1) (40, 1)


In [13]:
# make a object of linear regression
lr = LinearRegression()

In [15]:
# now learn the parameters
lr.fit(X_train, y_train)

In [16]:
X_train.head()

Unnamed: 0,cgpa
137,7.14
163,8.93
111,5.42
123,5.1
109,7.77


In [17]:
y_test.head()

112    4.10
29     3.49
182    2.08
199    2.33
193    1.94
Name: package, dtype: float64

# Now Predict

In [None]:
# this predict the whole test data
lr.predict(X_test)

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

- if we want to only predict the first value of the data
- we predict for the first value that is 8.58
- our model predict the 3.8 but actual is 4.10


array([[8.58]])

In [49]:
X_test.iloc[0]

cgpa    8.58
Name: 112, dtype: float64

In [24]:
lr.predict(X_test.iloc[[0]])

array([3.89111601])

# Error

### ℹ️ What they tell you:
- **R² Score** → how much of the variance in the target is explained by the model (closer to 1 is better).
- **MSE** → penalizes larger errors more than smaller ones.
- **MAE** → average absolute difference between predicted and actual values (more interpretable in some cases).

Let me know if you want to visualize the residuals too — that's also super helpful in regression analysis!

In [27]:
# Predict on test set
y_pred = lr.predict(X_test)

# R² Score
r2 = r2_score(y_test, y_pred)

# Mean Squared Error
mse = mean_squared_error(y_test, y_pred)

# Mean Absolute Error
mae = mean_absolute_error(y_test, y_pred)

print(f"R² Score: {r2:.4f}")
print(f"Mean Squared Error: {mse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")

R² Score: 0.7807
Mean Squared Error: 0.1213
Mean Absolute Error: 0.2885


# Plot the line

In [None]:
# Scatter plot for actual data
scatter = go.Scatter(
    x= df['cgpa'],
    y= df['package'],
    mode= 'markers',
    name= 'Actual Data'
)

# for prediction line
line = go.Scatter(
    x= X_train.squeeze(), # squeeze in case it's a single feature
    y=  lr.predict(X_train),
    mode='lines',
    name= 'Linear Regression Line',
)

# combine both
fig = go.Figure(data=[scatter, line])

# Set Axis labels
fig.update_layout(
    title= "CGPA vs Package",
    xaxis_title= "CGPA",
    yaxis_title= "Package (in LPA)",
    template='plotly_white',
)
fig.show(render='iframe')

In [43]:
# This is the slope
m = lr.coef_
m

array([0.55795197])

In [44]:
# intercept
b = lr.intercept_
b

np.float64(-0.8961119222429144)

In [45]:
# y = mx + b
m * 8.58 + b

array([3.89111601])

In [46]:
m * 9.5 + b

array([4.40443183])

In [47]:
m * 100 + b

array([54.89908542])

In [48]:
m * 7.14 + b

array([3.08766517])

In [77]:
def predict_y_of_given_x(x: float = 0, num:float = 0.00) -> int:
    """ This function help to predict the function if you give the 'x' value."""
    
    m = lr.coef_ # slope
    b = lr.intercept_ # intercept
    # equation
    y = m * x + b
    print(f"Manual Calculated y is {y}")
    
    # Check if the user provide value is already present is dataframe
    if (df['cgpa'] == num ).any():
        value = df[df['cgpa'] == num]['cgpa'].values.reshape(-1, 1)
        predict_value = lr.predict(value)
    print(f"Sklearn predict this value is {predict_value}")
        
    
    

In [76]:
predict_y_of_given_x(8.58, 8.58)

Manual Calculated y is [3.89111601]
Sklearn predict this value is [3.89111601]



X does not have valid feature names, but LinearRegression was fitted with feature names



In [78]:
def predict_y_of_given_x(x, num):
    # manually calculated slope and intercept
    m = lr.coef_[0]
    b = lr.intercept_

    # equation
    y = m * x + b
    print(f"Your y is {y}")

    # Check if 'num' exists in the df['cgpa']
    if not df[df['cgpa'] == num].empty:
        value = pd.DataFrame({'cgpa': [num]})
        predicted = lr.predict(value)[0]
        print(f"Sklearn predicts this value is {predicted}")
    else:
        print(f"No entry in dataset with cgpa = {num}")
