# LINEAR REGRESSION FOR MULTIPLE VARIABLES


# House Price Prediction

## Importing Relevant Packages

In [1]:
# Ignore a specific warning (e.g., DeprecationWarning)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Import the pandas library as 'pd' for data manipulation and analysis
import pandas as pd

# Import the numpy library as 'np' for numerical operations and calculations
import numpy as np

# Import the matplotlib.pyplot library as 'plt' for data visualization
import matplotlib.pyplot as plt

# Import linear_model from the scikit-learn (sklearn) library for machine learning
from sklearn import linear_model

# Print a message to indicate that the necessary packages have been imported
print("Packages imported!")


Packages imported!


## Reading Dataset 

In [2]:
# Relative path to the CSV file containing the dataset
# (resolved against the notebook's current working directory)
path = "homeprices.csv"

# Use pandas to read the data from the CSV file into a DataFrame named 'df'
df = pd.read_csv(path)

# Display the DataFrame to inspect the data
# (a bare 'df' renders the whole frame, not just the first few rows)
df


Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [3]:
# Print a concise summary of 'df': column names, non-null counts, dtypes, and memory usage.
# The non-null counts are what reveal missing values in a column.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   area      6 non-null      int64  
 1   bedrooms  5 non-null      float64
 2   age       6 non-null      int64  
 3   price     6 non-null      int64  
dtypes: float64(1), int64(3)
memory usage: 324.0 bytes


## Data Cleaning

Fill the missing value in the `bedrooms` column with the column's median.

In [4]:
# Median of the known 'bedrooms' values (Series.median() skips NaN by default)
median = df.bedrooms.median()

# Assign the filled column back to 'df' rather than calling
# fillna(..., inplace=True) on df.bedrooms: an in-place call through chained
# access raises a FutureWarning in pandas 2.x and no longer modifies 'df'
# once Copy-on-Write is active (pandas 3.0), leaving the NaN in place.
df["bedrooms"] = df["bedrooms"].fillna(median)

# Display the cleaned DataFrame to confirm no 'bedrooms' value is missing
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


## Training LinearRegression Model

In [5]:

# Dependent variable: the 'price' column the model learns to predict
y = df['price']

# Independent variables: names of every column of 'df' other than 'price'
X = df.columns.drop('price').tolist()

# Build the LinearRegression estimator
reg = linear_model.LinearRegression()

# Train it on the feature columns 'df[X]' against the target 'y'
reg.fit(df[X], y)


In [6]:
# Use the trained 'reg' Linear Regression model to predict the price of one house.
# Input features: 'area', 'bedrooms', and 'age' for a specific data point.

# Gather input from the user
area = float(input("Enter the area of the house: "))  # Convert input to float
bedrooms = int(input("How many bedrooms does the house have? "))  # Convert input to int
age = int(input("How old is the house? "))  # Convert input to int

# Create an input feature list with the user-provided values
input_features = [[area, bedrooms, age]]

# The model was fitted on a DataFrame, so wrap the row in a DataFrame carrying the
# same column names ('X'); predicting from a bare list makes scikit-learn warn
# that the input has no valid feature names.
input_frame = pd.DataFrame(input_features, columns=X)

# predict() returns a one-element array. Take element 0 explicitly before
# converting: calling int() on a whole array is deprecated in NumPy 1.25+.
predicted_value = int(reg.predict(input_frame)[0])

# Print the predicted value, which is the estimated outcome.
print("Predicted Price Value of the House is :", predicted_value)


Enter the area of the house:  3000
How many bedrooms does the house have?  3
How old is the house?  40


Predicted Price Value of the House is : 498408




In [7]:
# Access the coefficients (slopes) of the fitted linear regression model:
# one value per input feature, in the same order as the feature columns passed to fit().
# Each value is the change in the predicted target for a one-unit increase in that feature.
reg.coef_


array([  112.06244194, 23388.88007794, -3231.71790863])

In [8]:
# Access the intercept (bias) term of the fitted linear regression model:
# the predicted value of the target when all input features are zero.
reg.intercept_


221323.0018654043

## Linear Regression Equation (MultiVariate)

### $y = m_1 x_1 + m_2 x_2 + m_3 x_3 + b$

### $\text{price} = 112.06244194 \cdot \text{area} + 23388.88007794 \cdot \text{bedrooms} - 3231.71790863 \cdot \text{age} + 221323.0018654043$