<a href="https://colab.research.google.com/github/SURESHBEEKHANI/Machine-Learning-Regression-Algorithms-/blob/main/machine_learning_regression_algorithms_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np  # Importing NumPy for data manipulation operations
import pandas as pd  # Importing Pandas to handle data in table-like structures

# Setting up Matplotlib for plotting (data visualization)

import matplotlib.pyplot as plt  # Importing Matplotlib's pyplot module for creating visual plots
%matplotlib inline
import seaborn as sns  # Importing Seaborn for advanced data visualization techniques

import sklearn  # Importing scikit-learn for machine learning tasks

import warnings  # Standard-library module for controlling warning messages
warnings.filterwarnings('ignore')  # Suppress every warning category for the rest of the session

# Default figure size (width, height) applied to every Matplotlib figure
plt.rcParams['figure.figsize'] = [10, 5]

# Explicitly silence FutureWarning too. The blanket 'ignore' filter above already
# covers this category, so the call is redundant but harmless.
warnings.simplefilter(action="ignore", category=FutureWarning)  # Ignoring future warnings

# The following script is written to be easily understood by someone without a technical background from Pakistan.
# Each line includes comments explaining its purpose in simple terms.


In [None]:
# Load the housing dataset into a pandas DataFrame named 'data'.
# pd.read_csv parses a CSV (comma-separated values) file into a table-like structure.
# NOTE(review): "/content/" looks like the Colab working directory; adjust the path
# when running this notebook anywhere else.
import pandas as pd
data = pd.read_csv("/content/USA_Housing.csv")


In [None]:
# Display the dimensions of the dataset via the 'shape' attribute (it is an attribute, not a function).
# shape is a tuple: the first element is the number of rows, the second the number of columns.
# For example, (1000, 6) means 1000 rows and 6 columns.
data.shape


In [None]:
# Preview the first rows of the dataset to get a quick look at the columns and values.
# head() returns the first 5 rows by default; pass a number to change that,
# e.g. data.head(10) shows the first 10 rows.
data.head()


In [None]:
# Print a summary of the dataset: each column's name, data type and non-null count.
# Comparing the non-null counts with the total number of rows reveals missing values.
data.info()


In [None]:
# Same summary as the previous cell (column names, data types, non-null counts); nothing has changed in between.
data.info()

In [None]:
# Remove the 'Address' column because it is not needed for training the model.
# `columns=` already says which axis the label belongs to, so no separate
# `axis` argument is passed alongside it.
# NOTE: this modifies 'data' in place; re-running the cell raises KeyError
# because the column is already gone.
data.drop(columns='Address', inplace=True)


In [None]:
# Remove every row that contains at least one missing value (dropna's default behaviour).
# inplace=True modifies 'data' directly instead of returning a new DataFrame.
data.dropna(inplace=True)


In [None]:
# Summary statistics for each numerical column: count, mean, standard deviation,
# plus the five-number summary (minimum, 25th percentile, median, 75th percentile, maximum).
data.describe()


In [None]:
# Check the final shape of the data (after dropping 'Address' and rows with missing values)
# before it is used for training.
data.shape

# The features/target split, scaling and model training follow in the cells below.


In [None]:
# Split the DataFrame into the model's inputs and the value it should predict.
#   y -> target (output variable)
#   x -> features (input variables)

# Target: the 'Price' column on its own.
y = data['Price']

# Features: every remaining column, i.e. the DataFrame without 'Price'.
# drop() returns a new DataFrame here, so 'data' itself is left unchanged.
x = data.drop(columns='Price')


In [None]:
# Display the shape of the feature table: (number of rows, number of feature columns).
x.shape


In [None]:
# Display the shape of the full target column. At this point y has not yet been
# split into training and testing parts; its length should match the row count of x.
y.shape


In [None]:
# Import MinMaxScaler for feature scaling technique. This helps in adjusting the scale of features without changing their relationship.
from sklearn.preprocessing import MinMaxScaler

# Initialize the MinMaxScaler, which is a tool used to scale features to a specified range (usually between 0 and 1).
scaler = MinMaxScaler()

# Apply Min-Max scaling to your features (X). This step rescales the data so that each feature is within a specific range.
x_scaled = scaler.fit_transform(x)

# The variable x_scaled now contains the scaled features using Min-Max scaling. This ensures that all features have a consistent scale for better model performance.


In [None]:
# Print the original values of x
print("Original values of x:")
print(x)

# Print the scaled values of x after Min-Max scaling
print("\nScaled values of x after Min-Max scaling:")
print(x_scaled)


In [None]:
# Splitting the data into training and testing sets
# Importing train_test_split function from sklearn library, which helps in splitting the data
from sklearn.model_selection import train_test_split

# Split the scaled features (x_scaled) and target variable (y) into training and testing sets
# The test_size parameter specifies the proportion of the dataset to include in the testing set
# Here, 10 samples will be used for testing
# The random_state parameter ensures reproducibility of the split
# It sets a seed for random number generation, so the same split can be reproduced if needed
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=10, random_state=101)


In [None]:
# Import the LinearRegression class from scikit-learn; it implements a linear regression model.
from sklearn.linear_model import LinearRegression

# Create the model with its default settings.
# Constructor options (for example fit_intercept) could be passed here if needed.
linear_model = LinearRegression()

# Fit (train) the model on the training features and training targets.
linear_model.fit(x_train, y_train)

# linear_model is now a fitted model and can be used to make predictions.


In [None]:
# Use the fitted linear regression model to predict the target for the test features.
y_pred = linear_model.predict(x_test)

# Show how many predictions were made (the shape), followed by the predicted
# values themselves; the shape and the values are printed on separate lines.
print(y_pred.shape, y_pred, sep="\n")


In [None]:
# Scatter plot of the actual test targets (x-axis) against the model's predictions (y-axis).
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color='blue', label="Actual data Point ")

# Reference line y = x: a perfect model would put every point on this line.
# The range is computed once with the Series' own min()/max().
lowest, highest = y_test.min(), y_test.max()
plt.plot([lowest, highest], [lowest, highest], color='red', label='Ideal Line ')

# Titles and axis labels
plt.title('Actual vs Predicted Values')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')

# Without this call the label= arguments above are never drawn.
plt.legend()

# Display the plot
plt.show()