# LINEAR REGRESSION

# Predicting the house price based on size 

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Training data: house size in square feet (feature) and price (target).
# The sizes are reshaped into a single-column 2-D array, the layout
# scikit-learn estimators expect for the feature matrix.
x = np.array([1500, 1600, 1700, 1800, 1900]).reshape(-1, 1)
y = np.array([300000, 320000, 340000, 360000, 380000])

# Fit a straight line (ordinary least squares) to the data.
model = LinearRegression()
model.fit(x, y)

# Predict on the training sizes so the fitted line can be drawn over the data.
y_pred = model.predict(x)

# Plot the observations (scatter) and the fitted line (red).
plt.scatter(x, y)
plt.plot(x, y_pred, color='red')
plt.xlabel('Size (sq ft)')
plt.ylabel('Price')
plt.title('Linear Regression')
# Render the figure explicitly: without this a plain script never displays
# it, and later plotting code would keep drawing onto the same axes.
plt.show()

# LOGISTIC REGRESSION: 
is used to predict binary outcomes (e.g: YES/NO, 1/0, Pass/Fail)

# Predicting whether a student will pass or fail based on study hours

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Data: hours studied (feature) and pass/fail outcome (target).
# Assumed encoding: 1 = pass, 0 = fail.
X = np.array([5, 10, 15, 20, 25]).reshape(-1, 1)
y = np.array([0, 0, 1, 1, 1])

# Model
model = LogisticRegression()
model.fit(X, y)

# Prediction: hard class labels for the training inputs
y_predict = model.predict(X)

# Display prediction
print('Predicted values:', y_predict)

# Evaluate the predicted probability of class 1 on a dense grid of hours.
# Joining the five hard 0/1 predictions with straight lines (as before)
# draws a ramp that is not the model's curve; the probability is.
X_curve = np.linspace(X.min(), X.max(), 200).reshape(-1, 1)
pass_probability = model.predict_proba(X_curve)[:, 1]

# Plotting: observed outcomes plus the fitted probability curve
plt.scatter(X, y, color='blue', label='Data')
plt.plot(X_curve, pass_probability, color='red', label='Logistic Regression')
plt.xlabel('Hours Studied')
plt.ylabel('Pass/Fail')
plt.title('Logistic Regression')
plt.legend()
# Render explicitly so the figure appears in a plain script and does not
# get mixed with plots drawn by later code.
plt.show()

# POLYNOMIAL REGRESSION
Ploynomial regresssion is an extension of linear regression that allows for the modeling of relationships between the independent and dependent variables as an nth degree polynomial.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Sample data (x -> hours studied, y -> test score)
x = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
y = np.array([1, 4, 9, 16, 25])

# Expand the single feature into polynomial terms up to degree 2
poly = PolynomialFeatures(degree=2)
x_poly = poly.fit_transform(x)

# A linear model fitted on the expanded features is a polynomial in x
model = LinearRegression()
model.fit(x_poly, y)

# Predict on a dense grid so the fitted curve is drawn smoothly.
# The grid goes through the same (already fitted) feature expansion.
x_fit = np.linspace(0, 6, 100).reshape(-1, 1)
x_fit_poly = poly.transform(x_fit)
y_pred = model.predict(x_fit_poly)

# Plot the data points and the fitted curve
plt.scatter(x, y, color='red', label='data points')
plt.plot(x_fit, y_pred, color='blue', label='Polynomial fit')
plt.xlabel('Hours studied')
plt.ylabel('Test scores')
plt.title('POLYNOMIAL REGRESSION (DEGREE 2)')
plt.legend()
# Render explicitly so the figure appears in a plain script and is not
# drawn on top of an earlier, still-open figure.
plt.show()

# DECISION TREE
Decision tree classify the data by spliting the data into subsets based on the feature values, creating a tree-like model of decision.

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Toy dataset: one numeric feature per sample (as a column) and a 0/1 label
x = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
y = np.array([0, 0, 1, 1, 1])

# Build the tree and train it in one step (fit returns the estimator)
model = DecisionTreeClassifier().fit(x, y)

# Classify two unseen feature values and show the predicted labels
new_data = np.array([2.5, 3.5]).reshape(-1, 1)
predictions = model.predict(new_data)
print(predictions)


# Random Forest
Random Forest is an ensemble method that uses multiple decision tree to improve classification accuracy

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Sample data (x -> feature, y -> labels)
x = np.array([[1], [2], [3], [4], [5]])
y = np.array([0, 0, 1, 1, 1])

# Create and fit the model.
# A forest is built from randomly resampled data, so without a fixed seed
# the predictions on this tiny dataset can differ from run to run.
# random_state pins that randomness and makes the printed output repeatable.
model = RandomForestClassifier(random_state=42)
model.fit(x, y)

# Predicting new data
new_data = np.array([[2.5], [3.5]])
predictions = model.predict(new_data)
print(predictions)

# SVM (Support Vector Machine)
SVM is used for classification by finding the hyperplane that best seperates the classes in feature space.
1. SVC : Support Vector Classifier

In [None]:
import numpy as np
from sklearn.svm import SVC

# Toy dataset: feature values 1..5 as a single column, with 0/1 labels
x = np.arange(1, 6).reshape(-1, 1)
y = np.array([0, 0, 1, 1, 1])

# Support vector classifier with a linear kernel, trained on the toy data
model = SVC(kernel='linear').fit(x, y)

# Classify two unseen feature values and show the predicted labels
new_data = np.array([2.5, 3.5]).reshape(-1, 1)
predictions = model.predict(new_data)
print(predictions)

# KNN(Kth-NEAREST NEIGHNOUR) CLASSIFIER
KNN is a non-parametric method used for classification by finding the majority class among the k-nearest neighnours of a data point.

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Toy dataset: one feature per sample (column vector) and its 0/1 label
x = np.array([1, 2, 3, 4, 5])[:, np.newaxis]
y = np.array([0, 0, 1, 1, 1])

# Classifier that votes among the 3 closest training samples
model = KNeighborsClassifier(n_neighbors=3).fit(x, y)

# Classify two unseen feature values and show the predicted labels
new_data = np.array([2.5, 3.5])[:, np.newaxis]
predictions = model.predict(new_data)
print(predictions)

# NAIVE BAYES
Probablistic ML Algorithm based on Bayes Theorm
The formula for Bayes’ Theorem is:
# P(A∣B) = ​P(B|A) * P(A) / P(B)
Where:
1. ( P(A|B) ) is the probability of event ( A ) occurring given that ( B ) is true
2. 
( P(B|A) ) is the probability of event ( B ) occurring given that ( A ) is 
reu
3. 
( P(A) ) is the probability of event ( A ) occuaneinrr
4. .
( P(B) ) is the probability of event ( B ) occurring.

In [None]:
import numpy as np
from sklearn.naive_bayes import GaussianNB

# Toy dataset: five samples with two features each, and a 0/1 label per sample
x = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
    [5, 6],
])
y = np.array([0, 0, 1, 1, 1])

# Gaussian Naive Bayes classifier, trained in one step (fit returns the estimator)
model = GaussianNB().fit(x, y)

# Classify two unseen two-feature samples and show the predicted labels
new_data = np.array([
    [2, 2],
    [4, 4],
])
predictions = model.predict(new_data)
print(predictions)

# ADABOOST ALGORITHM (ADAPTIVE BOOST)
AdaBoost, or Adaptive Boosting, is an ensemble learning technique that combines multiple weak classifiers to create a strong classifier. Here’s a detailed explanation and a Python

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Load dataset
iris = load_iris()
x = iris.data
y = iris.target

# Train test split: 30% held out, fixed seed so the split is repeatable
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Create and train the AdaBoost model.
# The weak learner is a depth-1 decision tree (a "stump").
# random_state on the ensemble pins the randomness used while fitting the
# stumps, so the reported accuracy is reproducible like the split above.
base_model = DecisionTreeClassifier(max_depth=1)
model = AdaBoostClassifier(estimator=base_model, n_estimators=50, random_state=42)
model.fit(x_train, y_train)

# Make prediction on the held-out rows and report the fraction correct
predictions = model.predict(x_test)
print('Accuracy:', accuracy_score(y_test, predictions))

# XGBOOST ALGORITHM
XGBoost (eXtreme Gradient Boosting) is a powerful and efficient implementation of the gradient boosting framework.
xgb : eXtreme Gradient Boosting

In [None]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Fetch the Iris features and labels directly as a pair of arrays
x, y = load_iris(return_X_y=True)

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Wrap both splits in XGBoost's DMatrix container
train_dmatrix = xgb.DMatrix(x_train, label=y_train)
test_dmatrix = xgb.DMatrix(x_test, label=y_test)

# Booster settings: 3-class softmax objective, evaluated with multiclass log loss
params = dict(objective='multi:softmax', num_class=3, eval_metric='mlogloss')

# Train for 10 boosting rounds
model = xgb.train(params=params, dtrain=train_dmatrix, num_boost_round=10)

# Predict on the held-out rows
y_pred = model.predict(test_dmatrix)

# Score the predictions against the true labels and report as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
# 