In [None]:
#1.SPLITTING A DATASET INTO TRAINING AND TESTING SETS USING PYTHON'S SKLEARN LIBRARY

  
    
##We can split a dataset into training and testing sets using the train_test_split function from scikit-learn. 
#Here's a brief code snippet demonstrating how to do it:

from sklearn.model_selection import train_test_split

#Assuming X is features and y is target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#-X_train and y_train will contain the training data
#-X_test and y_test will contain the testing data

In this code snippet:

#-X represents the Features of the dataset.
#-y represents the Target variable of the dataset.
#-test_size specifies the proportion of the dataset to include in the testing split. Here, it's set to 0.2, meaning 20% of the data will be used for testing, and 80% will be used for training.

#-random_state sets the seed for random number generation. It ensures that the data split is reproducible across multiple runs. It's optional but recommended for reproducibility.

#-After running this code, X_train and y_train will contain the training data, while X_test and y_test will contain the testing data.We can then use these splits to train and evaluate our machine learning models.


In [2]:
#2.A PYTHON FUNCTION TO PERFORM SIMPLE LINEAR REGRESSION FOR A GIVEN DATASET,
#USING ONLY BASIC MATHEMATICAL OPERATIONS.


#Python function to perform simple linear regression using basic mathematical operations:

def simple_linear_regression(X, y):
    n = len(X)
    
    # Calculate the mean of X and y
    mean_X = sum(X) / n
    mean_y = sum(y) / n
    
    # Calculate the slope (m) and intercept (b)
    numerator = sum((X[i] - mean_X) * (y[i] - mean_y) for i in range(n))
    denominator = sum((X[i] - mean_X) ** 2 for i in range(n))
    
    m = numerator / denominator
    b = mean_y - m * mean_X
    
    return m, b

# Example:
X = [1, 2, 3, 4, 5]
y = [2, 3, 4, 5, 6]

m, b = simple_linear_regression(X, y)
print("Slope:", m)
print("Intercept:", b)


Slope: 1.0
Intercept: 1.0


In [3]:
#3.IMPLEMENTING A PYTHON FUNCTION TO CALCULATE THE MEAN ABSOLUTE ERROR(MAE)FOR A REGRESSION MODEL,
#WITHOUT USING   ANY EXTERNAL LIBRARIES.

def mean_absolute_error(y_true, y_pred):
    n = len(y_true)
    
    # Calculate the absolute errors
    abs_errors = [abs(y_true[i] - y_pred[i]) for i in range(n)]
    
    # Calculate the mean absolute error
    mae = sum(abs_errors) / n
    
    return mae

# Example usage:
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]

mae = mean_absolute_error(y_true, y_pred)
print("Mean Absolute Error (MAE):", mae)


Mean Absolute Error (MAE): 0.5


In [4]:
#4.CREATING A PYTHON CODE TO BUILD A LOGISTIC REGRESSION MODEL USING SKLEARN.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris dataset 
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the logistic regression model
logistic_regression = LogisticRegression()

# Train the model on the training data
logistic_regression.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = logistic_regression.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
#5.A PYTHON FUNCTION TO CLASSIFY A NEW DATA POINT USING K-NEAREST NEIGHBORS(KNN)ALGORITHM
#WITHOUT USING SKLEARN.



import numpy as np

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points.
    """
    return np.sqrt(np.sum((point1 - point2)**2))

def knn_predict(X_train, y_train, X_test, k):
    """
    Predict the class label for a new data point using KNN algorithm.
    """
    # Initialize list to store predicted labels
    y_pred = []
    
    # For each data point in the test set
    for test_point in X_test:
        # Calculate distances to all points in the training set
        distances = [euclidean_distance(test_point, train_point) for train_point in X_train]
        
        # Get indices of k nearest neighbors
        nearest_indices = np.argsort(distances)[:k]
        
        # Get labels of k nearest neighbors
        nearest_labels = [y_train[i] for i in nearest_indices]
        
        # Predict the most common label among k nearest neighbors
        predicted_label = max(set(nearest_labels), key=nearest_labels.count)
        
        # Append predicted label to the list
        y_pred.append(predicted_label)
    
    return np.array(y_pred)

# Example:
X_train = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
y_train = np.array([0, 0, 1, 1])
X_test = np.array([[1.5, 2.5], [3.5, 4.5]])
k = 3

y_pred = knn_predict(X_train, y_train, X_test, k)
print("Predicted Labels:", y_pred)


Predicted Labels: [0 1]


In [6]:
#6.DEVELOPING A PYTHON CODE TO TRAIN A DECISION TREE CLASSIFIER USING SKLEARN WITH A STRAIGHTFORWORD EXAMPLE.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
decision_tree_classifier = DecisionTreeClassifier()

# Train the classifier on the training data
decision_tree_classifier.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = decision_tree_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

