Title: Introduction to Scikit-Learn & Machine Learning Models

Task 1: Installing and Setting Up Scikit-Learn

In [None]:
# Import the package and print its version string to confirm that the
# installation is usable from this environment.
import sklearn

print(f"Scikit-Learn version: {sklearn.__version__}")


Task 2: Loading Built-in Datasets

In [None]:
# Fetch two of the toy datasets that ship with scikit-learn. Both objects
# are kept as globals because later cells read from them.
from sklearn import datasets

iris, digits = datasets.load_iris(), datasets.load_digits()

# Show which fields the Iris object exposes and the dimensions of the
# Digits feature matrix.
print("Iris data keys:", iris.keys())
print("Digits data shape:", digits.data.shape)


Task 3: Understanding Data Structures

In [None]:
# Summarise the Iris dataset loaded earlier: feature and class labels,
# followed by the first five samples and their target values.
for label, value in (
    ("Feature names:", iris.feature_names),
    ("Target names:", iris.target_names),
    ("First 5 rows of data:\n", iris.data[:5]),
    ("First 5 target values:", iris.target[:5]),
):
    print(label, value)







Title: Building a Simple ML Model in Scikit-Learn

Task 1: Simple Linear Regression
Implement linear regression with a small dataset

In [None]:
# Fit a straight line to five points that lie exactly on y = 2x, then ask
# the fitted model for its prediction at x = 6.
from sklearn.linear_model import LinearRegression
import numpy as np

# Feature matrix with one row per sample and a single column: [[1], ..., [5]].
X = np.arange(1, 6).reshape(-1, 1)
# Targets are twice the feature value: [2, 4, 6, 8, 10].
y = 2 * X.ravel()

# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X, y)

print("Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Predicted value for 6:", model.predict([[6]]))











Task 2: Decision Tree Classifier
Build a decision tree model with the Iris dataset:

In [None]:
# Train a decision tree on a 70/30 split of the Iris dataset and report
# its accuracy on the held-out 30%.
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# A fixed seed puts the same samples in the test set on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=42
)

# Seed the tree as well as the split: tree construction involves random
# feature permutation and random tie-breaking between equally good splits,
# so an unseeded tree can differ from run to run even on identical data.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

y_pred = dt_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


Task 3: K-Nearest Neighbors Classifier
Use the KNN algorithm on the digits dataset:

In [None]:
# Classify handwritten digits with a 3-nearest-neighbours model and score
# it on a held-out 30% of the samples.
from sklearn.neighbors import KNeighborsClassifier

# X and y are rebound to the Digits data here; later cells reuse them.
X, y = digits.data, digits.target

# This also replaces the earlier train/test variables with a Digits split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# fit() returns the estimator, so the classifier is built and trained in one step.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))


Title: Training a Classification Model

Task 1: Logistic Regression
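Train a logistic regression model on the current train/test split (the Digits split created in the K-Nearest Neighbors task, when the cells are run in order)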

In [None]:
# Fit a logistic regression classifier on the current training split and
# report accuracy on the matching test split.
# NOTE: X_train / X_test / y_train / y_test are globals from an earlier
# cell; when cells run top to bottom they hold the Digits split.
from sklearn.linear_model import LogisticRegression

# The iteration cap is set explicitly to 1000 rather than left at the
# library default.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))


Task 2: Support Vector Machine
Train a Support Vector Classifier on the Iris dataset

In [None]:
# Train a Support Vector Classifier on Iris and report accuracy on samples
# the model has not seen during training.
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Hold out 30% of the data for evaluation. Scoring on the same samples used
# for fitting measures memorisation rather than generalisation and
# overstates accuracy.
# The split uses its own variable names so the X_train / X_test / y_train /
# y_test globals that other cells rely on are left untouched.
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=42
)

svc_model = SVC()
svc_model.fit(X_iris_train, y_iris_train)
y_pred = svc_model.predict(X_iris_test)

print("Accuracy:", accuracy_score(y_iris_test, y_pred))


Task 3: Naive Bayes Classifier
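Train a Gaussian Naive Bayes model on the current train/test split (the Digits split created in the K-Nearest Neighbors task, when the cells are run in order)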

In [None]:
# Fit a Gaussian Naive Bayes classifier on the current training split and
# report accuracy on the matching test split.
# NOTE: the split variables are globals set by an earlier cell; when cells
# run top to bottom they hold the Digits split.
from sklearn.naive_bayes import GaussianNB

# fit() returns the estimator, so it can be created and trained in one step.
nb_model = GaussianNB().fit(X_train, y_train)
y_pred = nb_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))


Title: Understanding Model Performance & Hyperparameter Tuning

Task 1: Using Confusion Matrix
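Evaluate a model with a confusion matrix. The cell uses the current `y_test` and `y_pred`, which hold the Gaussian Naive Bayes predictions on the Digits test split when the cells are run in order: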

In [None]:
# Tabulate true versus predicted labels and draw the table as a plot.
# NOTE: y_test and y_pred are globals; y_pred holds whichever predictions
# were computed most recently, so run this right after the model of interest.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
disp = ConfusionMatrixDisplay(cm)
disp.plot()


Task 2: Cross-validation Score
Perform k-fold cross-validation (k = 5) of the KNN classifier on the full Digits dataset:

In [None]:
# Score the KNN classifier with 5-fold cross-validation on the full X / y
# arrays, then print the per-fold scores and their mean.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(estimator=knn, X=X, y=y, cv=5)

print("Cross-validation scores:", scores)
print("Average score:", scores.mean())


Task 3: Hyperparameter Tuning using Grid Search
Optimize hyperparameters using GridSearchCV

In [None]:
# Search over neighbour count and vote weighting for KNN, scoring every
# combination with 5-fold cross-validation on the current training split.
from sklearn.model_selection import GridSearchCV

# 3 neighbour counts x 2 weighting schemes = 6 candidate configurations.
param_grid = {"n_neighbors": [3, 5, 7], "weights": ["uniform", "distance"]}

# fit() returns the search object, so it can be built and run in one step.
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)
