Title: Introduction to Scikit-Learn & Machine Learning Models

Task 1: Installing and Setting Up Scikit-Learn

In [None]:
# Write your code from here
# Task 1: Installing and Setting Up Scikit-Learn

# Step 1: Report which Python interpreter this kernel is running on
import sys
print(f"Your Python version: {sys.version}")

# Step 2: Install scikit-learn with pip (skip if it is already installed)
# Run the following from a terminal or command prompt, ideally inside a
# virtual environment:
#     pip install -U scikit-learn
# If pip itself is missing, follow the instructions at
# https://pip.pypa.io/en/stable/installation/

# Step 3: Verify the installation by importing the package
try:
    import sklearn
except ImportError:
    # The import failed, so tell the reader how to install the package.
    print("Scikit-learn is not installed. Please run 'pip install -U scikit-learn' in your terminal.")
else:
    # The import succeeded, so report the installed version.
    print(f"Scikit-learn version: {sklearn.__version__}")
    print("Scikit-learn is successfully installed and set up!")

# Optional: other libraries that are commonly used alongside scikit-learn
#     pip install numpy scipy pandas matplotlib seaborn


Task 2: Loading Built-in Datasets

In [None]:
# Write your code from here
# Task 2: Loading In-built Datasets in Scikit-Learn
#
# Each loader returns a dictionary-like object; the cells below pull out the
# feature matrix (`data`) and label vector (`target`) and print a short
# summary of each dataset.

from sklearn import datasets

# Step 1: Load the Iris dataset
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target
print("Iris dataset loaded.")
print(f"Type of iris object: {type(iris)}")
print(f"Keys in iris object: {iris.keys()}")
print("\nIris feature names:", iris.feature_names)
print("Shape of Iris data:", iris_X.shape)
print("Shape of Iris target:", iris_y.shape)
print("\nFirst 5 samples of Iris data:\n", iris_X[:5])
print("\nTarget values for the first 5 samples:", iris_y[:5])
print("\nTarget names:", iris.target_names)

# Step 2: Load the Digits dataset
digits = datasets.load_digits()
digits_X, digits_y = digits.data, digits.target
print("\nDigits dataset loaded.")
print(f"Type of digits object: {type(digits)}")
print(f"Keys in digits object: {digits.keys()}")
# Only the first 10 feature names are shown to keep the output short.
print("\nDigits feature names (pixel intensities):", digits.feature_names[:10], "...")
print("Shape of Digits data:", digits_X.shape)
print("Shape of Digits target:", digits_y.shape)
print("\nFirst sample of Digits data (as a flattened array):\n", digits_X[0])
print("\nTarget value for the first sample:", digits_y[0])
print("\nTarget names (digit classes):", digits.target_names)

# Step 3: Load the Breast Cancer dataset
breast_cancer = datasets.load_breast_cancer()
cancer_X, cancer_y = breast_cancer.data, breast_cancer.target
print("\nBreast Cancer dataset loaded.")
print(f"Type of breast_cancer object: {type(breast_cancer)}")
print(f"Keys in breast_cancer object: {breast_cancer.keys()}")
print("\nBreast Cancer feature names:", breast_cancer.feature_names)
print("Shape of Breast Cancer data:", cancer_X.shape)
print("Shape of Breast Cancer target:", cancer_y.shape)
print("\nFirst 5 samples of Breast Cancer data:\n", cancer_X[:5])
print("\nTarget values for the first 5 samples:", cancer_y[:5])
print("\nTarget names:", breast_cancer.target_names)

# A dataset's DESCR attribute holds its textual description; only the first
# 500 characters are printed here.
iris_summary = iris.DESCR[:500]
print("\nDescription of the Iris dataset:\n", iris_summary, "...")


Task 3: Understanding Data Structures

In [None]:
# Write your code from here
# A Python list is an ordered container; positions are counted from 0.
arr = list(range(1, 6))
first_element = arr[0]
print(first_element)  # Accessing the first element


Title: Building a Simple ML Model in Scikit-Learn

Task 1: Simple Linear Regression
Implement linear regression with a small dataset:

In [None]:
# Write your code from here
# Task 1: Simple Linear Regression with a Small Dataset
#
# Fits y = b0 + b1 * x to six noisy points, scores the fit on a held-out
# split, and plots the data together with the fitted line.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Create a small synthetic dataset
np.random.seed(42)  # for reproducibility
# reshape(-1, 1) turns the 1-D list into a single-column 2-D feature matrix.
X = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)  # Independent variable (feature)
y = np.array([2, 3.5, 5, 5.5, 7, 8]) + np.random.normal(0, 0.5, 6) # Dependent variable (target) with some noise

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

# Step 3: Create and train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 5: Evaluate the model
# NOTE: with only six samples in total, the held-out split is tiny, so these
# scores are illustrative rather than a reliable estimate of model quality.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"\nMean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2) Score: {r2:.2f}")

# Step 6: Visualize the results
# Evaluate the fitted model over the full range of X so the regression line
# spans both the training and the testing points, instead of only connecting
# the few (unsorted) test samples.
X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_line = model.predict(X_line)

plt.figure(figsize=(8, 6))
plt.scatter(X_train, y_train, color='blue', label='Training Data')
plt.scatter(X_test, y_test, color='green', label='Testing Data')
plt.plot(X_line, y_line, color='red', linewidth=2, label='Linear Regression Line')
plt.xlabel('X (Independent Variable)')
plt.ylabel('y (Dependent Variable)')
plt.title('Simple Linear Regression')
plt.legend()
plt.grid(True)
plt.show()

# Step 7: Print the model coefficients
# coef_ holds one slope per feature; there is a single feature here.
print(f"\nIntercept (b0): {model.intercept_:.2f}")
print(f"Coefficient (b1): {model.coef_[0]:.2f}")

Task 2: Decision Tree Classifier
Build a decision tree model with the Iris dataset:

In [None]:
# Write your code from here
# Task 2: Decision Tree Classifier with the Iris Dataset
#
# Trains a decision tree on a stratified train/test split of Iris, reports
# accuracy, a per-class report and the confusion matrix, then draws the tree.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Step 1: Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
class_names = iris.target_names

print("Iris dataset loaded.")
print("Feature names:", feature_names)
print("Class names:", class_names)
print("Shape of data:", X.shape)
print("Shape of target:", y.shape)

# Step 2: Split the data into training and testing sets
# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

print("\nData split into training and testing sets.")
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

# Step 3: Create and train a Decision Tree Classifier model
# You can adjust hyperparameters like max_depth, min_samples_split, etc.
# random_state is fixed so repeated runs build the same tree.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

print("\nDecision Tree Classifier model trained.")

# Step 4: Make predictions on the test set
y_pred = model.predict(X_test)

print("\nPredictions on the test set:", y_pred)
print("Actual values on the test set:", y_test)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of the Decision Tree Classifier: {accuracy:.2f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Step 6: Visualize the Decision Tree
# plot_tree draws with matplotlib, so Graphviz is not required.
# class_names is a NumPy array; it is converted to a plain list because some
# scikit-learn releases validate this parameter strictly and reject arrays.
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=feature_names, class_names=class_names.tolist(), filled=True)
plt.title("Decision Tree Visualization")
plt.show()

Task 3: K-Nearest Neighbors Classifier
Use the KNN algorithm on the digits dataset:

In [None]:
# Write your code from here


Title: Training a Classification Model

Task 1: Logistic Regression
Train a logistic regression model

In [None]:
# Write your code from here

Task 2: Support Vector Machine
Train a Support Vector Classifier on the Iris dataset

In [None]:
# Write your code from here

Task 3: Naive Bayes Classifier
Train a Gaussian Naive Bayes model

In [None]:
# Write your code from here

Title: Understanding Model Performance & Hyperparameter Tuning

Task 1: Using Confusion Matrix
Evaluate a model with a confusion matrix:

In [None]:
# Write your code from here

Task 2: Cross-validation Score
Perform cross-validation with k-fold:

In [None]:
# Write your code from here

Task 3: Hyperparameter Tuning using Grid Search
Optimize hyperparameters using GridSearchCV

In [None]:
# Write your code from here