# Part 2

## Task 1: Classical ML with Scikit-learn

In [4]:
# Iris Dataset Classification using Decision Tree

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [5]:
# Load the Iris dataset into a DataFrame. The path is relative, so 'Iris.csv'
# must be in the kernel's working directory (normally the notebook directory).

df = pd.read_csv("Iris.csv")

In [6]:
# Drop the Id column if it exists.
# errors="ignore" makes the drop a no-op when the column is absent, and
# rebinding (instead of inplace=True) keeps the cell safe to re-run: the
# previous in-place drop raised KeyError on a second execution.
df = df.drop(columns="Id", errors="ignore")

In [7]:
# Handle missing values: replace NaNs in every numeric column with that
# column's mean. Non-numeric columns are excluded from the mean and are
# therefore left untouched.
numeric_means = df.mean(numeric_only=True)
df = df.fillna(numeric_means)


In [8]:
# Encode species labels as integer class ids.
# The fitted encoder is kept in `le` so the ids can be mapped back to the
# original names later.
le = LabelEncoder().fit(df['Species'])
df['Species'] = le.transform(df['Species'])

In [9]:
# Split features and target: every column except 'Species' is a feature,
# 'Species' itself is the label vector.
X, y = df.drop(columns="Species"), df["Species"]

In [10]:
# Train-test split: hold out 20% of the rows for testing. The fixed
# random_state keeps the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Train Decision Tree model on the training split.
# random_state is pinned so repeated runs produce the same fitted tree.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [12]:
# Predict on the held-out split and report the metrics.
# Precision and recall are macro-averaged, i.e. computed per class and then
# averaged without weighting by class frequency.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
for label, metric in (("Precision:", precision_score), ("Recall:", recall_score)):
    print(label, metric(y_test, y_pred, average='macro'))

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


## Task 2: Deep Learning with TensorFlow/PyTorch

In [1]:
# MNIST CNN Classification with PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [2]:
# 1. Transform and Load Data
# Each image is converted to a tensor and then normalised with a fixed
# mean/std pair (0.1307 / 0.3081 -- presumably the MNIST training-set
# statistics; confirm if the dataset changes).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Train split first, then test split; both are downloaded into the current
# directory when not already present.
train_dataset, test_dataset = (
    datasets.MNIST(root='.', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Training batches are shuffled; the test loader keeps a fixed order and uses
# a larger batch size.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)


In [3]:
# 2. Define CNN Model
class CNNModel(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two unpadded 3x3 convolutions (1 -> 32 -> 64 channels), each followed by
    ReLU and 2x2 max-pooling, feed two fully connected layers
    (64*5*5 -> 128 -> 10).

    ``forward`` returns raw, un-normalised scores of shape ``[N, 10]``; no
    softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64*5*5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Compute class scores for a batch ``x`` of shape ``[N, 1, 28, 28]``.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce
                64*5*5 features per sample (raised by ``fc1``).
        """
        # Shape comments assume 28x28 input; N is the batch size.
        x = F.relu(self.conv1(x))         # [N, 32, 26, 26]
        x = F.max_pool2d(x, 2)            # [N, 32, 13, 13]
        x = F.relu(self.conv2(x))         # [N, 64, 11, 11]
        x = F.max_pool2d(x, 2)            # [N, 64, 5, 5]
        # Flatten everything except the batch axis. Unlike view(-1, 64*5*5),
        # this never folds surplus features into extra "samples": a
        # wrong-sized input fails loudly in fc1 instead of silently changing
        # the batch size.
        x = torch.flatten(x, 1)           # [N, 1600]
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Seed PyTorch's global RNG before creating the network so the random weight
# initialisation is repeatable across kernel restarts. Later consumers of the
# global generator (e.g. a shuffling DataLoader without its own generator)
# become repeatable as well.
torch.manual_seed(42)
model = CNNModel()

In [4]:
# 3. Training Setup
# Cross-entropy is applied directly to the model's raw output scores.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [5]:
# 4. Train the Model
# Five passes over train_loader. Every step clears the accumulated gradients,
# runs a forward pass, back-propagates the batch loss and applies one
# optimizer update.
for epoch_idx in range(5):
    model.train()  # make sure the module is in training mode for this epoch
    for images, labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(images), labels)
        batch_loss.backward()
        optimizer.step()
    print(f"Epoch {epoch_idx + 1} completed")

Epoch 1 completed
Epoch 2 completed
Epoch 3 completed
Epoch 4 completed
Epoch 5 completed


In [6]:
# 5. Evaluate the Model
# Count how many test images receive the correct label. The predicted label
# is the index of the highest score in each output row.
model.eval()  # switch the module to evaluation mode
correct, total = 0, 0
with torch.no_grad():  # gradients are not needed for scoring
    for images, labels in test_loader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 99.23%


## Task 3: NLP with spaCy

In [4]:
# Part 2 Task 3: NLP using spaCy on Amazon Reviews

import spacy
from textblob import TextBlob
import pandas as pd

In [5]:
# Load spaCy's small English pipeline; it is used below for named entity
# recognition. The "en_core_web_sm" package must be installed separately
# (e.g. `python -m spacy download en_core_web_sm`) or this call fails.
nlp = spacy.load("en_core_web_sm")

In [7]:
# Load the review data from 'amazon_reviews_sample.csv' (e.g. a sample of the
# Kaggle Amazon reviews data; the file must be in the working directory).
# The file must contain a 'reviewText' column; rows where it is missing are
# removed straight away.
# NOTE: this rebinds `df`, which held the Iris data earlier in the notebook.
df = pd.read_csv("amazon_reviews_sample.csv").dropna(subset=['reviewText'])


In [8]:
# Pick up to 5 random sample reviews (random_state fixed for repeatability).
# Series.sample raises ValueError when asked for more rows than exist, so the
# sample size is capped at the number of reviews left after dropping missing
# text. With 5 or more reviews the selection is unchanged.
sample_reviews = df['reviewText'].sample(min(5, len(df)), random_state=1)


In [10]:
# Analyze each review: print the text, the entities spaCy recognises in it,
# and a coarse sentiment label derived from TextBlob's polarity score.
for review in sample_reviews:
    print("\nReview:", review)

    # Named Entity Recognition
    entities = nlp(review).ents
    print("Named Entities:")
    for ent in entities:
        print(f" - {ent.text} ({ent.label_})")

    # Sentiment Analysis using TextBlob: the sign of the polarity decides the
    # label, and a polarity of exactly zero is reported as neutral.
    polarity = TextBlob(review).sentiment.polarity
    if polarity > 0:
        sentiment = "Positive"
    elif polarity < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    print("Sentiment:", sentiment)


Review: Highly recommend the Samsung Galaxy – it's so fast!
Named Entities:
Sentiment: Positive

Review: Terrible product. The Apple charger stopped working in 2 days.
Named Entities:
 - Apple (ORG)
 - 2 days (DATE)
Sentiment: Negative

Review: Not happy with the quality of this Nike shirt.
Named Entities:
 - Nike (ORG)
Sentiment: Negative

Review: This Sony speaker has amazing sound quality!
Named Entities:
 - Sony (ORG)
Sentiment: Positive

Review: The packaging for this Logitech mouse was great.
Named Entities:
 - Logitech (ORG)
Sentiment: Positive


# Part 3: Ethics & Optimization

## 2. Troubleshooting Challenge

In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [15]:
# Step 1: Load MNIST data
# load_data() returns a (train, test) pair of (images, labels) tuples; the
# archive is downloaded on first use.
# NOTE: this rebinds y_train / y_test, which held the Iris split earlier in
# the notebook.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [16]:
# Step 2: Preprocess the data
# Divide by 255.0 so pixel intensities (presumably 0-255 integers as loaded;
# confirm) fall in [0, 1].
# NOTE: not idempotent -- re-running this cell on its own rescales the arrays
# a second time.
x_train, x_test = (pixels / 255.0 for pixels in (x_train, x_test))

In [17]:
# Reshape for CNN input: (samples, height, width, channels).
# -1 lets NumPy infer the sample count; the trailing 1 adds the single
# grayscale channel axis the convolution layers expect.
x_train, x_test = (
    images.reshape(-1, 28, 28, 1) for images in (x_train, x_test)
)

In [18]:
# Step 3: Define the CNN model
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Conv2D: Keras 3 warns about the latter
# for Sequential models and recommends an Input object as the first entry.
# NOTE: this rebinds `model`, which held the PyTorch CNN earlier in the
# notebook.
model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')  # 10 output classes for MNIST
])

In [19]:
# Step 4: Compile the model with proper loss function
# The sparse variant of categorical cross-entropy takes integer class labels
# directly, so y_train / y_test are used as loaded without one-hot encoding.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Step 5: Train the model
# 5 epochs, with 10% of the training data held out for validation.
# The returned History object is kept in `history` so per-epoch losses and
# metrics (history.history) stay available for inspection or plotting, and so
# the cell does not end by echoing the bare object repr.
history = model.fit(x_train, y_train, epochs=5, validation_split=0.1)


Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 22ms/step - accuracy: 0.8947 - loss: 0.3382 - val_accuracy: 0.9810 - val_loss: 0.0665
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 27ms/step - accuracy: 0.9842 - loss: 0.0505 - val_accuracy: 0.9882 - val_loss: 0.0432
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 27ms/step - accuracy: 0.9887 - loss: 0.0342 - val_accuracy: 0.9878 - val_loss: 0.0435
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 27ms/step - accuracy: 0.9918 - loss: 0.0254 - val_accuracy: 0.9882 - val_loss: 0.0419
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 27ms/step - accuracy: 0.9941 - loss: 0.0181 - val_accuracy: 0.9902 - val_loss: 0.0409


<keras.src.callbacks.history.History at 0x1cd790ed450>

In [21]:
# Step 6: Evaluate on test set
# evaluate() yields the loss followed by the compiled metric (accuracy);
# the accuracy is printed as a percentage with two decimals.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {test_acc:.2%}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9866 - loss: 0.0416
Test Accuracy: 98.98%
