# Practical Implementation: ML, Deep Learning, and NLP

## Part 1: Classical ML with Scikit-learn - Iris Dataset

In [None]:

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the Iris dataset as a single DataFrame (four feature columns + 'target').
iris_data = load_iris(as_frame=True)
df = iris_data.frame

# Introduce a missing value for demo
df.iloc[3, 2] = np.nan
print("Sample data with missing value:\n", df.head())

# Encode target labels.
# NOTE: load_iris already ships integer class ids, so this is effectively a
# no-op here; it is kept so the cell also works when the labels are strings.
label_encoder = LabelEncoder()
df['target'] = label_encoder.fit_transform(df['target'])

# Train/test split (done BEFORE imputation so that no statistic computed on
# the held-out rows can influence training).
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['target']), df['target'], test_size=0.2, random_state=42
)

# Handle missing values: learn the per-feature means on the training
# partition only, then apply those same means to both partitions. Imputing
# with the mean of the full frame would leak test-set information.
feature_means = X_train.mean()
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# Train Decision Tree
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predictions & evaluation (macro average = unweighted mean over the classes)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))


## Part 2: Deep Learning with TensorFlow - MNIST Dataset

In [None]:

import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# Fetch the MNIST train/test splits bundled with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixel values by 1/255 and append a trailing channel axis in one step,
# giving arrays shaped (samples, 28, 28, 1) as the Conv2D input below expects.
x_train = (x_train / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test / 255.0).reshape(-1, 28, 28, 1)

# Assemble the CNN layer by layer: two conv/pool stages, then a dense head
# ending in a 10-way softmax.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Sparse categorical cross-entropy is used because the labels are passed as-is
# from load_data() rather than one-hot encoded in this cell.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 5 epochs, holding back 10% of the training data for validation.
history = model.fit(x_train, y_train, epochs=5, validation_split=0.1)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test accuracy:", test_acc)

# Show the first five test images, each titled with its most probable class.
predictions = model.predict(x_test[:5])
predicted_digits = predictions.argmax(axis=1)

plt.figure(figsize=(10, 2))
for i, digit in enumerate(predicted_digits):
    plt.subplot(1, 5, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.title(f"Pred: {digit}")
    plt.axis('off')
plt.show()


## Part 3: NLP with spaCy - Amazon Reviews

In [None]:

import spacy
# Load the en_core_web_sm pipeline once; it is reused for every review below.
nlp = spacy.load("en_core_web_sm")

# Hard-coded example reviews used by the NER and sentiment demos.
reviews = [
    "I love my new Apple iPhone 14, it's super fast!",
    "The Samsung Galaxy buds have great sound quality.",
    "Avoid this brand at all costs, terrible service!"
]

# Named Entity Recognition: for each review, list every detected entity as a
# (text, label) pair, followed by a blank separator line.
for review in reviews:
    doc = nlp(review)
    entities = [(span.text, span.label_) for span in doc.ents]
    print(f"Review: {review}")
    print("Entities:", entities)
    print()

import re

# Rule-based sentiment lexicons (lower-case, whole words).
# NOTE(review): "costs" is a weak negative cue (it fires on any mention of
# price); kept unchanged here so existing results stay the same.
positive_words = {"love", "great", "excellent", "fast", "amazing"}
negative_words = {"terrible", "bad", "worst", "avoid", "costs"}

# Runs of ASCII letters; used to split lower-cased text into whole words.
_WORD_PATTERN = re.compile(r"[a-z]+")


def sentiment_analysis(text):
    """Classify *text* as "Positive", "Negative" or "Neutral".

    The text is lower-cased and split into whole words, and each word is
    looked up in ``positive_words`` and ``negative_words``. Matching whole
    words (rather than substrings) prevents false hits such as "gloves"
    matching "love" or "breakfast" matching "fast".

    Args:
        text: The review text to classify.

    Returns:
        "Positive" if there are more positive than negative hits,
        "Negative" if there are more negative than positive hits, and
        "Neutral" when the counts are equal (including no hits at all).
    """
    tokens = _WORD_PATTERN.findall(text.lower())
    positive_hits = sum(token in positive_words for token in tokens)
    negative_hits = sum(token in negative_words for token in tokens)

    # Compare counts instead of letting the first positive hit win, so a
    # review dominated by negative cues is not labelled Positive.
    if positive_hits > negative_hits:
        return "Positive"
    if negative_hits > positive_hits:
        return "Negative"
    return "Neutral"

# Print every sample review alongside the label the rule-based classifier
# assigns to it.
for review in reviews:
    label = sentiment_analysis(review)
    print(f"Review: {review} | Sentiment: {label}")


## Part 4: Ethics & Optimization

In [None]:

# Ethical Considerations
# The triple-quoted string below is a bare literal (not assigned to any name);
# it holds written notes on dataset bias for the MNIST (Part 2) and review
# (Part 3) examples, plus suggested mitigations.
'''
Bias in MNIST:
- Mostly neat, centered digits; poor generalization to unusual handwriting.
Bias in Amazon Reviews:
- Sentiment misinterpretation due to slang, cultural context, or non-English phrases.

Mitigation:
- TensorFlow Fairness Indicators to check subgroup performance.
- spaCy rule-based custom patterns for domain-specific context.
'''

# Debugging Example
# Another bare string literal: two bug/fix pairs covering the loss passed to
# model.compile and the input shape expected by Conv2D.
'''
Bug: model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
Fix: Use 'sparse_categorical_crossentropy' for classification.

Bug: Shape mismatch with Conv2D input.
Fix: Ensure input shape matches (batch, height, width, channels).
'''
