In [5]:
# Creating two portfolio-ready Jupyter notebooks (IPYNB) for Colab:
# 1) MNIST Handwritten Digit Recognizer (portfolio-ready)
# 2) MovieLens 100k Movie Recommender (portfolio-ready)
# Files will be saved to /mnt/data and displayed for download.
import nbformat as nbf, os, textwrap, json

# Directory that receives the generated notebooks (and the README written further down).
OUT_DIR = "/mnt/data/colab_notebooks_for_vaishali"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(OUT_DIR, exist_ok=True)

def make_notebook(cells, path):
    """Serialize a list of notebook cells to an .ipynb file.

    Args:
        cells: cell objects (e.g. built with ``nbf.v4.new_markdown_cell`` /
            ``nbf.v4.new_code_cell``) in the order they should appear.
        path: destination file path; an existing file is overwritten.
    """
    notebook = nbf.v4.new_notebook()
    # Attach the cells after construction, then write the document as UTF-8 text.
    notebook['cells'] = cells
    with open(path, mode='w', encoding='utf-8') as handle:
        nbf.write(notebook, handle)

# --- Notebook 1: MNIST handwritten digit recognizer ---
# The cells are declared in reading order as a single list literal; markdown cells
# carry the narrative and code cells carry the runnable Colab code.
cells_mnist = [
    # Title and overview
    nbf.v4.new_markdown_cell("# Handwritten Digit Recognizer (MNIST)\n\n**Portfolio-ready Colab notebook**\n\n**What you'll find:** problem statement, approach, full code (TensorFlow/Keras), training, evaluation, sample predictions, and suggestions for improvement.\n\n---"),

    # Section 1: environment setup
    nbf.v4.new_markdown_cell("## 1) Setup & Dependencies\nRun the cell below to install required packages (on Colab `tensorflow` is typically preinstalled, but we include the command for reproducibility)."),
    nbf.v4.new_code_cell("!pip install -q tensorflow numpy matplotlib\nimport tensorflow as tf\nprint('TensorFlow version:', tf.__version__)"),

    # Section 2: problem statement
    nbf.v4.new_markdown_cell("## 2) Problem Statement\nPredict the handwritten digit (0–9) from 28x28 grayscale images using a Convolutional Neural Network (CNN). We'll use the built-in MNIST dataset provided by TensorFlow."),

    # Section 3: data loading and a preview grid
    nbf.v4.new_code_cell(textwrap.dedent("""
# 3) Load data and visualize samples
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# show a grid of sample images with labels
fig, axes = plt.subplots(2,5, figsize=(10,4))
for i, ax in enumerate(axes.flatten()):
    ax.imshow(x_train[i], cmap='gray')
    ax.set_title(f'label: {y_train[i]}')
    ax.axis('off')
plt.show()""")),

    # Section 4: model definition
    nbf.v4.new_markdown_cell("## 4) Build the CNN model\nA compact CNN suitable for MNIST. You can increase depth or add augmentation for better accuracy."),
    nbf.v4.new_code_cell(textwrap.dedent("""
from tensorflow.keras import layers, models

def build_model():
    model = models.Sequential([
        layers.Input(shape=(28,28,1)),
        layers.Reshape((28,28,1)),
        layers.Conv2D(32, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

model = build_model()
model.summary()""")),

    # Section 5: training, evaluation and saving the model
    nbf.v4.new_markdown_cell("## 5) Training\nUse a small number of epochs for quick runs; increase for better accuracy. The notebook will save the model to `model_mnist.h5`. You can also use Colab GPU runtime for faster training (Runtime → Change runtime type → GPU)."),
    nbf.v4.new_code_cell(textwrap.dedent("""
# Prepare data shapes
x_train_in = np.expand_dims(x_train, -1)
x_test_in = np.expand_dims(x_test, -1)

# Train
history = model.fit(x_train_in, y_train, validation_split=0.1, epochs=5, batch_size=128)

# Evaluate
loss, acc = model.evaluate(x_test_in, y_test, verbose=2)
print(f'Test accuracy: {acc:.4f}')

# Save model
model.save('model_mnist.h5')
print('Saved model to model_mnist.h5')""")),

    # Section 6: loss / accuracy curves
    nbf.v4.new_markdown_cell("## 6) Training curves\nPlot accuracy and loss during training."),
    nbf.v4.new_code_cell(textwrap.dedent("""
import matplotlib.pyplot as plt

plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.title('Loss')

plt.subplot(1,2,2)
plt.plot(history.history['accuracy'], label='train_acc')
plt.plot(history.history['val_accuracy'], label='val_acc')
plt.legend()
plt.title('Accuracy')
plt.show()""")),

    # Section 7: reload the saved model and predict on a few test images
    nbf.v4.new_markdown_cell("## 7) Sample Predictions\nLoad the saved model and predict on a few test images."),
    nbf.v4.new_code_cell(textwrap.dedent("""
from tensorflow.keras.models import load_model
import numpy as np
model2 = load_model('model_mnist.h5')

# Predict first 10 test images
preds = model2.predict(x_test_in[:10])
pred_labels = preds.argmax(axis=1)
for i in range(10):
    print(f'Index {i}: predicted={pred_labels[i]}, true={y_test[i]}')""")),

    # Section 8: wrap-up
    nbf.v4.new_markdown_cell("## 8) Results & Conclusions\n- This simple CNN should reach ~98% accuracy with 5–10 epochs. Results may vary.\n- **Future improvements:** data augmentation, batch normalization, deeper network, hyperparameter tuning, or transfer learning.\n\n---\n\n*Download this notebook and open it in Colab (File → Upload notebook) or run it directly by copying the cells into a Colab notebook.*"),
]

# Write the assembled notebook into the output directory.
mnist_path = os.path.join(OUT_DIR, "MNIST_Handwritten_Digit_Recognizer_Portfolio.ipynb")
make_notebook(cells_mnist, mnist_path)

In [4]:
# Notebook 2: MovieLens 100k item-based recommender.
# Each append below adds one cell, in reading order, to the generated notebook.
cells_mov = []
cells_mov.append(nbf.v4.new_markdown_cell("# Movie Recommendation System (MovieLens 100K)\n\n**Portfolio-ready Colab notebook**\n\nA simple item-based collaborative filtering recommender using the MovieLens 100k dataset. Includes data loading, exploratory analysis, a baseline recommender, evaluation (RMSE), and top-N recommendations.\n\n---"))

cells_mov.append(nbf.v4.new_markdown_cell("## 1) Setup & Dependencies\nInstall required packages. The notebook will download the MovieLens 100k dataset at runtime."))

cells_mov.append(nbf.v4.new_code_cell("!pip install -q pandas numpy scikit-learn\nimport pandas as pd, numpy as np\nprint('Ready')"))

cells_mov.append(nbf.v4.new_markdown_cell("## 2) Download and Load Data\nWe will download the MovieLens 100k zip from GroupLens and load `u.data`. The dataset contains user-item ratings (1–5)."))

# Download cell: fetched over HTTPS, and both the HTTP response and the archive are
# closed via context managers. The doubled backslash in sep='\\t' is intentional so
# the generated cell contains the two-character escape '\t'.
cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
import os, urllib.request, zipfile, io
DATA_URL = "https://files.grouplens.org/datasets/movielens/ml-100k.zip"
if not os.path.exists('ml-100k'):
    print('Downloading MovieLens 100k...')
    # Read the whole archive into memory, then extract it next to the notebook.
    with urllib.request.urlopen(DATA_URL) as r:
        payload = r.read()
    with zipfile.ZipFile(io.BytesIO(payload)) as z:
        z.extractall()
    print('Extracted to ml-100k/')
else:
    print('MovieLens already present.')

# Load u.data
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('ml-100k/u.data', sep='\\t', names=cols)
df.head()""")))

cells_mov.append(nbf.v4.new_markdown_cell("## 3) Exploratory Analysis\nQuick look at dataset size and rating distribution."))

cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
print('Total ratings:', len(df))
print('Unique users:', df['user_id'].nunique())
print('Unique items:', df['item_id'].nunique())

# rating counts
df['rating'].value_counts().sort_index().plot(kind='bar', title='Rating distribution')""")))

cells_mov.append(nbf.v4.new_markdown_cell("## 4) Build user-item matrix and item-based recommender\nWe compute item-item cosine similarity and score items for a user based on their existing ratings."))

cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
from sklearn.metrics.pairwise import cosine_similarity

def build_user_item_matrix(df):
    return df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

ratings_matrix = build_user_item_matrix(df)
ratings_matrix.shape""")))

cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
# compute item-item cosine similarity (may take a few seconds)
item_sim = cosine_similarity(ratings_matrix.T)
item_sim = pd.DataFrame(item_sim, index=ratings_matrix.columns, columns=ratings_matrix.columns)
item_sim.iloc[:4,:4]""")))

cells_mov.append(nbf.v4.new_markdown_cell("## 5) Recommendation function\nReturn top-N recommendations for a given user (filtering already rated items)."))

cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
def item_based_recommend(user_id, ratings_matrix, item_sim, top_n=10):
    user_ratings = ratings_matrix.loc[user_id]
    scores = item_sim.dot(user_ratings) / (item_sim.sum(axis=1) + 1e-9)
    unrated = user_ratings[user_ratings==0].index
    scores = scores.loc[unrated].sort_values(ascending=False)
    return scores.head(top_n)

# Example: recommend for user 1
top = item_based_recommend(1, ratings_matrix, item_sim, top_n=10)
top.head(10)""")))

cells_mov.append(nbf.v4.new_markdown_cell("## 6) Evaluate (simple RMSE baseline)\nA simple train/test split evaluation to compute RMSE for predicted ratings (baseline method). This is not a production evaluation but an illustrative baseline."))

# Evaluation cell. Scores are computed on plain NumPy arrays and looked up by
# position so that each prediction is paired with the correct (user, item); held-out
# ratings whose user or item never appears in the train split are skipped; RMSE is
# taken as sqrt(MSE) so the cell does not depend on the `squared` keyword.
cells_mov.append(nbf.v4.new_code_cell(textwrap.dedent("""
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def evaluate(df):
    train, test = train_test_split(df, test_size=0.2, random_state=42)
    train_mat = train.pivot(index='user_id', columns='item_id', values='rating').fillna(0)
    # compute item similarity on train matrix
    sim = cosine_similarity(train_mat.T)
    # Similarity-weighted scores as a plain array (users x items). Staying in NumPy
    # avoids label re-alignment: row/column positions match train_mat exactly.
    scores = train_mat.to_numpy().dot(sim) / (np.abs(sim).sum(axis=1) + 1e-9)
    # Map every held-out rating to its row/column position; -1 means unseen in train.
    user_pos = train_mat.index.get_indexer(test['user_id'])
    item_pos = train_mat.columns.get_indexer(test['item_id'])
    seen = (user_pos >= 0) & (item_pos >= 0)
    trues = test['rating'].to_numpy()[seen]
    pred_vals = scores[user_pos[seen], item_pos[seen]]
    # Root of the MSE; works on scikit-learn versions with or without `squared=`.
    rmse = float(np.sqrt(mean_squared_error(trues, pred_vals)))
    return rmse

rmse = evaluate(df)
print('Baseline RMSE:', rmse)""")))

cells_mov.append(nbf.v4.new_markdown_cell("## 7) Interpretability & Next Steps\n- The simple item-based approach is easy to explain and implement.\n- **Improvements:** matrix factorization (SVD), model-based collaborative filtering, hybrid filtering with item metadata, implicit-feedback methods, scalability optimizations, and hyperparameter tuning.\n\n---\n\n*Download this notebook and open it in Colab (File → Upload notebook) or run it directly by copying the cells into a Colab notebook.*"))

# Write the assembled notebook into the output directory.
mov_path = os.path.join(OUT_DIR, "MovieLens_Recommender_Portfolio.ipynb")
make_notebook(cells_mov, mov_path)

# Create a README and zip both notebooks for download
readme = textwrap.dedent(f"""Two portfolio-ready Colab notebooks were created and saved to:\n\n{OUT_DIR}\n\nFiles:\n- MNIST_Handwritten_Digit_Recognizer_Portfolio.ipynb\n- MovieLens_Recommender_Portfolio.ipynb\n\nDownload them and upload to Google Colab (File → Upload notebook) or add them to a GitHub repo and use 'Open in Colab' there.\n""")

# The README text contains a non-ASCII arrow, so the encoding is set explicitly
# (matching make_notebook) instead of relying on the platform's default locale.
with open(os.path.join(OUT_DIR, "README.txt"), "w", encoding="utf-8") as f:
    f.write(readme)

# Create zip
import zipfile
zip_path = "/mnt/data/colab_notebooks_for_vaishali.zip"
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
    # Sorted so the archive's member order is deterministic across runs;
    # arcname=fn stores each file at the archive root, without the directory prefix.
    for fn in sorted(os.listdir(OUT_DIR)):
        zf.write(os.path.join(OUT_DIR, fn), arcname=fn)

# Bare last expression so the notebook displays the archive path as the cell output.
zip_path

'/mnt/data/colab_notebooks_for_vaishali.zip'