Part A: Calculating TF-IDF for Documents

In [None]:
# Import necessary libraries
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

# Toy corpus for the TF-IDF demo: each string is treated as one document
documents = [
    "The quick brown fox jumps over the lazy dog.",
    "The fox is quick and jumps high.",
    "A lazy dog lies under the tree.",
]

In [None]:
# Step 1: Create a TF-IDF vectorizer with the library's default settings
vectorizer = TfidfVectorizer()

# Step 2: Learn the vocabulary and IDF weights from the corpus and, in the
# same pass, encode every document as a row of TF-IDF scores
tfidf_matrix = vectorizer.fit_transform(raw_documents=documents)

In [None]:
# Step 3: Build a labelled table from the TF-IDF matrix so it is easier to read
feature_names = vectorizer.get_feature_names_out()  # column labels: vocabulary terms
dense_scores = tfidf_matrix.toarray()               # dense array, one row per document
tfidf_df = pd.DataFrame(data=dense_scores, columns=feature_names)

# Show the TF-IDF score of each term in each document
print("TF-IDF values:")
print(tfidf_df)

TF-IDF values:
        and     brown       dog       fox      high        is     jumps  \
0  0.000000  0.398811  0.303306  0.303306  0.000000  0.000000  0.303306   
1  0.443503  0.000000  0.000000  0.337295  0.443503  0.443503  0.337295   
2  0.000000  0.000000  0.358291  0.000000  0.000000  0.000000  0.000000   

       lazy     lies      over     quick       the     tree    under  
0  0.303306  0.00000  0.398811  0.303306  0.471089  0.00000  0.00000  
1  0.000000  0.00000  0.000000  0.337295  0.261940  0.00000  0.00000  
2  0.358291  0.47111  0.000000  0.000000  0.278245  0.47111  0.47111  


Part B: Quadratic Regression Model using TensorFlow

In [None]:
# Import necessary libraries
import tensorflow as tf
import numpy as np

# Step 1: Generate synthetic data (y = 2x^2 + 3x + 4 + Gaussian noise)
# Seed NumPy's global RNG so that Restart & Run All reproduces the same noisy
# targets. Any np.random.* call executed afterwards (such as the model's
# random weight initialization) also becomes repeatable when the cells are
# run in order.
SEED = 42
NUM_SAMPLES = 100   # number of training points
NOISE_STD = 10      # standard deviation of the additive noise
np.random.seed(SEED)

x_train = np.linspace(-10, 10, NUM_SAMPLES)  # evenly spaced inputs in [-10, 10]
y_train = (
    2 * x_train**2 + 3 * x_train + 4
    + np.random.normal(0, NOISE_STD, NUM_SAMPLES)  # Adding noise
)

In [None]:
# Step 2: Define the quadratic model
class QuadraticModel(tf.keras.Model):
    """Quadratic regressor computing ``w1 * x**2 + w2 * x + b``.

    Attributes:
        w1: Trainable float32 scalar, coefficient of the squared term.
        w2: Trainable float32 scalar, coefficient of the linear term.
        b: Trainable float32 scalar, constant offset.
    """

    def __init__(self):
        super().__init__()
        # Randomly initialized weights and bias (one np.random.randn() draw each)
        self.w1 = tf.Variable(np.random.randn(), dtype=tf.float32)
        self.w2 = tf.Variable(np.random.randn(), dtype=tf.float32)
        self.b = tf.Variable(np.random.randn(), dtype=tf.float32)

    def call(self, x):
        """Evaluate the quadratic at ``x``.

        Args:
            x: Numeric array or tensor of input values.

        Returns:
            A float32 tensor holding ``w1 * x**2 + w2 * x + b`` element-wise.
        """
        # Cast explicitly to the variables' dtype so the arithmetic below does
        # not depend on implicit conversion of NumPy inputs (for example the
        # float64 output of np.linspace, or integer arrays).
        x = tf.cast(x, tf.float32)
        return self.w1 * x**2 + self.w2 * x + self.b

In [None]:
# Step 3: Create the model whose three parameters will be fitted
model = QuadraticModel()

# Step 4: Pick the optimizer (Adam) and the objective (mean squared error)
LEARNING_RATE = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
loss_fn = tf.keras.losses.MeanSquaredError()

In [None]:
# Step 5: Fit the parameters; every epoch uses the whole training set at once
NUM_EPOCHS = 1000
LOG_EVERY = 100
trainable_vars = [model.w1, model.w2, model.b]

for epoch in range(NUM_EPOCHS):
    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        predictions = model(x_train)
        loss = loss_fn(y_train, predictions)

    # Differentiate the loss w.r.t. the parameters and apply one optimizer step
    gradients = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(gradients, trainable_vars))

    # Report the loss (computed before this epoch's update) at regular intervals
    if (epoch + 1) % LOG_EVERY == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.numpy()}")

# Step 6: Report the fitted coefficients
print("\nLearned weights and bias:")
print(f"w1 (x^2 coefficient): {model.w1.numpy()}")
print(f"w2 (x coefficient): {model.w2.numpy()}")
print(f"b (bias): {model.b.numpy()}")

Epoch 100, Loss: 3183.31005859375
Epoch 200, Loss: 802.8833618164062
Epoch 300, Loss: 214.19456481933594
Epoch 400, Loss: 120.4604263305664
Epoch 500, Loss: 111.00732421875
Epoch 600, Loss: 110.38652038574219
Epoch 700, Loss: 110.34178924560547
Epoch 800, Loss: 110.31951904296875
Epoch 900, Loss: 110.29678344726562
Epoch 1000, Loss: 110.27323913574219

Learned weights and bias:
w1 (x^2 coefficient): 2.03389048576355
w2 (x coefficient): 2.9513375759124756
b (bias): 4.040149211883545
