# ML Learners' Space – Assignment 1 Solutions

## Problem 1: NumPy Array Operations

In [1]:

import numpy as np

# Generate array
array_2d = np.random.randint(1, 51, size=(5, 4))
print("Array:\n", array_2d)

# Anti-diagonal
anti_diag = [array_2d[i, -1-i] for i in range(min(array_2d.shape))]
print("Anti-diagonal:", anti_diag)

# Max in each row
row_max = np.max(array_2d, axis=1)
print("Max in each row:", row_max)

# Elements ≤ mean
mean_val = np.mean(array_2d)
less_than_mean = array_2d[array_2d <= mean_val]
print("Elements ≤ mean:", less_than_mean)

# Boundary traversal
def numpy_boundary_traversal(matrix):
    top = list(matrix[0])
    right = list(matrix[1:-1, -1])
    bottom = list(matrix[-1][::-1])
    left = list(matrix[1:-1, 0][::-1])
    return top + right + bottom + left

print("Boundary Traversal:", numpy_boundary_traversal(array_2d))


Array:
 [[31 44  3 19]
 [ 1 35  4 24]
 [16 41 13 16]
 [18 14 43 48]
 [44 15 13 26]]
Anti-diagonal: [np.int32(19), np.int32(4), np.int32(41), np.int32(18)]
Max in each row: [44 35 41 48 44]
Elements ≤ mean: [ 3 19  1  4 16 13 16 18 14 15 13]
Boundary Traversal: [np.int32(31), np.int32(44), np.int32(3), np.int32(19), np.int32(24), np.int32(16), np.int32(48), np.int32(26), np.int32(13), np.int32(15), np.int32(44), np.int32(18), np.int32(16), np.int32(1)]


## Problem 2: 1D NumPy Array Operations

In [2]:

# Create 1D array
array_1d = np.random.uniform(0, 10, 20)
array_1d = np.round(array_1d, 2)
print("Array:", array_1d)

# Stats
print("Min:", np.min(array_1d))
print("Max:", np.max(array_1d))
print("Median:", np.median(array_1d))

# Replace elements < 5 with their squares
array_replaced = np.where(array_1d < 5, np.round(array_1d ** 2, 2), array_1d)
print("Modified Array:", array_replaced)

# Alternate sorting
def numpy_alternate_sort(arr):
    sorted_arr = np.sort(arr)
    result = []
    left, right = 0, len(sorted_arr) - 1
    while left <= right:
        result.append(sorted_arr[left])
        if left != right:
            result.append(sorted_arr[right])
        left += 1
        right -= 1
    return np.array(result)

print("Alternate Sorted:", numpy_alternate_sort(array_1d))


Array: [9.28 2.63 2.33 7.55 6.66 7.89 5.92 2.71 3.72 4.57 4.92 3.97 2.56 7.48
 1.31 9.67 0.41 4.47 5.27 8.78]
Min: 0.41
Max: 9.67
Median: 4.745
Modified Array: [ 9.28  6.92  5.43  7.55  6.66  7.89  5.92  7.34 13.84 20.88 24.21 15.76
  6.55  7.48  1.72  9.67  0.17 19.98  5.27  8.78]
Alternate Sorted: [0.41 9.67 1.31 9.28 2.33 8.78 2.56 7.89 2.63 7.55 2.71 7.48 3.72 6.66
 3.97 5.92 4.47 5.27 4.57 4.92]


## Problem 3: Pandas DataFrame Operations

In [5]:

import pandas as pd
import numpy as np

# Generate data
names = [f"Student{i}" for i in range(1, 11)]
subjects = ["Math", "Science", "English", "History", "Art", "Math", "Science", "English", "History", "Art"]
scores = np.random.randint(50, 101, size=10)

# Create DataFrame
df = pd.DataFrame({
    "Name": names,
    "Subject": subjects,
    "Score": scores
})

# Assign grades
def assign_grade(score):
    if score >= 90:
        return "A"
    elif score >= 80:
        return "B"
    elif score >= 70:
        return "C"
    elif score >= 60:
        return "D"
    else:
        return "F"

df["Grade"] = df["Score"].apply(assign_grade)
print(df)


# Sorted by score
print("Sorted by Score:\n", df.sort_values(by="Score", ascending=False))

# Average score per subject
print("Average Score per Subject:\n", df.groupby("Subject")["Score"].mean())

# Filter A or B
def pandas_filter_pass(dataframe):
    return dataframe[dataframe["Grade"].isin(["A", "B"])]

print("Filtered A or B:\n", pandas_filter_pass(df))


        Name  Subject  Score Grade
0   Student1     Math     61     D
1   Student2  Science     71     C
2   Student3  English     79     C
3   Student4  History     61     D
4   Student5      Art     64     D
5   Student6     Math     66     D
6   Student7  Science     53     F
7   Student8  English     97     A
8   Student9  History     70     C
9  Student10      Art     91     A
Sorted by Score:
         Name  Subject  Score Grade
7   Student8  English     97     A
9  Student10      Art     91     A
2   Student3  English     79     C
1   Student2  Science     71     C
8   Student9  History     70     C
5   Student6     Math     66     D
4   Student5      Art     64     D
0   Student1     Math     61     D
3   Student4  History     61     D
6   Student7  Science     53     F
Average Score per Subject:
 Subject
Art        77.5
English    88.0
History    65.5
Math       63.5
Science    62.0
Name: Score, dtype: float64
Filtered A or B:
         Name  Subject  Score Grade
7   Student8  E

## Problem 4: NLP with Naive Bayes

In [6]:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Create synthetic dataset
positive_reviews = ["Great movie!" for _ in range(50)]
negative_reviews = ["Terrible movie." for _ in range(50)]
reviews = positive_reviews + negative_reviews
sentiments = ["positive"] * 50 + ["negative"] * 50

df_reviews = pd.DataFrame({"Review": reviews, "Sentiment": sentiments})

# Vectorize
vectorizer = CountVectorizer(max_features=500, stop_words='english')
X = vectorizer.fit_transform(df_reviews["Review"])
y = df_reviews["Sentiment"]

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model_nb = MultinomialNB()
model_nb.fit(X_train, y_train)
y_pred = model_nb.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Prediction function
def predict_review_sentiment(model, vectorizer, review):
    vec = vectorizer.transform([review])
    return model.predict(vec)[0]

print("Prediction Example:", predict_review_sentiment(model_nb, vectorizer, "An amazing experience!"))


Accuracy: 1.0
Prediction Example: negative


## Problem 5: NLP with Logistic Regression

In [7]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

# Synthetic dataset
feedback = ["Excellent product!" for _ in range(50)] + ["Not good at all." for _ in range(50)]
labels = ["good"] * 50 + ["bad"] * 50
df_feedback = pd.DataFrame({"Text": feedback, "Label": labels})

# Vectorize
tfidf_vectorizer = TfidfVectorizer(max_features=300, lowercase=True, stop_words='english')
X = tfidf_vectorizer.fit_transform(df_feedback["Text"])
y = df_feedback["Label"]

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
model_lr = LogisticRegression()
model_lr.fit(X_train, y_train)
y_pred = model_lr.predict(X_test)

# Metrics
print("Precision:", precision_score(y_test, y_pred, pos_label="good"))
print("Recall:", recall_score(y_test, y_pred, pos_label="good"))
print("F1 Score:", f1_score(y_test, y_pred, pos_label="good"))

# Preprocessing function
def text_preprocess_vectorize(texts, vectorizer):
    return vectorizer.transform(texts)

print("Vectorized example:\n", text_preprocess_vectorize(["Amazing service!"], tfidf_vectorizer).toarray())


Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Vectorized example:
 [[0. 0. 0.]]
