In [1]:
import math

# Dataset
scores = [55, 92, 78, 60, 85, 78, 90, 66, 73, 88]

# 1. Mean: sum of the scores divided by how many there are
n = len(scores)
total = sum(scores)
mean = total / n

# 2. Median: middle of the sorted data; with an even count, average the two central values
sorted_scores = sorted(scores)
half, is_odd = divmod(n, 2)
if is_odd:
    median = sorted_scores[half]
else:
    median = (sorted_scores[half - 1] + sorted_scores[half]) / 2

# 3. Mode: tally each score, then keep every score that reaches the highest tally
frequency = {}
for score in scores:
    frequency[score] = frequency.get(score, 0) + 1

max_freq = max(frequency.values())
mode = [candidate for candidate, count in frequency.items() if count == max_freq]

# 4. Range: largest score minus smallest score
range_val = max(scores) - min(scores)

# 5. Variance (Population): mean of the squared deviations from the mean (divide by n)
squared_differences = []
for x in scores:
    squared_differences.append((x - mean)**2)
sum_squared_diff = sum(squared_differences)
variance = sum_squared_diff / n

# 6. Standard Deviation (Population): square root of the population variance
std_deviation = math.sqrt(variance)

# Output: one "label value" line per statistic, in the order computed above
report = [
    ("Mean:", mean),
    ("Median:", median),
    ("Mode:", mode),
    ("Range:", range_val),
    ("Squared Differences:", squared_differences),
    ("Variance (Population):", variance),
    ("Standard Deviation (Population):", std_deviation),
]
for label, value in report:
    print(label, value)


Mean: 76.5
Median: 78.0
Mode: [78]
Range: 37
Squared Differences: [462.25, 240.25, 2.25, 272.25, 72.25, 2.25, 182.25, 110.25, 12.25, 132.25]
Variance (Population): 148.85
Standard Deviation (Population): 12.200409829181968


In [2]:
import numpy as np
from scipy import stats

# Dataset
scores = np.array([55, 92, 78, 60, 85, 78, 90, 66, 73, 88])

# Calculations using NumPy
mean_np = np.mean(scores)
median_np = np.median(scores)
range_np = np.max(scores) - np.min(scores)
variance_np = np.var(scores)  # ddof defaults to 0 -> population variance
std_dev_np = np.std(scores)   # ddof defaults to 0 -> population std deviation

# Mode using scipy. keepdims=True keeps .mode/.count as length-1 arrays,
# so the [0] indexing below pulls out the scalar value and its frequency.
mode_result = stats.mode(scores, keepdims=True)
mode_value = mode_result.mode[0]
mode_count = mode_result.count[0]

# Output: display the library results so they can be compared against the
# manual calculation (previously this cell computed everything but showed nothing).
print("Mean (NumPy):", mean_np)
print("Median (NumPy):", median_np)
print("Mode (SciPy):", mode_value, "| occurrences:", mode_count)
print("Range (NumPy):", range_np)
print("Variance (Population, NumPy):", variance_np)
print("Standard Deviation (Population, NumPy):", std_dev_np)


In [14]:
#- **Comparison**: The manual calculations and the NumPy/SciPy functions give the same results for this dataset (up to floating-point rounding), which confirms that both approaches are correct.
#- **Accuracy**: NumPy simplifies and automates these calculations while reducing the risk of human error.
#- **Mode Insight**: The score `78` occurs most frequently, so it's the most common student score.

In [3]:
import numpy as np

# Data: experience as a column vector (shape (8, 1)) and the matching salaries
X_experience = np.array([1, 2, 3, 4, 5, 6, 7, 8]).reshape(-1, 1)
Y_salary = np.array([45, 50, 60, 65, 75, 80, 90, 95])

# Hand-guessed line y = m*x + c. The two constants are named once so the
# prediction array and the printed table cannot drift apart.
guess_m = 7
guess_c = 40

# Calculate predicted salaries using the guessed line
Y_predicted = guess_m * X_experience + guess_c  # same (8, 1) shape as X_experience

# Print actual vs. predicted for each point, reading from Y_predicted
# instead of re-deriving the formula inside the loop.
for x, y_actual, y_pred in zip(X_experience[:, 0], Y_salary, Y_predicted[:, 0]):
    print(f"X: {x}, Y_actual: {y_actual}, Y_predicted: {y_pred}")


X: 1, Y_actual: 45, Y_predicted: 47
X: 2, Y_actual: 50, Y_predicted: 54
X: 3, Y_actual: 60, Y_predicted: 61
X: 4, Y_actual: 65, Y_predicted: 68
X: 5, Y_actual: 75, Y_predicted: 75
X: 6, Y_actual: 80, Y_predicted: 82
X: 7, Y_actual: 90, Y_predicted: 89
X: 8, Y_actual: 95, Y_predicted: 96


In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Data: target values first, then the single feature as a column vector of shape (8, 1)
Y_salary = np.array([45, 50, 60, 65, 75, 80, 90, 95])
X_experience = np.array([1, 2, 3, 4, 5, 6, 7, 8])[:, np.newaxis]

# Model: least-squares linear fit of salary on experience
model = LinearRegression()
model.fit(X=X_experience, y=Y_salary)

In [5]:
# Read the fitted line's parameters off the model.
# coef_ holds one entry per feature; there is a single feature here, so take index 0.
slope, intercept = model.coef_[0], model.intercept_  # m, c

print("Learned Slope (m):", slope)
print("Learned Intercept (c):", intercept)


Learned Slope (m): 7.380952380952383
Learned Intercept (c): 36.78571428571428


In [7]:
# Predict salaries for the training inputs with the fitted model.
# Note: this rebinds Y_predicted, which earlier held the hand-guessed line's predictions.
Y_predicted = model.predict(X_experience)


In [8]:
# Score the fitted line against the actual salaries (same data it was fitted on)
r2 = r2_score(y_true=Y_salary, y_pred=Y_predicted)
mse = mean_squared_error(y_true=Y_salary, y_pred=Y_predicted)

print("Mean Squared Error (MSE):", mse)
print("R-squared (R²) Score:", r2)


Mean Squared Error (MSE): 1.4880952380952395
R-squared (R²) Score: 0.994824016563147


In [9]:
import numpy as np

# Data
Y_pass_fail = np.array([0, 0, 0, 0, 1, 0, 1, 1, 1, 1])
X_prep_hours = np.array([0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]).reshape(-1, 1)

# Parameters of the hand-picked decision function z = m*X + c
m, c = 2, -5


# Step 3 helper: Sigmoid function
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise to a scalar or array."""
    return 1 / (1 + np.exp(-z))


# Step 1 & 2: Calculate z = mX + c, which keeps the (10, 1) shape of X_prep_hours
z = m * X_prep_hours + c

# Step 3: Turn each z into a probability
probabilities = sigmoid(z)

# Step 4: Predict classes using threshold 0.5
# (>= means a probability of exactly 0.5, as at 2.5 hours, is classified as 1)
predicted_classes = (probabilities.flatten() >= 0.5).astype(int)

# Step 5: Accuracy = matching predictions / total predictions
total_predictions = len(Y_pass_fail)
correct_predictions = np.sum(predicted_classes == Y_pass_fail)
accuracy = correct_predictions / total_predictions

# Output
print("Predicted Probabilities:", probabilities.flatten())
print("Predicted Classes:", predicted_classes)
print("Accuracy:", accuracy)


Predicted Probabilities: [0.01798621 0.04742587 0.11920292 0.26894142 0.5        0.73105858
 0.88079708 0.95257413 0.98201379 0.99330715]
Predicted Classes: [0 0 0 0 1 1 1 1 1 1]
Accuracy: 0.9


In [15]:
#- **Slope (m) and Intercept (c)**: The learned slope and intercept indicate that salary increases linearly with experience. For every 1 year increase in experience, salary increases by approximately the slope value.
#- **MSE**: The Mean Squared Error (MSE) reflects the average squared difference between the actual and predicted salaries. A lower MSE means better accuracy.
#- **R² Score**: The R-squared score shows how much of the variance in salary is explained by experience. A value close to 1 means a very good fit.

In [10]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Data
Y_pass_fail = np.array([0, 0, 0, 0, 1, 0, 1, 1, 1, 1])
X_prep_hours = np.array([0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]).reshape(-1, 1)

# 1. Fit logistic regression model (fit returns the estimator itself, so the call can be chained)
model = LogisticRegression(solver='liblinear').fit(X_prep_hours, Y_pass_fail)

# 2. Print learned coefficient (m) and intercept (c)
# coef_ is 2-D and intercept_ is 1-D; with one feature, the scalars sit at [0, 0] and [0].
m = model.coef_[0, 0]
c = model.intercept_[0]

print("Learned Coefficient (m):", m)
print("Learned Intercept (c):", c)

# 3. Predict classes for the same inputs the model was trained on
predicted_classes = model.predict(X_prep_hours)

# 4. Accuracy and Confusion Matrix (actual labels first, predictions second)
conf_matrix = confusion_matrix(y_true=Y_pass_fail, y_pred=predicted_classes)
accuracy = accuracy_score(y_true=Y_pass_fail, y_pred=predicted_classes)

print("Predicted Classes:", predicted_classes)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Learned Coefficient (m): 0.5361985605622785
Learned Intercept (c): -1.0077449163597512
Predicted Classes: [0 0 0 1 1 1 1 1 1 1]
Accuracy: 0.8
Confusion Matrix:
 [[3 2]
 [0 5]]


In [16]:
#- **Accuracy Score**: The proportion of correct predictions out of all predictions. Here it is 0.8 (8 of 10), measured on the same data the model was trained on. A higher accuracy means more of the model's predictions are correct.
#- **Confusion Matrix** (rows = actual class, columns = predicted class):
#  - **True Positives (TP)**: Students who were predicted to pass and did pass (5 here).
#  - **True Negatives (TN)**: Students who were predicted to fail and did fail (3 here).
#  - **False Positives (FP)**: Students who were predicted to pass but failed (2 here).
#  - **False Negatives (FN)**: Students who were predicted to fail but passed (0 here).
#- This breakdown helps us understand not just how accurate the model is, but also where and how it makes mistakes.


In [11]:
import numpy as np

# Data
X_cluster_data = np.array([
    [2, 10], [2, 5], [8, 4], [5, 8],
    [7, 5], [6, 4], [1, 2], [4, 9]
])

# Step 1: Initial Centroids (C1 = point 0, C2 = point 1)
C1 = X_cluster_data[0]  # [2, 10]
C2 = X_cluster_data[1]  # [2, 5]


def euclidean_distance(p1, p2):
    """Return the Euclidean (straight-line) distance between two points given as NumPy arrays."""
    return np.sqrt(np.sum((p1 - p2)**2))


def _assign_to_nearest(points, c1, c2):
    """Return a list with one label per point: 1 if the point is strictly closer to c1, else 2.

    A point equally distant from both centroids goes to cluster 2. This is the one
    place the assignment rule lives, so both iterations below share it.
    """
    return [
        1 if euclidean_distance(point, c1) < euclidean_distance(point, c2) else 2
        for point in points
    ]


# Step 2: Assignment Step - Iteration 1
assignments = _assign_to_nearest(X_cluster_data, C1, C2)

print("Iteration 1 Assignments:")
for i, cluster in enumerate(assignments):
    print(f"Point {i} -> Cluster {cluster}")

# Step 3: Update Step - Iteration 1
# Each new centroid is the column-wise mean of the points currently in that cluster.
labels_iter1 = np.array(assignments)
cluster1_points = X_cluster_data[labels_iter1 == 1]
cluster2_points = X_cluster_data[labels_iter1 == 2]

new_C1 = np.mean(cluster1_points, axis=0)
new_C2 = np.mean(cluster2_points, axis=0)

print("\nUpdated Centroids after Iteration 1:")
print("New C1:", new_C1)
print("New C2:", new_C2)

# Step 4: Assignment Step - Iteration 2 (Optional but done here)
# Same rule as iteration 1, now measured against the updated centroids.
assignments_iter2 = _assign_to_nearest(X_cluster_data, new_C1, new_C2)

print("\nIteration 2 Assignments:")
for i, cluster in enumerate(assignments_iter2):
    print(f"Point {i} -> Cluster {cluster}")


Iteration 1 Assignments:
Point 0 -> Cluster 1
Point 1 -> Cluster 2
Point 2 -> Cluster 2
Point 3 -> Cluster 1
Point 4 -> Cluster 2
Point 5 -> Cluster 2
Point 6 -> Cluster 2
Point 7 -> Cluster 1

Updated Centroids after Iteration 1:
New C1: [3.66666667 9.        ]
New C2: [4.8 4. ]

Iteration 2 Assignments:
Point 0 -> Cluster 1
Point 1 -> Cluster 2
Point 2 -> Cluster 2
Point 3 -> Cluster 1
Point 4 -> Cluster 2
Point 5 -> Cluster 2
Point 6 -> Cluster 2
Point 7 -> Cluster 1


In [12]:
import numpy as np
from sklearn.cluster import KMeans

# Data
X_cluster_data = np.array([
    [2, 10], [2, 5], [8, 4], [5, 8],
    [7, 5], [6, 4], [1, 2], [4, 9]
])

# 1. Fit KMeans model with two clusters; random_state is fixed so reruns give the same result.
#    (Or use n_init=10 if 'auto' gives a warning.) fit returns the estimator, so it is chained.
kmeans = KMeans(n_clusters=2, random_state=42, n_init='auto').fit(X_cluster_data)

# 2. Print final centroids
print("Final Centroids (Cluster Centers):")
print(kmeans.cluster_centers_)

# 3. Print cluster labels assigned to each data point
print("\nCluster Labels for Each Point:")
point_labels = kmeans.labels_
for idx in range(len(point_labels)):
    label = point_labels[idx]
    print(f"Point {idx} -> Cluster {label}")


Final Centroids (Cluster Centers):
[[5.5  3.75]
 [3.25 8.  ]]

Cluster Labels for Each Point:
Point 0 -> Cluster 1
Point 1 -> Cluster 1
Point 2 -> Cluster 0
Point 3 -> Cluster 1
Point 4 -> Cluster 0
Point 5 -> Cluster 0
Point 6 -> Cluster 0
Point 7 -> Cluster 1
