In [None]:
1

In [1]:
from typing import List
import math

Vector = List[float]

# Define the vectors
height_weight_age = [70, 170, 40]
grades = [95, 80, 75, 62]

# Function to add corresponding elements of two vectors
def add(v: Vector, w: Vector) -> Vector:
    """Adds corresponding elements"""
    assert len(v) == len(w), "vectors must be the same length"
    return [v_i + w_i for v_i, w_i in zip(v, w)]

# Function to subtract corresponding elements of two vectors
def subtract(v: Vector, w: Vector) -> Vector:
    """Subtracts corresponding elements"""
    assert len(v) == len(w), "vectors must be the same length"
    return [v_i - w_i for v_i, w_i in zip(v, w)]

# Function to sum all corresponding elements of a list of vectors
def vector_sum(vectors: List[Vector]) -> Vector:
    """Sums all corresponding elements"""
    assert vectors, "no vectors provided!"
    num_elements = len(vectors[0])
    assert all(len(v) == num_elements for v in vectors), "different sizes!"
    return [sum(vector[i] for vector in vectors) for i in range(num_elements)]

# Function to multiply every element of a vector by a scalar
def scalar_multiply(c: float, v: Vector) -> Vector:
    """Multiplies every element by c"""
    return [c * v_i for v_i in v]

# Function to compute the element-wise average of a list of vectors
def vector_mean(vectors: List[Vector]) -> Vector:
    """Computes the element-wise average"""
    n = len(vectors)
    return scalar_multiply(1/n, vector_sum(vectors))

# Function to compute the dot product of two vectors
def dot(v: Vector, w: Vector) -> float:
    """Computes v_1 * w_1 + ... + v_n * w_n"""
    assert len(v) == len(w), "vectors must be same length"
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

# Function to compute the sum of squares of a vector
def sum_of_squares(v: Vector) -> float:
    """Returns v_1 * v_1 + ... + v_n * v_n"""
    return dot(v, v)

# Function to compute the magnitude (or length) of a vector
def magnitude(v: Vector) -> float:
    """Returns the magnitude (or length) of v"""
    return math.sqrt(sum_of_squares(v))

# Function to compute the squared distance between two vectors
def squared_distance(v: Vector, w: Vector) -> float:
    """Computes (v_1 - w_1) * 2 + ... + (v_n - w_n) * 2"""
    return sum_of_squares(subtract(v, w))

# Function to compute the distance between two vectors
def distance(v: Vector, w: Vector) -> float:
    """Computes the distance between v and w"""
    return math.sqrt(squared_distance(v, w))

if _name_ == "_main_":
    # Displaying the output of each function
    print("1. Add vectors:", add([1, 2, 3], [4, 5, 6]))
    print("2. Subtract vectors:", subtract([5, 7, 9], [4, 5, 6]))
    print("3. Vector sum:", vector_sum([[1, 2], [3, 4], [5, 6], [7, 8]]))
    print("4. Scalar multiply:", scalar_multiply(2, [1, 2, 3]))
    print("5. Vector mean:", vector_mean([[1, 2], [3, 4], [5, 6]]))
    print("6. Dot product:", dot([1, 2, 3], [4, 5, 6]))
    print("7. Sum of squares:", sum_of_squares([1, 2, 3]))
    print("8. Magnitude:", magnitude([3, 4]))
    print("9. Distance:", distance([1, 2, 3], [4, 5, 6]))

NameError: name '_name_' is not defined

In [2]:
2

2

In [None]:
from typing import List
from collections import Counter
import matplotlib.pyplot as plt
import math

num_friends = [100, 49, 41, 40, 25]
daily_minutes = []
daily_hours = []

friend_counts = Counter(num_friends)
xs = range(101)
ys = [friend_counts[x] for x in xs]
plt.bar(xs, ys)
plt.axis([0, 101, 0, 25])
plt.title("Histogram of Friend Counts")
plt.xlabel("# of friends")
plt.ylabel("# of people")
plt.show()

num_points = len(num_friends)
largest_value = max(num_friends)
smallest_value = min(num_friends)

sorted_values = sorted(num_friends)
smallest_value = sorted_values[0]
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]

def mean(xs: List[float]) -> float:
    return sum(xs) / len(xs)

print("Mean:", mean(num_friends))

def _median_odd(xs: List[float]) -> float:
    return sorted(xs)[len(xs) // 2]

def _median_even(xs: List[float]) -> float:
    sorted_xs = sorted(xs)
    hi_midpoint = len(xs) // 2
    return (sorted_xs[hi_midpoint - 1] + sorted_xs[hi_midpoint]) / 2

def median(v: List[float]) -> float:
    return _median_even(v) if len(v) % 2 == 0 else _median_odd(v)

print("Median:", median(num_friends))

def quantile(xs: List[float], p: float) -> float:
    p_index = int(p * len(xs))
    return sorted(xs)[p_index]

print("Quantile (10th percentile):", quantile(num_friends, 0.10))
print("Quantile (25th percentile):", quantile(num_friends, 0.25))
print("Quantile (75th percentile):", quantile(num_friends, 0.75))
print("Quantile (90th percentile):", quantile(num_friends, 0.90))

def mode(x: List[float]) -> List[float]:
    counts = Counter(x)
    max_count = max(counts.values())
    return [x_i for x_i, count in counts.items() if count == max_count]

print("Mode:", mode(num_friends))

def data_range(xs: List[float]) -> float:
    return max(xs) - min(xs)

print("Data Range:", data_range(num_friends))

def de_mean(xs: List[float]) -> List[float]:
    x_bar = mean(xs)
    return [x - x_bar for x in xs]

def sum_of_squares(xs: List[float]) -> float:
    return sum(x * x for x in xs)

def variance(xs: List[float]) -> float:
    assert len(xs) >= 2, "variance requires at least two elements"
    n = len(xs)
    deviations = de_mean(xs)
    return sum_of_squares(deviations) / (n - 1)

print("Variance:", variance(num_friends))

def standard_deviation(xs: List[float]) -> float:
    return math.sqrt(variance(xs))

print("Standard Deviation:", standard_deviation(num_friends))

def interquartile_range(xs: List[float]) -> float:
    return quantile(xs, 0.75) - quantile(xs, 0.25)

print("Interquartile Range:", interquartile_range(num_friends))

In [None]:
3

In [None]:
import matplotlib.pyplot as plt 
hrs = [10,9,2,15,10,16,11,16]
scores = [95,80,10,50,45,90,38,93]

plt.plot(hrs,scores,color = 'red',marker = '*')
plt.xlabel('hrs studied')
plt.ylabel('score')
plt.title('performance')
plt.grid(True)
plt.show()



In [None]:
4

In [None]:
import pandas as pd
import matplotlib.pyplot as plt 
df = pd.read_csv('mtcars.csv')
plt.hist(df['mpg'],bins = 10,color = 'pink',linestyle = "-'')
plt.xlabel('miles per gallon')
plt.ylabel('frequency')
plt.title('histogram')
plt.show()


In [None]:
5

In [None]:
import pandas as pd
import numpy as np
import re

df = pd.read_csv(r"C:\Users\test.DESKTOP-3FMB32F\Downloads\BL-Flickr-Images-Book.csv")

irrel_col=['Edition Statement','Corporate Author','Corporate Contributors','Former owner','Engraver','Contributors','Issuance type','Shelfmarks']
df.drop(columns=irrel_col,inplace=True)

df.set_index('Identifier',inplace=True)

df['Date of Publication']=df['Date of Publication'].str.extract(r'^(\d{4})',expand=False)

df['Date of Publication']=pd.to_numeric(df['Date of Publication'],errors='coerce')

print(df.head()) 


In [None]:
6

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

model=LogisticRegression(C=1e4,max_iter=200)
model.fit(X_train,y_train)

y_pred=model.predict(X_test)

accuracy = accuracy_score(y_test,y_pred)
print(f"Classification accuracy:{accuracy:4f}")	

In [None]:
7

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

kernels = ['linear', 'poly', 'rbf', 'sigmoid']
c_values = [0.01, 1, 10]
gamma_value = 0.5
best_accuracy = 0
best_parameters = {}
best_svc = None
best_support_vectors = None

for k in kernels:
    for C in c_values:
        svc = SVC(kernel=k, gamma=gamma_value, C=C, decision_function_shape='ovr')
        svc.fit(X_train, y_train)
        y_pred = svc.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_parameters = {'kernel': k, 'C': C}
            best_svc = svc
            best_support_vectors = svc.n_support_

print(f'Best accuracy on test set: {best_accuracy:.4f}')
print(f'Best parameters: {best_parameters}')
print(f'Total number of support vectors: {best_support_vectors.sum()}')

In [None]:
8

In [None]:
 

import math
import pandas as pd
import numpy as np
from collections import Counter, defaultdict


data = {
    'Price': ['Low', 'Low', 'Low', 'Low', 'Low', 'Med', 'Med', 'Med', 'Med', 'High', 'High', 'High', 'High'],
    'Maintenance': ['Low', 'Med', 'Low', 'Med', 'High', 'Med', 'Med', 'High', 'High', 'Med', 'Med', 'High', 'High'],
    'Capacity': [2, 4, 4, 4, 4, 4, 4, 2, 5, 4, 2, 2, 5],
    'Airbag': ['No', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes'],
    'Profitable': ['Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes']
}


df = pd.DataFrame(data)


def entropy(labels):
    label_counts = Counter(labels)
    total_count = len(labels)
    return -sum((count/total_count) * math.log2(count/total_count) for count in label_counts.values())


def info_gain(data, feature, target):
    total_entropy = entropy(data[target])
    values, counts = np.unique(data[feature], return_counts=True)
    weighted_entropy = sum((counts[i]/sum(counts)) * entropy(data[data[feature] == values[i]][target]) for i in range(len(values)))
    return total_entropy - weighted_entropy


def ID3(data, target, features):
 
    if len(np.unique(data[target])) == 1:
        return np.unique(data[target])[0]
    
  
    if len(features) == 0:
        return Counter(data[target]).most_common(1)[0][0]
    
  
    gains = [info_gain(data, feature, target) for feature in features]
    best_feature = features[np.argmax(gains)]
    
    tree = {best_feature: {}}
    features = [f for f in features if f != best_feature]
    
   
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        subtree = ID3(subset, target, features)
        tree[best_feature][value] = subtree
    
    return tree


features = df.columns[:-1]
target = 'Profitable'
decision_tree = ID3(df, target, list(features))
print(decision_tree)


def print_tree(tree, indent=""):
    if isinstance(tree, dict):
        for key, value in tree.items():
            print(f"{indent}{key}")
            print_tree(value, indent + "  ")
    else:
        print(f"{indent}-> {tree}")

print_tree(decision_tree)
                                            

In [None]:
9

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import rand_score
from scipy.cluster.hierarchy import dendrogram, linkage

data = pd.read_csv("Spiral.txt",delim_whitespace=True)
data.columns = ["x", "y", "label"]

X = data[["x", "y"]].values
y_true = data["label"].values

kmeans = KMeans(n_clusters=3, random_state=0).fit(X)
y_kmeans = kmeans.labels_

single_link = AgglomerativeClustering(n_clusters=3, linkage='single').fit(X)
y_single_link = single_link.labels_

complete_link = AgglomerativeClustering(n_clusters=3, linkage='complete').fit(X)

y_complete_link = complete_link.labels_

rand_index_kmeans = rand_score(y_true, y_kmeans)
rand_index_single_link = rand_score(y_true, y_single_link)
rand_index_complete_link = rand_score(y_true, y_complete_link)

print(f"Rand Index for K-means: {rand_index_kmeans:.4f}")
print(f"Rand Index for Single-link Hierarchical Clustering: {rand_index_single_link:.4f}")
print(f"Rand Index for Complete-link Hierarchical Clustering: {rand_index_complete_link:.4f}")

fig, axs = plt.subplots(2, 2, figsize=(12, 12))

axs[0, 0].scatter(X[:, 0], X[:, 1], c=y_true, cmap='viridis')
axs[0, 0].set_title("Original Data")

axs[0, 1].scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis')
axs[0, 1].set_title("K-means Clustering")

axs[1, 0].scatter(X[:, 0], X[:, 1], c=y_single_link, cmap='viridis')
axs[1, 0].set_title("Single-link Hierarchical Clustering")

axs[1, 1].scatter(X[:, 0], X[:, 1], c=y_complete_link, cmap='viridis')

axs[1, 1].set_title("Complete-link Hierarchical Clustering")

plt.show()
linked = linkage(X, 'single')
plt.figure(figsize=(10, 7))
dendrogram(linked, labels=y_true)
plt.title('Single-link Hierarchical Clustering Dendrogram')
plt.show()

linked = linkage(X, 'complete')
plt.figure(figsize=(10, 7))
dendrogram(linked, labels=y_true)
plt.title('Complete-link Hierarchical Clustering Dendrogram')
plt.show()
