In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import matplotlib.pyplot as plt

df = pd.read_csv("face_data_multi.csv")  

y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values


A1,A2

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X[:, 0].reshape(-1, 1), X[:, 1], test_size=0.2, random_state=42)

reg = LinearRegression().fit(X_train, y_train)

y_train_pred = reg.predict(X_train)
y_test_pred = reg.predict(X_test)

def evaluate_regression(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, mape, r2

train_metrics = evaluate_regression(y_train, y_train_pred)
test_metrics = evaluate_regression(y_test, y_test_pred)

print("Train Set: MSE:", train_metrics[0], "RMSE:", train_metrics[1], "MAPE:", train_metrics[2], "R2:", train_metrics[3])
print("Test Set: MSE:", test_metrics[0], "RMSE:", test_metrics[1], "MAPE:", test_metrics[2], "R2:", test_metrics[3])


Train Set: MSE: 530.7435973989577 RMSE: 23.037873109272862 MAPE: 624390201955009.8 R2: 0.9139890790210013
Test Set: MSE: 542.7924388291003 RMSE: 23.297906318575073 MAPE: 455017428112708.6 R2: 0.9126394434133814


A3

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, X[:, 1], test_size=0.2, random_state=42)

reg_all = LinearRegression().fit(X_train, y_train)

y_train_pred_all = reg_all.predict(X_train)
y_test_pred_all = reg_all.predict(X_test)

train_metrics_all = evaluate_regression(y_train, y_train_pred_all)
test_metrics_all = evaluate_regression(y_test, y_test_pred_all)

print("Train Set with All Attributes:", train_metrics_all)
print("Test Set with All Attributes:", test_metrics_all)


Train Set with All Attributes: (2.3933165217039002e-24, 1.547034751291612e-12, 98.2433663537836, 1.0)
Test Set with All Attributes: (2.50545911672041e-24, 1.5828642129760879e-12, 92.38652378504273, 1.0)


A4

In [None]:
kmeans = KMeans(n_clusters=2, random_state=42, n_init="auto").fit(X_train)

labels = kmeans.labels_
centers = kmeans.cluster_centers_

print("Cluster Centers:\n", centers)


Cluster Centers:
 [[143.6625498  141.08615538 139.13227092 ... 138.17778884 138.90268924
  139.52131474]
 [ 84.90359462  82.56297448  79.88621604 ...  81.01399433  81.87972194
   83.10683253]]


A5

In [7]:
silhouette = silhouette_score(X_train, labels)
ch_score = calinski_harabasz_score(X_train, labels)
db_index = davies_bouldin_score(X_train, labels)

print("Silhouette Score:", silhouette)
print("Calinski-Harabasz Score:", ch_score)
print("Davies-Bouldin Index:", db_index)


Silhouette Score: 0.15771371478240168
Calinski-Harabasz Score: 4635.032715053036
Davies-Bouldin Index: 2.072743949399763


A6

In [None]:
k_values = range(2, 10)
silhouette_scores, ch_scores, db_scores = [], [], []

for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init="auto").fit(X_train)
    labels = kmeans.labels_

    silhouette_scores.append(silhouette_score(X_train, labels))
    ch_scores.append(calinski_harabasz_score(X_train, labels))
    db_scores.append(davies_bouldin_score(X_train, labels))

plt.figure(figsize=(12, 5))
plt.subplot(1, 3, 1)
plt.plot(k_values, silhouette_scores, marker='o')
plt.title('Silhouette Score vs k')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Silhouette Score')

plt.subplot(1, 3, 2)
plt.plot(k_values, ch_scores, marker='o')
plt.title('Calinski-Harabasz Score vs k')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('CH Score')

plt.subplot(1, 3, 3)
plt.plot(k_values, db_scores, marker='o')
plt.title('Davies-Bouldin Index vs k')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('DB Index')

plt.tight_layout()
plt.show()


A7

In [None]:
distortions = []

for k in range(2, 20):
    kmeans = KMeans(n_clusters=k, random_state=42, n_init="auto").fit(X_train)
    distortions.append(kmeans.inertia_)

plt.figure(figsize=(6, 4))
plt.plot(range(2, 20), distortions, marker='o', linestyle='-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia (Distortion)')
plt.title('Elbow Method for Optimal k')
plt.show()
