In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib as plt

In [7]:
data = {
    'age':[20,13,25,22,19],
    'height': [150,135,145,160,155],
    'weight': [57,63,70,61,65],
    'gender': [1,0,1,0,0]
}

df = pd.DataFrame(data)

print(df)

   age  height  weight  gender
0   20     150      57       1
1   13     135      63       0
2   25     145      70       1
3   22     160      61       0
4   19     155      65       0


In [8]:
X = df.drop('gender',axis=1)
Y = df['gender']

In [10]:
# Standardizing the data
scaler = StandardScaler()

X_scaled = scaler.fit_transform(df)

In [14]:
# Doing PCA on the data
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Splitting the data in train and test set
x_train,x_test,y_train,y_test = train_test_split(X_pca,Y,test_size=0.3,random_state=42)

In [20]:
model = LinearRegression()
model.fit(x_train,y_train)

y_pred = model.predict(x_test)
model.score(x_test,y_test)

0.0

In [21]:
# Evaluating with confusion matrix
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Female', 'Male'], yticklabels=['Female', 'Male'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

ValueError: Classification metrics can't handle a mix of binary and continuous targets

In [22]:
# Visualizing PCA results
y_numeric = pd.factorize(y)[0]

plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y_numeric, cmap='coolwarm', edgecolor='k', s=80)
plt.xlabel('Original Feature 1')
plt.ylabel('Original Feature 2')
plt.title('Before PCA: Using First 2 Standardized Features')
plt.colorbar(label='Target classes')

plt.subplot(1, 2, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_numeric, cmap='coolwarm', edgecolor='k', s=80)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('After PCA: Projected onto 2 Principal Components')
plt.colorbar(label='Target classes')

plt.tight_layout()
plt.show()

NameError: name 'y' is not defined