In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [41]:
# Read the gender-classification CSV into a DataFrame.
csv_path = '/content/gender_classification_v7.csv'
df = pd.read_csv(csv_path)

In [42]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,Male
1,0,14.0,5.4,0,0,1,0,Female
2,0,11.8,6.3,1,1,1,1,Male
3,0,14.4,6.1,0,1,1,1,Male
4,1,13.5,5.9,0,0,0,0,Female


In [43]:
# Print column dtypes and non-null counts to check for missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5001 entries, 0 to 5000
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   long_hair                  5001 non-null   int64  
 1   forehead_width_cm          5001 non-null   float64
 2   forehead_height_cm         5001 non-null   float64
 3   nose_wide                  5001 non-null   int64  
 4   nose_long                  5001 non-null   int64  
 5   lips_thin                  5001 non-null   int64  
 6   distance_nose_to_lip_long  5001 non-null   int64  
 7   gender                     5001 non-null   object 
dtypes: float64(2), int64(5), object(1)
memory usage: 312.7+ KB


In [44]:
# Separate the label column from the predictor columns.
y = df['gender']
X = df.drop(columns=['gender'])

In [45]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [46]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [47]:
# Compare four logistic-regression configurations on the same scaled split.
# With scikit-learn's documented defaults (L2 penalty, C=1.0, lbfgs) the
# "normal" model and the explicit L2 model differ only in max_iter.
# The liblinear and saga solvers shuffle the data internally, so both get a
# fixed random_state to keep the reported accuracies reproducible across runs.

# Normal Model
model = LogisticRegression()

# L2
logreg_l2 = LogisticRegression(penalty='l2', C=1.0, solver='lbfgs', max_iter=1000)

# L1
logreg_l1 = LogisticRegression(
    penalty='l1', C=1.0, solver='liblinear', max_iter=1000, random_state=0
)

# Elastic Net
logreg_en = LogisticRegression(
    penalty='elasticnet', l1_ratio=0.5, C=1.0, solver='saga', max_iter=1000, random_state=0
)

# Fit and score each model in turn. Each label carries its own trailing colon
# so the printed lines keep their existing format.
for label, clf in [
    ("Logistic Regression Model Accuracy :", model),
    ("Logistic Regression Model with L2 Accuracy:", logreg_l2),
    ("Logistic Regression Model with L1 Accuracy:", logreg_l1),
    ("Logistic Regression Model with Elastic Net Accuracy:", logreg_en),
]:
    clf.fit(X_train_scaled, y_train)
    print(f"{label} {accuracy_score(y_test, clf.predict(X_test_scaled))}")

Logistic Regression Model Accuracy : 0.965034965034965
Logistic Regression Model with L2 Accuracy: 0.965034965034965
Logistic Regression Model with L1 Accuracy: 0.9660339660339661
Logistic Regression Model with Elastic Net Accuracy: 0.965034965034965


In [48]:
# Baseline KNN: 21 neighbours with the default distance metric.
knn_model = KNeighborsClassifier(n_neighbors=21).fit(X_train_scaled, y_train)
print(f"KNNs Model Accuracy : {accuracy_score(y_test, knn_model.predict(X_test_scaled))}")

# Sweep the neighbourhood size over powers of two (4 through 128).
# NOTE(review): k is being compared on the test split; consider a validation
# split or cross-validation before picking a value from this sweep.
print("Varying Neighbours Accuracies")
for k in [2 ** exponent for exponent in range(2, 8)]:
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)
    acc = accuracy_score(y_test, knn.predict(X_test_scaled))
    print(f"k={k}, Accuracy={acc:.3f}")

# Manhattan distance with 5 neighbours.
knn_manhattan = KNeighborsClassifier(metric='manhattan', n_neighbors=5)
knn_manhattan.fit(X_train_scaled, y_train)

print("Manhattan KNN Accuracy:", accuracy_score(y_test, knn_manhattan.predict(X_test_scaled)))



KNNs Model Accuracy : 0.9630369630369631
Varying Neighbours Accuracies
k=4, Accuracy=0.961
k=8, Accuracy=0.963
k=16, Accuracy=0.960
k=32, Accuracy=0.968
k=64, Accuracy=0.966
k=128, Accuracy=0.967
Manhattan KNN Accuracy: 0.9590409590409591
