In [12]:
import pandas as pd

# Read the measurement table from disk
data = pd.read_csv('anthrokids.csv')

# Build one frame per gender from boolean masks over the 'gender' column
is_male = data['gender'] == 'M'
is_female = data['gender'] == 'F'
male_data = data[is_male]
female_data = data[is_female]

# 'age' serves as the class label and 'height' as the single feature
male_classes, male_features = male_data['age'], male_data['height']
female_classes, female_features = female_data['age'], female_data['height']


In [13]:
from sklearn.model_selection import train_test_split

# Both genders are split with the same options: 33% of the samples are held
# out for testing and the seed is fixed so the partition is reproducible.
_split_options = {'test_size': 0.33, 'random_state': 42}

# Male samples -> training / test partitions
(male_features_train,
 male_features_test,
 male_classes_train,
 male_classes_test) = train_test_split(male_features, male_classes, **_split_options)

# Female samples -> training / test partitions
(female_features_train,
 female_features_test,
 female_classes_train,
 female_classes_test) = train_test_split(female_features, female_classes, **_split_options)


In [17]:
from scipy.stats import norm

def _fit_class_params(features, classes, ages=range(3, 19)):
    """Estimate a Gaussian (mean, std) of `features` for each age class.

    Parameters
    ----------
    features : pandas.Series
        Training measurements, index-aligned with `classes`.
    classes : pandas.Series
        Class label (age) of every training measurement.
    ages : iterable, optional
        Candidate class labels to fit; defaults to the integers 3..18.

    Returns
    -------
    dict
        Maps every candidate age that occurs in `classes` to a
        ``(mean, std)`` tuple. Ages whose sample standard deviation is not
        a positive number (fewer than two samples, or all samples equal)
        are left out, because a Gaussian with such a scale has no usable
        density.
    """
    import warnings

    # Compute the set of observed labels once instead of on every iteration.
    observed = set(classes.unique())

    params = {}
    for age in ages:
        if age not in observed:
            continue
        age_data = features[classes == age]
        mean = age_data.mean()
        std = age_data.std()
        # `std` is NaN for a single sample and 0 for identical samples;
        # `not std > 0` rejects both cases.
        if not std > 0:
            continue
        params[age] = (mean, std)

    if not params:
        warnings.warn(
            "No age class could be fitted; check that the class labels "
            "match the candidate ages exactly."
        )
    return params


# NOTE(review): the recognition rates recorded further down in this notebook
# are 0.00% for both genders. One possible cause is that the 'age' column does
# not hold exact integers in 3..18 (e.g. fractional years or months), in which
# case few or no classes are fitted here - confirm against the data and bin
# the ages before fitting if so.

# Parameters for male classes
male_class_params = _fit_class_params(male_features_train, male_classes_train)

# Prior probabilities for male classes (relative class frequencies)
male_prior_probs = male_classes_train.value_counts(normalize=True)

# Parameters for female classes
female_class_params = _fit_class_params(female_features_train, female_classes_train)

# Prior probabilities for female classes (relative class frequencies)
female_prior_probs = female_classes_train.value_counts(normalize=True)


In [18]:
def calculate_posterior_probability(feature, mean, std):
    """Evaluate a normal density at `feature`.

    Despite its name, this returns only the Gaussian density with location
    `mean` and scale `std` (the class-conditional likelihood); it is
    `classify_data` that multiplies the value by a class prior.
    """
    density = norm.pdf(feature, mean, std)
    return density

def classify_data(features, class_params, prior_probs):
    """Assign each feature value the class with the highest posterior score.

    Each class is scored as ``log N(feature | mean, std) + log prior``.
    Working in log space gives the same ranking as ``density * prior`` but
    does not underflow to 0 for feature values far from every class mean,
    which would otherwise leave the sample without any prediction.

    Parameters
    ----------
    features : iterable of float
        Feature values to classify.
    class_params : dict
        Maps class label -> ``(mean, std)`` of that class's Gaussian.
    prior_probs : mapping
        Maps class label -> prior probability; indexed with the keys of
        `class_params`.

    Returns
    -------
    list
        One predicted class label per feature, in input order. An entry is
        ``None`` when no class yields a comparable score (e.g. the feature
        is NaN or `class_params` holds no usable class).
    """
    import math

    # Keep only classes with a well-defined Gaussian and a positive prior,
    # and take the log of each prior once rather than once per sample.
    usable_classes = []
    for age, (mean, std) in class_params.items():
        prior = prior_probs[age]
        # `not x > 0` is also true for NaN, so degenerate values are skipped.
        if not std > 0 or not prior > 0:
            continue
        usable_classes.append((age, mean, std, math.log(prior)))

    predictions = []
    for feature in features:
        best_score = -math.inf
        predicted_class = None
        for age, mean, std, log_prior in usable_classes:
            score = norm.logpdf(feature, loc=mean, scale=std) + log_prior
            # Strict '>' keeps the first class on ties; NaN scores never win.
            if score > best_score:
                best_score = score
                predicted_class = age
        predictions.append(predicted_class)
    return predictions

# Predict an age class for every held-out male sample
male_predictions = classify_data(
    features=male_features_test,
    class_params=male_class_params,
    prior_probs=male_prior_probs,
)

# Predict an age class for every held-out female sample
female_predictions = classify_data(
    features=female_features_test,
    class_params=female_class_params,
    prior_probs=female_prior_probs,
)

# Male recognition rate: share of predictions equal to the true class
male_matches = male_predictions == male_classes_test
male_accuracy = male_matches.mean()
print(f"Male recognition rate: {male_accuracy * 100}%")

# Female recognition rate: share of predictions equal to the true class
female_matches = female_predictions == female_classes_test
female_accuracy = female_matches.mean()
print(f"Female recognition rate: {female_accuracy * 100}%")


Male recognition rate: 0.0%
Female recognition rate: 0.0%


In [19]:
def calculate_recognition_rate(predictions, true_classes):
    """Return the fraction of predictions that equal their true class.

    The two inputs are compared position by position, so any iterables of
    equal length are accepted (plain lists as well as pandas Series).

    Parameters
    ----------
    predictions : iterable
        Predicted class labels; entries may be ``None`` and then never
        count as correct.
    true_classes : iterable
        Ground-truth class labels, in the same order as `predictions`.

    Returns
    -------
    float
        Number of matching positions divided by the number of samples, or
        NaN when there are no samples.

    Raises
    ------
    ValueError
        If the two inputs differ in length.
    """
    predicted = list(predictions)
    actual = list(true_classes)
    if len(predicted) != len(actual):
        raise ValueError(
            f"Length mismatch: {len(predicted)} predictions "
            f"for {len(actual)} true classes"
        )

    total = len(actual)
    if total == 0:
        # The rate is undefined without samples; avoid dividing by zero.
        return float("nan")

    correct = sum(1 for guess, truth in zip(predicted, actual) if guess == truth)
    return correct / total

# Predict an age class for every held-out male sample
male_predictions = classify_data(
    features=male_features_test,
    class_params=male_class_params,
    prior_probs=male_prior_probs,
)

# Predict an age class for every held-out female sample
female_predictions = classify_data(
    features=female_features_test,
    class_params=female_class_params,
    prior_probs=female_prior_probs,
)

# Fraction of correct predictions per gender
male_recognition_rate = calculate_recognition_rate(
    predictions=male_predictions, true_classes=male_classes_test
)
female_recognition_rate = calculate_recognition_rate(
    predictions=female_predictions, true_classes=female_classes_test
)

# Report both rates as percentages with two decimals
print(f"Male recognition rate: {male_recognition_rate * 100:.2f}%")
print(f"Female recognition rate: {female_recognition_rate * 100:.2f}%")


Male recognition rate: 0.00%
Female recognition rate: 0.00%
