## Admisión en la universidad

In [1]:
import pandas as pd
import numpy as np

# Load the admissions dataset; the notebook uses the columns admit, gre, gpa and rank.
# NOTE(review): an earlier comment claimed 399 rows, but the smoothed priors recorded
# in this notebook are consistent with 400 -- confirm against the CSV.
df = pd.read_csv('../../res/tp1/binary.csv', sep=',')
row_count = len(df)

# Number of rows per rank, ordered from the least to the most frequent rank.
rank_count = df['rank'].value_counts().sort_values()

# Distinct rank labels in ascending order (independent of the frequency ordering above).
ranks = np.sort(rank_count.index.values)

# Binary discretisation of each continuous field, written as pandas `query` predicates:
# element 0 is the "upper" class, element 1 the complementary "lower" class.
first_level_fields = {
    'gre': ['>= 500', '< 500'],
    'gpa': ['>= 3', '< 3'],
}

In [3]:
# Prior P(rank_i) with Laplace (add-one) smoothing over the len(ranks) possible ranks.
rank_probabilities = (rank_count + 1) / (row_count + len(ranks))
print(f'Probability for each rank:\n{rank_probabilities}')

# Conditional tables P(field class | rank), shaped as
# {'gre': {1: [upper, lower], 2: [upper, lower], ...}, 'gpa': {1: [upper, lower], ...}}
fields_rank_probabilities = {field: {} for field in first_level_fields}
for rank in ranks:
    rank_rows = df[df['rank'] == rank]
    rank_total = len(rank_rows)
    for field, classes in first_level_fields.items():
        class_count = len(classes)  # two classes per field: upper / lower
        upper_total = len(rank_rows.query(f'{field} {classes[0]}'))  # e.g. 'gre >= 500'
        # Everything that is not in the upper class is counted as the lower class.
        lower_total = rank_total - upper_total
        # Laplace (add-one) smoothing, the same correction applied to the rank prior above:
        # it keeps every conditional strictly inside (0, 1), so a class that happens to be
        # unobserved for a rank cannot zero out a product of probabilities later on.
        fields_rank_probabilities[field][rank] = [
            (upper_total + 1) / (rank_total + class_count),
            (lower_total + 1) / (rank_total + class_count),
        ]

# One row per rank, one column per class label.
gre_table = pd.DataFrame.from_dict(fields_rank_probabilities['gre'], orient='index', columns=first_level_fields['gre'])
gpa_table = pd.DataFrame.from_dict(fields_rank_probabilities['gpa'], orient='index', columns=first_level_fields['gpa'])
print(f'\nGRE:\n{gre_table}')
print(f'\nGPA:\n{gpa_table}')

Probability for each rank:
1    0.153465
4    0.168317
3    0.301980
2    0.376238
Name: rank, dtype: float64

GRE:
     >= 500     < 500
1  0.809524  0.190476
2  0.810458  0.189542
3  0.788618  0.211382
4  0.782609  0.217391

GPA:
       >= 3       < 3
1  0.857143  0.142857
2  0.823529  0.176471
3  0.829268  0.170732
4  0.797101  0.202899


In [52]:
# We need P(admit | rank, GRE, GPA) and the joint probability P(X1, X2, ..., Xn).
#
# For every (rank, gre class, gpa class) combination, estimate the admission probability
# as the relative frequency of admit == 1 among the matching rows.
# Result: {query string: [P(admit), P(not admit)]}

admission_probabilities = {}
for rank in ranks:
    for gre_class in gre_table.columns:
        for gpa_class in gpa_table.columns:
            row_criteria = f'rank == {rank} and gre {gre_class} and gpa {gpa_class}'
            criteria_rows = df.query(row_criteria)
            total = len(criteria_rows)
            admitted = int((criteria_rows.admit == 1).sum())
            if total:
                p_admit = admitted / total
                p_not_admit = (total - admitted) / total
            else:
                # No observations for this combination: the frequency is undefined, so
                # record NaN instead of raising ZeroDivisionError.
                p_admit = p_not_admit = float('nan')
            admission_probabilities[row_criteria] = [p_admit, p_not_admit]
            print(f'Rank: {rank}; GRE: {gre_class}; GPA: {gpa_class}; rows: {len(criteria_rows)}; admission probability: {admission_probabilities[row_criteria][0]}')

print(f'\n{admission_probabilities}')

Rank: 1; GRE: >= 500; GPA: >= 3; rows: 47; admission probability: 0.5531914893617021
Rank: 1; GRE: >= 500; GPA: < 3; rows: 3; admission probability: 1.0
Rank: 1; GRE: < 500; GPA: >= 3; rows: 6; admission probability: 0.5
Rank: 1; GRE: < 500; GPA: < 3; rows: 5; admission probability: 0.2
Rank: 2; GRE: >= 500; GPA: >= 3; rows: 104; admission probability: 0.4230769230769231
Rank: 2; GRE: >= 500; GPA: < 3; rows: 19; admission probability: 0.15789473684210525
Rank: 2; GRE: < 500; GPA: >= 3; rows: 21; admission probability: 0.19047619047619047
Rank: 2; GRE: < 500; GPA: < 3; rows: 7; admission probability: 0.42857142857142855
Rank: 3; GRE: >= 500; GPA: >= 3; rows: 85; admission probability: 0.24705882352941178
Rank: 3; GRE: >= 500; GPA: < 3; rows: 11; admission probability: 0.36363636363636365
Rank: 3; GRE: < 500; GPA: >= 3; rows: 16; admission probability: 0.1875
Rank: 3; GRE: < 500; GPA: < 3; rows: 9; admission probability: 0.0
Rank: 4; GRE: >= 500; GPA: >= 3; rows: 44; admission probabilit