# Introduction aux données

In [15]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Read the admissions records from the CSV file into a DataFrame.
admissions = pd.read_csv('admissions.csv')

# Build a liblinear-backed logistic regression and train it with `gpa` as the
# only feature and `admit` as the target (`fit` hands back the fitted estimator).
model = LogisticRegression(solver='liblinear').fit(
    admissions[['gpa']],
    admissions['admit'],
)

# Predict on the same rows that were used for training and store the
# predictions next to the original columns.
labels = model.predict(admissions[['gpa']])
admissions['predicted_label'] = labels


In [16]:
# Print how many rows the model assigned to each predicted class.
print(admissions.loc[:, 'predicted_label'].value_counts())

0    598
1     46
Name: predicted_label, dtype: int64


In [17]:
# Preview the first rows of the frame.
admissions.head()

Unnamed: 0,admit,gpa,gre,predicted_label
0,0,3.177277,594.102992,0
1,0,3.412655,631.528607,0
2,0,2.728097,553.714399,0
3,0,3.093559,551.089985,0
4,0,3.141923,537.184894,0


# Exactitude (accuracy)

In [22]:
# Rename the ground-truth column so it reads as the counterpart of `predicted_label`.
admissions = admissions.rename({"admit": "actual_label"}, axis="columns")

In [23]:
# Show only the first ten rows instead of dumping the whole frame.
admissions.head(10)

Unnamed: 0,actual_label,gpa,gre,predicted_label
0,0,3.177277,594.102992,0
1,0,3.412655,631.528607,0
2,0,2.728097,553.714399,0
3,0,3.093559,551.089985,0
4,0,3.141923,537.184894,0
5,0,3.599108,442.763567,0
6,0,3.238972,667.472189,0
7,0,3.420177,561.713905,0
8,0,3.562482,590.340371,0
9,0,3.910495,463.470183,1


In [30]:
# Boolean Series: True on every row where the prediction equals the actual label.
matches = admissions['actual_label'].eq(admissions['predicted_label'])

In [44]:
# Display the boolean Series of row-wise agreement between actual and predicted labels.
matches

0       True
1       True
2       True
3       True
4       True
5       True
6       True
7       True
8       True
9      False
10      True
11      True
12      True
13      True
14      True
15      True
16      True
17      True
18      True
19      True
20      True
21      True
22      True
23      True
24      True
25      True
26      True
27      True
28      True
29      True
       ...  
614     True
615    False
616    False
617     True
618    False
619    False
620    False
621     True
622    False
623    False
624    False
625    False
626    False
627    False
628     True
629    False
630    False
631    False
632    False
633    False
634     True
635    False
636     True
637    False
638    False
639    False
640    False
641    False
642    False
643    False
Length: 644, dtype: bool

In [47]:
# Keep only the rows where the prediction was right. `matches` is already a
# boolean mask, so it is used directly instead of being compared to True.
correct_predictions = admissions[matches]

In [48]:
# Preview the first ten correctly classified rows.
correct_predictions.head(10)

Unnamed: 0,actual_label,gpa,gre,predicted_label
0,0,3.177277,594.102992,0
1,0,3.412655,631.528607,0
2,0,2.728097,553.714399,0
3,0,3.093559,551.089985,0
4,0,3.141923,537.184894,0
5,0,3.599108,442.763567,0
6,0,3.238972,667.472189,0
7,0,3.420177,561.713905,0
8,0,3.562482,590.340371,0
10,0,3.264341,636.453166,0


In [49]:
# Accuracy: number of correctly classified rows divided by the total number of rows.
accuracy = correct_predictions.shape[0] / admissions.shape[0]

In [50]:
# Display the fraction of rows whose predicted label matches the actual label.
accuracy

0.6459627329192547

# Résultats de la classification binaire

In [89]:
# Mask of true positives: predicted 1 and actually 1.
true_positives_temp = admissions['predicted_label'].eq(1) & admissions['actual_label'].eq(1)

In [90]:
# Count the rows selected by the true-positive mask.
true_positives = admissions.loc[true_positives_temp].shape[0]

In [91]:
# Display the number of rows predicted 1 whose actual label is 1.
true_positives

31

In [92]:
# Mask of true negatives: predicted 0 and actually 0.
true_negatives_temp = admissions['predicted_label'].eq(0) & admissions['actual_label'].eq(0)

In [93]:
# Count the rows selected by the true-negative mask. The mask is already
# boolean, so it indexes the frame directly, as the other count cells do.
true_negatives = len(admissions[true_negatives_temp])

In [94]:
# Display the number of rows predicted 0 whose actual label is 0.
true_negatives

385

# Sensibilité

In [101]:
# Mask of false negatives: actually 1 but predicted 0.
false_negatives_temp = admissions['actual_label'].eq(1) & admissions['predicted_label'].eq(0)

In [112]:
# Count the rows selected by the false-negative mask.
false_negatives = admissions.loc[false_negatives_temp].shape[0]

In [113]:
# Display the number of rows whose actual label is 1 but were predicted 0.
false_negatives

213

In [120]:
# Sensitivity (true positive rate): true positives over all actual positives.
# NOTE(review): `sensibity` is a misspelling of `sensitivity`; the name is kept
# because the following cell reads it.
sensibity = true_positives / (false_negatives + true_positives)

In [121]:
# Display the sensitivity: true positives / (true positives + false negatives).
sensibity

0.12704918032786885

# Spécificité

In [131]:
# Mask of false positives: predicted 1 but actually 0.
false_positives_temp = admissions['predicted_label'].eq(1) & admissions['actual_label'].eq(0)

In [132]:
# Count the rows selected by the false-positive mask. The mask is already
# boolean, so it indexes the frame directly, as the other count cells do.
false_positives = len(admissions[false_positives_temp])

In [133]:
# Display the false-positive count. The variable assigned in the previous cell
# is `false_positives`; the singular spelling is undefined on a fresh kernel.
false_positives

15

In [134]:
# Specificity (true negative rate): true negatives over all actual negatives.
specificity = true_negatives / (true_negatives + false_positives)

In [135]:
# Display the specificity: true negatives / (false positives + true negatives).
specificity

0.9625

In [138]:
# Share of rows whose actual label is 1 (the positive-class base rate).
admissions.loc[admissions['actual_label'].eq(1)].shape[0] / admissions.shape[0]

0.37888198757763975