In [1]:
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# Load the respondent ID (AID) and the four subject-grade items (H1ED11-H1ED14).
# The path carries an explicit C: drive prefix, matching the other reads of this
# same file in this notebook; without it the path only resolves when the current
# working drive happens to be C:.
grades = pd.read_csv(
    r"C:\Users\mnorm\Documents\NSS\Capstone\data\National Longitudinal Study of Adolescent to Adult Health\ICPSR_21600\DS0001\21600-0001-Data.tsv",
    sep='\t',
    usecols=['AID', 'H1ED11', 'H1ED12', 'H1ED13', 'H1ED14'],
    low_memory=False,
)
# Swap the survey item codes for readable subject names (AID keeps its name).
grades = grades.rename(
    columns={
        'H1ED11': 'English',
        'H1ED12': 'Math',
        'H1ED13': 'History',
        'H1ED14': 'Science',
    }
)
grades.head()

Unnamed: 0,AID,English,Math,History,Science
0,57100270,97,97,97,97
1,57101310,2,5,3,3
2,57103171,2,4,2,3
3,57103869,3,4,3,2
4,57104553,2,1,3,2


In [3]:
# Lookup table pairing each numeric grade code with its label.
# NOTE(review): 'refusesd' looks like a typo for 'refused'; it is kept verbatim
# because the recorded output below shows the same spelling.
key = [1, 2, 3, 4, 5, 6, 96, 97, 98]
grade = [
    'A',
    'B',
    'C',
    'D or lower',
    'subject not taken',
    'diff grading system',
    'refusesd',
    'legitimate skip',
    'unknown',
]
grade_code = pd.DataFrame({'index': key, 'grade': grade})

In [4]:
# Tally each English grade code. The code column is named 'index' and the count
# column 'English' explicitly, because the later merge joins on 'index'.
# pandas >= 2.0 would otherwise label these columns 'English' and 'count'.
English = grades['English'].value_counts().rename_axis('index').reset_index(name='English')

In [5]:
# Tally each History grade code. The code column is named 'index' and the count
# column 'History' explicitly, because the later merge joins on 'index'.
# pandas >= 2.0 would otherwise label these columns 'History' and 'count'.
History = grades['History'].value_counts().rename_axis('index').reset_index(name='History')

In [6]:
# Tally each Math grade code. The code column is named 'index' and the count
# column 'Math' explicitly, because the later merge joins on 'index'.
# pandas >= 2.0 would otherwise label these columns 'Math' and 'count'.
Math = grades['Math'].value_counts().rename_axis('index').reset_index(name='Math')

In [7]:
# Tally each Science grade code. The code column is named 'index' and the count
# column 'Science' explicitly, because the later merge joins on 'index'.
# pandas >= 2.0 would otherwise label these columns 'Science' and 'count'.
Science = grades['Science'].value_counts().rename_axis('index').reset_index(name='Science')

In [8]:
# Attach the per-subject counts to the grade labels, one subject at a time.
# Every step is an inner join on 'index', so a code that is absent from any one
# frame is dropped from the combined table.
dfs = [grade_code, English, History, Math, Science]
gradebysub = dfs[0]
for subject_counts in dfs[1:]:
    gradebysub = gradebysub.merge(subject_counts, on='index', how='inner')
gradebysub

Unnamed: 0,index,grade,English,History,Math,Science
0,1,A,1712,1931,1552,1759
1,2,B,2389,1853,1899,1860
2,3,C,1435,1182,1521,1282
3,4,D or lower,647,664,950,685
4,5,subject not taken,87,652,353,706
5,6,diff grading system,58,50,69,50
6,96,refusesd,5,5,4,5
7,97,legitimate skip,128,128,128,128
8,98,unknown,43,39,28,29


In [9]:
# TODO: create a total column and a total row
# TODO: remove the 'subject not taken' and 'legitimate skip' rows
# TODO: group the AIDs in `grades` by grade
# TODO: EDA on peer connection
# TODO: compare peer connection and grades through EDA

In [10]:
# Respondents whose grade code is 1 (labelled 'A' in grade_code) in all four subjects.
all_a_mask = grades[['English', 'History', 'Math', 'Science']].eq(1).all(axis=1)
A_students = grades.loc[all_a_mask]
A_students

Unnamed: 0,AID,English,Math,History,Science
17,57120005,1,1,1,1
34,57136630,1,1,1,1
41,57145151,1,1,1,1
52,57159952,1,1,1,1
55,57163657,1,1,1,1
...,...,...,...,...,...
6453,99716929,1,1,1,1
6454,99716958,1,1,1,1
6457,99716977,1,1,1,1
6476,99718043,1,1,1,1


In [11]:
# Respondents whose grade code is at most 2 in every subject
# (codes 1 and 2 are labelled 'A' and 'B' in grade_code).
a_or_b_mask = grades[['English', 'Math', 'History', 'Science']].le(2).all(axis=1)
AB_students = grades.loc[a_or_b_mask]
len(AB_students)

1697

In [12]:
# Respondents whose grade code is 3 or 4 in every subject
# (labelled 'C' and 'D or lower' in grade_code). Both bounds are inclusive.
subject_codes = grades[['English', 'Math', 'History', 'Science']]
c_or_lower_mask = (subject_codes.ge(3) & subject_codes.le(4)).all(axis=1)
C_below_students = grades.loc[c_or_lower_mask]
len(C_below_students)

404

In [13]:
# Load the respondent ID (AID) plus items H1ED15-H1ED18; the next cell renames
# these four items to the "trouble_..." columns.
trouble_with = pd.read_csv(
    r"C:\Users\mnorm\Documents\NSS\Capstone\data\National Longitudinal Study of Adolescent to Adult Health\ICPSR_21600\DS0001\21600-0001-Data.tsv",
    usecols=['AID', 'H1ED15', 'H1ED16', 'H1ED17', 'H1ED18'],
    sep='\t',
    low_memory=False,
)
trouble_with.head()

Unnamed: 0,AID,H1ED15,H1ED16,H1ED17,H1ED18
0,57100270,7,7,7,7
1,57101310,2,3,0,2
2,57103171,1,2,2,1
3,57103869,4,3,3,4
4,57104553,0,0,4,3


In [14]:
# Replace the survey item codes with descriptive column names (AID keeps its name).
trouble_with = trouble_with.rename(
    columns={
        'H1ED15': 'trouble_get_along_teachers',
        'H1ED16': 'trouble_paying_attention',
        'H1ED17': 'trouble_getting_homework_done',
        'H1ED18': 'trouble_get_along_students',
    }
)
trouble_with

Unnamed: 0,AID,trouble_get_along_teachers,trouble_paying_attention,trouble_getting_homework_done,trouble_get_along_students
0,57100270,7,7,7,7
1,57101310,2,3,0,2
2,57103171,1,2,2,1
3,57103869,4,3,3,4
4,57104553,0,0,4,3
...,...,...,...,...,...
6499,99719930,1,0,1,2
6500,99719939,0,0,0,0
6501,99719970,1,3,4,4
6502,99719976,1,1,1,1


In [15]:
# Tally each response code for trouble getting along with teachers. The code
# column is named 'index' explicitly because the later merge joins on 'index';
# pandas >= 2.0 would otherwise name the columns after the source column and 'count'.
teachers = trouble_with['trouble_get_along_teachers'].value_counts().rename_axis('index').reset_index(name='trouble_get_along_teachers')

In [16]:
# Tally each response code for trouble paying attention. The code column is
# named 'index' explicitly because the later merge joins on 'index';
# pandas >= 2.0 would otherwise name the columns after the source column and 'count'.
attention = trouble_with['trouble_paying_attention'].value_counts().rename_axis('index').reset_index(name='trouble_paying_attention')

In [17]:
# Tally each response code for trouble getting homework done. The code column
# is named 'index' explicitly because the later merge joins on 'index';
# pandas >= 2.0 would otherwise name the columns after the source column and 'count'.
homework = trouble_with['trouble_getting_homework_done'].value_counts().rename_axis('index').reset_index(name='trouble_getting_homework_done')

In [18]:
# Tally each response code for trouble getting along with students. The code
# column is named 'index' explicitly because the later merge joins on 'index';
# pandas >= 2.0 would otherwise name the columns after the source column and 'count'.
students = trouble_with['trouble_get_along_students'].value_counts().rename_axis('index').reset_index(name='trouble_get_along_students')

In [19]:
# Combine the four per-item count frames into one table keyed by response code.
# Every step is an inner join on 'index', so a code missing from any one frame
# is dropped from the result.
dfs2 = [teachers, attention, homework, students]
trouble_with_counts = dfs2[0]
for item_counts in dfs2[1:]:
    trouble_with_counts = trouble_with_counts.merge(item_counts, on='index', how='inner')

In [20]:
# Lookup table pairing each numeric response code with its answer label.
code = [0, 1, 2, 3, 4, 6, 7, 8]
survey = [
    'never',
    'just a few times',
    'about once a week',
    'almost everyday',
    'everyday',
    'refused',
    'legitimate skip',
    'don’t know',
]
trouble_with_code = pd.DataFrame({'index': code, 'answer': survey})
trouble_with_code

Unnamed: 0,index,answer
0,0,never
1,1,just a few times
2,2,about once a week
3,3,almost everyday
4,4,everyday
5,6,refused
6,7,legitimate skip
7,8,don’t know


In [21]:
# Put the answer labels next to the per-item counts (inner join on the response code).
trouble = trouble_with_code.merge(trouble_with_counts, on='index', how='inner')

In [22]:
# Load the respondent ID (AID) plus items H1ED19-H1ED24; the next cell renames
# these six items to the closeness / belonging / safety column names.
conn_safety = pd.read_csv(
    r"C:\Users\mnorm\Documents\NSS\Capstone\data\National Longitudinal Study of Adolescent to Adult Health\ICPSR_21600\DS0001\21600-0001-Data.tsv",
    usecols=['AID', 'H1ED19', 'H1ED20', 'H1ED21', 'H1ED22', 'H1ED23', 'H1ED24'],
    sep='\t',
    low_memory=False,
)

In [23]:
# Replace the survey item codes with descriptive column names (AID keeps its name).
conn_safety = conn_safety.rename(
    columns={
        'H1ED19': 'closeto_peers',
        'H1ED20': 'partof_school',
        'H1ED21': 'students_prejudiced',
        'H1ED22': 'happy_at_school',
        'H1ED23': 'teachers_fair',
        'H1ED24': 'safein_school',
    }
)

In [24]:
# Tally each response code for closeto_peers. The code column is named 'index'
# explicitly because the later merge joins on 'index'; pandas >= 2.0 would
# otherwise name the columns 'closeto_peers' and 'count'.
peers = conn_safety['closeto_peers'].value_counts().rename_axis('index').reset_index(name='closeto_peers')

In [25]:
# Tally each response code for partof_school. The code column is named 'index'
# explicitly because the later merge joins on 'index'; pandas >= 2.0 would
# otherwise name the columns 'partof_school' and 'count'.
school = conn_safety['partof_school'].value_counts().rename_axis('index').reset_index(name='partof_school')

In [26]:
# Tally each response code for students_prejudiced. The code column is named
# 'index' explicitly because the later merge joins on 'index'; pandas >= 2.0
# would otherwise name the columns 'students_prejudiced' and 'count'.
prejudice = conn_safety['students_prejudiced'].value_counts().rename_axis('index').reset_index(name='students_prejudiced')

In [27]:
# Tally each response code for happy_at_school. The code column is named 'index'
# explicitly because the later merge joins on 'index'; pandas >= 2.0 would
# otherwise name the columns 'happy_at_school' and 'count'.
happy = conn_safety['happy_at_school'].value_counts().rename_axis('index').reset_index(name='happy_at_school')

In [28]:
# Tally each response code for teachers_fair. The code column is named 'index'
# explicitly because the later merge joins on 'index'; pandas >= 2.0 would
# otherwise name the columns 'teachers_fair' and 'count'.
fair = conn_safety['teachers_fair'].value_counts().rename_axis('index').reset_index(name='teachers_fair')

In [29]:
# Tally each response code for safein_school. The code column is named 'index'
# explicitly because the later merge joins on 'index'; pandas >= 2.0 would
# otherwise name the columns 'safein_school' and 'count'.
safe = conn_safety['safein_school'].value_counts().rename_axis('index').reset_index(name='safein_school')

In [30]:
# Combine the six per-item count frames into one table keyed by response code.
# Every step is an inner join on 'index', so a code missing from any one frame
# is dropped from the result.
dfs3 = [peers, school, prejudice, happy, fair, safe]

conn_safety_counts = dfs3[0]
for item_counts in dfs3[1:]:
    conn_safety_counts = conn_safety_counts.merge(item_counts, on='index', how='inner')

In [31]:
# Lookup table pairing each numeric agreement code with its label.
code = [1, 2, 3, 4, 5, 6, 7, 8]
survey = [
    'strongly agree',
    'agree',
    'neither agree nor disagree',
    'disagree',
    'strongly disagree',
    'refused',
    'legitimate skip',
    'don’t know',
]

conn_safety_code = pd.DataFrame({'index': code, 'survey': survey})

In [32]:
# Put the agreement labels next to the per-item counts (inner join on the response code).
conn_safety_ans = conn_safety_code.merge(conn_safety_counts, on='index', how='inner')
conn_safety_ans

Unnamed: 0,index,survey,closeto_peers,partof_school,students_prejudiced,happy_at_school,teachers_fair,safein_school
0,1,strongly agree,1273,1677,878,1574,1015,1622
1,2,agree,3025,3039,1713,2629,2660,2908
2,3,neither agree nor disagree,1214,881,1523,1080,1395,1044
3,4,disagree,627,564,1589,706,971,577
4,5,strongly disagree,227,205,644,376,326,216
5,6,refused,4,4,4,5,4,4
6,7,legitimate skip,128,128,128,128,128,128
7,8,don’t know,6,6,25,6,5,5


In [33]:
# Preview the first five respondent-level rows of the renamed frame.
conn_safety.head(n=5)

Unnamed: 0,AID,closeto_peers,partof_school,students_prejudiced,happy_at_school,teachers_fair,safein_school
0,57100270,7,7,7,7,7,7
1,57101310,2,2,3,2,5,4
2,57103171,2,2,2,1,4,2
3,57103869,2,1,4,4,4,4
4,57104553,2,1,2,1,2,2


In [34]:
# Keep respondents whose closeto_peers code is at most 2
# (codes 1 and 2 are labelled 'strongly agree' and 'agree' in conn_safety_code).
feels_close_mask = conn_safety['closeto_peers'].le(2)
close_peers = conn_safety[feels_close_mask]
close_peers

Unnamed: 0,AID,closeto_peers,partof_school,students_prejudiced,happy_at_school,teachers_fair,safein_school
1,57101310,2,2,3,2,5,4
2,57103171,2,2,2,1,4,2
3,57103869,2,1,4,4,4,4
4,57104553,2,1,2,1,2,2
5,57104649,2,1,4,2,3,1
...,...,...,...,...,...,...,...
6496,99718925,2,1,4,2,3,2
6497,99718944,2,1,3,1,3,1
6498,99719378,2,1,1,1,2,1
6499,99719930,2,2,5,3,3,2


In [35]:
# Left join: every straight-A respondent is kept. Those whose AID is not in
# close_peers get NaN in all of the close_peers columns.
gradeA_peers = A_students.merge(close_peers, on='AID', how='left')
gradeA_peers

Unnamed: 0,AID,English,Math,History,Science,closeto_peers,partof_school,students_prejudiced,happy_at_school,teachers_fair,safein_school
0,57120005,1,1,1,1,2.0,2.0,4.0,2.0,3.0,2.0
1,57136630,1,1,1,1,2.0,2.0,4.0,1.0,2.0,2.0
2,57145151,1,1,1,1,1.0,1.0,1.0,2.0,2.0,2.0
3,57159952,1,1,1,1,,,,,,
4,57163657,1,1,1,1,2.0,1.0,1.0,1.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...
421,99716929,1,1,1,1,2.0,2.0,3.0,1.0,1.0,1.0
422,99716958,1,1,1,1,2.0,2.0,3.0,2.0,1.0,1.0
423,99716977,1,1,1,1,2.0,2.0,3.0,2.0,3.0,2.0
424,99718043,1,1,1,1,1.0,1.0,3.0,1.0,1.0,1.0


In [36]:
# Missing values per column; the close_peers columns count the rows that found
# no match in the left join.
gradeA_peers.isnull().sum()

AID                     0
English                 0
Math                    0
History                 0
Science                 0
closeto_peers          89
partof_school          89
students_prejudiced    89
happy_at_school        89
teachers_fair          89
safein_school          89
dtype: int64

In [37]:
# Left join: every respondent in C_below_students is kept. Those whose AID is
# not in close_peers get NaN in all of the close_peers columns.
C_below_peers = C_below_students.merge(close_peers, on='AID', how='left')
C_below_peers

Unnamed: 0,AID,English,Math,History,Science,closeto_peers,partof_school,students_prejudiced,happy_at_school,teachers_fair,safein_school
0,57109625,3,3,4,4,,,,,,
1,57129567,4,4,4,4,,,,,,
2,57131909,3,3,4,3,2.0,1.0,3.0,2.0,2.0,2.0
3,57166463,3,4,3,3,2.0,2.0,5.0,2.0,3.0,2.0
4,57197327,4,4,4,4,2.0,2.0,3.0,2.0,3.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...
399,99715268,4,3,4,4,2.0,4.0,5.0,4.0,5.0,3.0
400,99715350,3,4,4,3,,,,,,
401,99716994,4,3,4,4,,,,,,
402,99719970,4,4,4,4,,,,,,


In [38]:
# Missing values per column; the close_peers columns count the rows that found
# no match in the left join.
C_below_peers.isnull().sum()

AID                      0
English                  0
Math                     0
History                  0
Science                  0
closeto_peers          183
partof_school          183
students_prejudiced    183
happy_at_school        183
teachers_fair          183
safein_school          183
dtype: int64