In [1]:
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import seaborn as sns
import env
import acquire
import prepare

# Summary Explanation and Takeaway

This notebook seeks to answer question 5: At some point in 2019, the ability for students and alumni to access both curriculums (web dev to ds, ds to web dev) should have been shut off. Do you see any evidence of that happening? Did it happen before?

I separated the Data Science and Web Dev students, then listed the most popular paths viewed by each group.
I then looked at entries where DS students viewed WD material and vice versa.

This is what I found:

### Data Science Takeaway

Data Science students show no views of the checked Web Dev paths before September 2019, and only a single view (`spring` on 2020-11-19) after December 2019, which leads me to believe they did not have access outside the September–December 2019 window.

### Web Dev Takeaway

Web Dev students appear to have had access to the Data Science paths after September 2019. Specifically, views of the checked paths run from July 2020 to April 2021.

In [2]:
# Load the access-log records through the project-local `acquire` module.
# NOTE(review): where get_data() reads from (database, cached file, ...) is not
# visible in this notebook — confirm before relying on freshness of the data.
df = acquire.get_data()
# Preview the first rows to check which columns came back.
df.head()

Unnamed: 0,date,time,ip,path,user_id,cohort_id,cohort_name,slack,start_date,end_date,program_id
0,2018-01-26,09:55:03,97.105.19.61,/,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1
1,2018-01-26,09:56:02,97.105.19.61,java-ii,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1
2,2018-01-26,09:56:05,97.105.19.61,java-ii/object-oriented-programming,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1
3,2018-01-26,09:56:06,97.105.19.61,slides/object_oriented_programming,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1
4,2018-01-26,09:56:24,97.105.19.61,javascript-i/conditionals,2,22.0,Teddy,#teddy,2018-01-08,2018-05-17,2


In [3]:
# Run the project-local preparation step. Per the preview below, the result is
# indexed by date and carries a `program` label column next to `program_id`.
# NOTE(review): this rebinds `df`, so re-running the cell feeds the already-prepared
# frame back into prep_data — confirm that is safe, or restart the kernel first.
df = prepare.prep_data(df)
# Preview the prepared frame.
df.head()

Unnamed: 0_level_0,date,time,ip,path,user_id,cohort_id,cohort_name,slack,start_date,end_date,program_id,program
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-26,2018-01-26,09:55:03,97.105.19.61,/,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1,Full Stack PHP
2018-01-26,2018-01-26,09:56:02,97.105.19.61,java-ii,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1,Full Stack PHP
2018-01-26,2018-01-26,09:56:05,97.105.19.61,java-ii/object-oriented-programming,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1,Full Stack PHP
2018-01-26,2018-01-26,09:56:06,97.105.19.61,slides/object_oriented_programming,1,8.0,Hampton,#hampton,2015-09-22,2016-02-06,1,Full Stack PHP
2018-01-26,2018-01-26,09:56:24,97.105.19.61,javascript-i/conditionals,2,22.0,Teddy,#teddy,2018-01-08,2018-05-17,2,Full Stack Java


In [14]:
# (rows, columns) of the prepared frame — a quick size check before splitting by program.
df.shape

(847329, 12)

# Question 5

## At some point in 2019, the ability for students and alumni to access both curriculums (web dev to ds, ds to web dev) should have been shut off. Do you see any evidence of that happening? Did it happen before?

In [4]:
# Number of log rows per program label
df["program"].value_counts()

Full Stack Java    713365
Data Science       103411
Full Stack PHP      30548
Front End               5
Name: program, dtype: int64

In [18]:
# Data Science subset: keep only the log rows tagged with program_id 3
ds = df.loc[df["program_id"] == 3]

In [21]:
# Web Dev subset, part 1: log rows tagged with program_id 1
# (shown with the label 'Full Stack PHP' in the prepared-frame preview)
wd1 = df.loc[df["program_id"] == 1]

In [20]:
# Web Dev subset, part 2: log rows tagged with program_id 2
# (shown with the label 'Full Stack Java' in the prepared-frame preview)
wd2 = df.loc[df["program_id"] == 2]

In [23]:
# Stack the two Web Dev subsets row-wise (program 1 rows first, then program 2)
# into a single Web Dev frame
wd = pd.concat((wd1, wd2), axis=0)

In [25]:
# Sanity check: the combined Web Dev frame has exactly as many rows
# as its two parts put together
wd.shape[0] == wd1.shape[0] + wd2.shape[0]

True

In [39]:
# The 20 most frequently requested paths among Web Dev rows
wd["path"].value_counts().iloc[:20]

/                                                                            37495
javascript-i                                                                 18193
toc                                                                          17580
search/search_index.json                                                     15331
java-iii                                                                     13162
html-css                                                                     13111
java-ii                                                                      12173
spring                                                                       11877
jquery                                                                       11037
mysql                                                                        10602
java-i                                                                       10460
javascript-ii                                                                10290
appe

## Find proof that DS had access to WD pre-2019

This section lists Data Science students' views of popular Web Dev paths to see when (or if) they had access.

In [33]:
# Data Science rows whose path is exactly 'java-i'; the date index shows when each view happened
ds.loc[ds["path"] == "java-i", "path"]

date
2019-09-25    java-i
2019-09-25    java-i
2019-09-25    java-i
2019-09-25    java-i
2019-11-25    java-i
2019-11-26    java-i
2019-12-03    java-i
Name: path, dtype: object

In [34]:
# Data Science rows whose path is exactly 'java-ii'; the date index shows when each view happened
ds.loc[ds["path"] == "java-ii", "path"]

date
2019-09-25    java-ii
2019-11-25    java-ii
2019-12-03    java-ii
2019-12-03    java-ii
Name: path, dtype: object

In [35]:
# Data Science rows whose path is exactly 'java-iii'; the date index shows when each view happened
ds.loc[ds["path"] == "java-iii", "path"]

date
2019-09-23    java-iii
2019-11-25    java-iii
2019-11-25    java-iii
2019-12-03    java-iii
Name: path, dtype: object

In [36]:
# Data Science rows whose path is exactly 'jquery'; the date index shows when each view happened
ds.loc[ds["path"] == "jquery", "path"]

date
2019-11-25    jquery
2019-12-03    jquery
2019-12-03    jquery
2019-12-04    jquery
Name: path, dtype: object

In [41]:
# Data Science rows whose path is exactly 'javascript-i'; the date index shows when each view happened
ds.loc[ds["path"] == "javascript-i", "path"]

date
2019-09-25    javascript-i
2019-09-25    javascript-i
2019-09-25    javascript-i
2019-09-25    javascript-i
2019-11-25    javascript-i
2019-11-26    javascript-i
2019-11-26    javascript-i
2019-12-03    javascript-i
2019-12-03    javascript-i
2019-12-14    javascript-i
Name: path, dtype: object

In [44]:
# Data Science rows whose path is exactly 'javascript-ii'; the date index shows when each view happened
ds.loc[ds["path"] == "javascript-ii", "path"]

date
2019-11-25    javascript-ii
2019-11-26    javascript-ii
2019-12-03    javascript-ii
2019-12-03    javascript-ii
Name: path, dtype: object

In [46]:
# Data Science rows whose path is exactly 'javascript-iii' (an empty result means no such views were logged)
ds.loc[ds["path"] == "javascript-iii", "path"]

Series([], Name: path, dtype: object)

In [42]:
# Data Science rows whose path is exactly 'html-css'; the date index shows when each view happened
ds.loc[ds["path"] == "html-css", "path"]

date
2019-09-25    html-css
2019-09-25    html-css
2019-09-25    html-css
2019-09-27    html-css
2019-11-25    html-css
2019-11-25    html-css
2019-11-26    html-css
2019-11-26    html-css
2019-11-26    html-css
2019-11-26    html-css
2019-12-03    html-css
2019-12-03    html-css
2019-12-04    html-css
2019-12-13    html-css
2019-12-13    html-css
2019-12-14    html-css
Name: path, dtype: object

In [47]:
# Data Science rows whose path is exactly 'spring'; the date index shows when each view happened
ds.loc[ds["path"] == "spring", "path"]

date
2019-11-25    spring
2019-11-26    spring
2019-11-26    spring
2019-11-26    spring
2019-12-03    spring
2020-11-19    spring
Name: path, dtype: object

### Data Science Takeaway

Data Science students show no views of the checked Web Dev paths before September 2019, and only a single view (`spring` on 2020-11-19) after December 2019, which leads me to believe they did not have access outside the September–December 2019 window.

## Find proof that WD had access to DS.

In [48]:
# The 20 most frequently requested paths among Data Science rows
ds["path"].value_counts().iloc[:20]

/                                                    8358
search/search_index.json                             2203
classification/overview                              1785
1-fundamentals/modern-data-scientist.jpg             1655
1-fundamentals/AI-ML-DL-timeline.jpg                 1651
1-fundamentals/1.1-intro-to-data-science             1633
classification/scale_features_or_not.svg             1590
fundamentals/AI-ML-DL-timeline.jpg                   1443
fundamentals/modern-data-scientist.jpg               1438
sql/mysql-overview                                   1424
fundamentals/intro-to-data-science                   1413
6-regression/1-overview                              1124
anomaly-detection/AnomalyDetectionCartoon.jpeg        829
anomaly-detection/overview                            804
10-anomaly-detection/AnomalyDetectionCartoon.jpeg     754
10-anomaly-detection/1-overview                       751
3-sql/1-mysql-overview                                707
1-fundamentals

This section lists Web Dev students' views of popular Data Science paths to see when (or if) they had access.

In [52]:
# Web Dev rows whose path is exactly 'classification/overview'; the date index shows when each view happened
wd.loc[wd["path"] == "classification/overview", "path"]

date
2020-07-22    classification/overview
2020-07-22    classification/overview
2020-07-22    classification/overview
2020-07-22    classification/overview
2020-07-22    classification/overview
                       ...           
2021-04-20    classification/overview
2021-04-20    classification/overview
2021-04-21    classification/overview
2021-04-21    classification/overview
2021-04-21    classification/overview
Name: path, Length: 852, dtype: object

In [55]:
# Web Dev rows whose path is exactly 'fundamentals/intro-to-data-science'; the date index shows when each view happened
wd.loc[wd["path"] == "fundamentals/intro-to-data-science", "path"]

date
2020-07-22    fundamentals/intro-to-data-science
2020-07-22    fundamentals/intro-to-data-science
2020-07-22    fundamentals/intro-to-data-science
2020-07-22    fundamentals/intro-to-data-science
2020-07-22    fundamentals/intro-to-data-science
                             ...                
2021-04-20    fundamentals/intro-to-data-science
2021-04-20    fundamentals/intro-to-data-science
2021-04-21    fundamentals/intro-to-data-science
2021-04-21    fundamentals/intro-to-data-science
2021-04-21    fundamentals/intro-to-data-science
Name: path, Length: 471, dtype: object

In [57]:
# Web Dev rows whose path is exactly 'stats/compare-means'; the date index shows when each view happened
wd.loc[wd["path"] == "stats/compare-means", "path"]

date
2020-08-26    stats/compare-means
2020-08-26    stats/compare-means
2020-08-26    stats/compare-means
2020-08-26    stats/compare-means
2020-08-26    stats/compare-means
                     ...         
2021-03-10    stats/compare-means
2021-03-13    stats/compare-means
2021-03-17    stats/compare-means
2021-03-22    stats/compare-means
2021-04-11    stats/compare-means
Name: path, Length: 111, dtype: object

### Web Dev Takeaway

Web Dev students appear to have had access to the Data Science paths after September 2019. Specifically, views of the checked paths run from July 2020 to April 2021.