In [55]:
import pandas as pd
import numpy as np
import datetime
import plotly.express as px

In [27]:
pd.set_option('display.max_rows', None)

# College Study Analysis

In [219]:
time = pd.read_csv('schooltime.csv')
time = time.dropna()
time

Unnamed: 0,weekday,date,study_time,study_decimal
0,1,8/21/2017,0:40,0.67
1,2,8/22/2017,0:30,0.5
2,3,8/23/2017,6:23,6.38
3,4,8/24/2017,5:48,5.8
4,5,8/25/2017,5:15,5.25
5,6,8/26/2017,0:00,0.0
6,7,8/27/2017,0:00,0.0
7,1,8/28/2017,6:30,6.5
8,2,8/29/2017,4:00,4.0
9,3,8/30/2017,8:30,8.5


In [220]:
#Create some dataframe for parts of week and no zeros
time_weekdays = time.loc[time['weekday'] <=5]
time_weekends = time.loc[time['weekday'] >=6]

time_nozero = time.loc[time['study_decimal'] > 0]
time_weekdays_nozero = time_weekdays.loc[time_weekdays['study_decimal'] > 0]
time_weekends_nozero = time_weekends.loc[time_weekends['study_decimal'] > 0]

In [221]:
time.dtypes

weekday            int64
date              object
study_time        object
study_decimal    float64
dtype: object

In [222]:
#Print some basic statistics
print('Total Time: ' + str(time['study_decimal'].sum()*7/5))
print('Weekday Time: ' + str(time_weekdays['study_decimal'].sum()*7/5))
print('Weekend Time: ' + str(time_weekends['study_decimal'].sum()*7/5))
print()
print('Average Time Per School Day: ' + str(time['study_decimal'].sum()/5/75))
print('Average Time Per Week: ' + str(time['study_decimal'].sum()/5/15))
print('Average Time Per Week during Week: ' + str(time_weekdays['study_decimal'].sum()/5/15))
print('Average Time Per Week during Weekend: ' + str(time_weekends['study_decimal'].sum()/5/15))
print()
print(time['study_decimal'].describe())

Total Time: 3306.3940000000002
Weekday Time: 3029.1519999999996
Weekend Time: 277.2420000000001

Average Time Per School Day: 6.297893333333334
Average Time Per Week: 31.489466666666665
Average Time Per Week during Week: 28.849066666666666
Average Time Per Week during Weekend: 2.6404000000000005

count    571.000000
mean       4.136095
std        3.496441
min        0.000000
25%        0.300000
50%        4.000000
75%        6.390000
max       14.000000
Name: study_decimal, dtype: float64


In [223]:
#Some more daily statistics

print('Total')
print('Mean: ' + str(time['study_decimal'].mean()))
print('Median: ' + str(time['study_decimal'].median()))
print()

print('Total No Zero Days')
print('Mean: ' + str(time_nozero['study_decimal'].mean()))
print('Median: ' + str(time_nozero['study_decimal'].median()))
print()

print('Weekday No Zero Days')
print('Mean: ' + str(time_weekdays_nozero['study_decimal'].mean()))
print('Median: ' + str(time_weekdays_nozero['study_decimal'].median()))
print()

print('Weekend No Zero Days')
print('Mean: ' + str(time_weekends_nozero['study_decimal'].mean()))
print('Median: ' + str(time_weekends_nozero['study_decimal'].median()))


Total
Mean: 4.136094570928196
Median: 4.0

Total No Zero Days
Mean: 5.429218390804598
Median: 5.03

Weekday No Zero Days
Mean: 5.70891820580475
Median: 5.33

Weekend No Zero Days
Mean: 3.53625
Median: 2.2249999999999996


In [189]:
fig = px.histogram(time_weekdays_nozero, x="study_decimal", title='Time Spent Studying During College (Non-zero Days)')
fig.update_xaxes(title_text='Time Spent Studying (Hours)')
fig.update_yaxes(title_text='Days')
fig.show()

In [224]:
#Generate Per Week Statistics
days = ['NA', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_avg = [0]
for i in range(1,8):
    print(days[i] + ' Statistics')
    day_df = time_nozero.loc[time['weekday'] == i]
    day_avg.append(day_df['study_decimal'].mean())
    print('Mean: ' + str(day_df['study_decimal'].mean()))
    print('Median: ' + str(day_df['study_decimal'].median()))
    ttl = "Time Spent Studying (Non-Zero) on " + days[i]
    fig = px.histogram(day_df, x="study_decimal", title=ttl)
    fig.update_xaxes(title_text='Time Spent Studying (Hours)')
    fig.update_yaxes(title_text='Number of Days')
    fig.show()
    print()

Monday Statistics
Mean: 5.788356164383562
Median: 5.98



Tuesday Statistics
Mean: 6.363717948717949
Median: 5.55



Wednesday Statistics
Mean: 5.915189873417721
Median: 5.8



Thursday Statistics
Mean: 5.499324324324324
Median: 5.5649999999999995



Friday Statistics
Mean: 4.940133333333333
Median: 4.75



Saturday Statistics
Mean: 3.151904761904762
Median: 1.92



Sunday Statistics
Mean: 3.766857142857143
Median: 3.0





In [225]:
#Generating Yearly Statistics
years = ['2017', '2018', '2019']
year_avg = []
for i in range(0,3):
    print(years[i] + ' Statistics')
    year_df = time_nozero[time_nozero['date'].str.contains(years[i])]
    year_avg.append(year_df['study_decimal'].mean())
    print('Mean: ' + str(year_df['study_decimal'].mean()))
    print('Median: ' + str(year_df['study_decimal'].median()))
    ttl = "Time Spent Studying (Non-Zero) in " + years[i]
    fig = px.histogram(year_df, x="study_decimal", title=ttl)
    fig.update_xaxes(title_text='Time Spent Studying (Hours)')
    fig.update_yaxes(title_text='Number of Days')
    fig.show()
    print()

2017 Statistics
Mean: 5.2067415730337085
Median: 5.25



2018 Statistics
Mean: 5.622159090909091
Median: 5.59



2019 Statistics
Mean: 5.345941176470588
Median: 4.5





In [226]:
day_times = pd.DataFrame(list(zip(days[1:], day_avg[1:])), columns =['day', 'time']) 
day_times
fig = px.bar(day_times, x='day', y='time', title='Time Spent on School Per Day of Week (Non-zero)')
fig.update_xaxes(title_text='Day of Week')
fig.update_yaxes(title_text='Time (Hours)')
fig.show()

In [227]:
year_times = pd.DataFrame(list(zip(years, year_avg)), columns =['year', 'time']) 
year_times
fig = px.bar(year_times, x='year', y='time', title='Average Non-zero Time Spent on School by Year')
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Time (Hours)')
fig.show()

# High School Analysis

In [228]:
time = pd.read_csv('highschooltime.csv')
time = time.dropna()
time

Unnamed: 0,weekday,date,study_time,study_decimal
19,3.0,2/19/2014,12:00,12.0
24,1.0,2/24/2014,12:00,12.0
26,3.0,2/26/2014,12:00,12.0
59,1.0,3/31/2014,2:38,2.633333
60,2.0,4/1/2014,1:30,1.5
61,3.0,4/2/2014,1:57,1.95
62,4.0,4/3/2014,5:00,5.0
63,5.0,4/4/2014,0:00,0.0
64,6.0,4/5/2014,1:02,1.033333
65,7.0,4/6/2014,7:30,7.5


In [229]:
#Create some dataframe for parts of week and no zeros
time_weekdays = time.loc[time['weekday'] <=5]
time_weekends = time.loc[time['weekday'] >=6]

time_nozero = time.loc[time['study_decimal'] > 0]
time_weekdays_nozero = time_weekdays.loc[time_weekdays['study_decimal'] > 0]
time_weekends_nozero = time_weekends.loc[time_weekends['study_decimal'] > 0]

In [199]:
time.dtypes

weekday          float64
date              object
study_time        object
study_decimal    float64
dtype: object

In [230]:
#Print some basic statistics
print('Total Time: ' + str(time['study_decimal'].sum()))
print('Weekday Time: ' + str(time_weekdays['study_decimal'].sum()))
print('Weekend Time: ' + str(time_weekends['study_decimal'].sum()))
print()
print('Average Time Per School Day: ' + str(time['study_decimal'].sum()/6.4/90))
print('Average Time Per Week: ' + str(time['study_decimal'].sum()/5/15))
print('Average Time Per Week during Week: ' + str(time_weekdays['study_decimal'].sum()/6.4/90))
print('Average Time Per Week during Weekend: ' + str(time_weekends['study_decimal'].sum()/6.4/90))
print()
print(time['study_decimal'].describe())

Total Time: 1162.066666667
Weekday Time: 921.66666667
Weekend Time: 240.399999997

Average Time Per School Day: 2.01747685185243
Average Time Per Week: 15.494222222226664
Average Time Per Week during Week: 1.6001157407465276
Average Time Per Week during Weekend: 0.41736111110590274

count    810.000000
mean       1.434650
std        1.770136
min        0.000000
25%        0.000000
50%        0.933333
75%        2.329167
max       12.000000
Name: study_decimal, dtype: float64


In [204]:
#Some more daily statistics

print('Total')
print('Mean: ' + str(time['study_decimal'].mean()))
print('Median: ' + str(time['study_decimal'].median()))
print()

print('Total No Zero Days')
print('Mean: ' + str(time_nozero['study_decimal'].mean()))
print('Median: ' + str(time_nozero['study_decimal'].median()))
print()

print('Weekday No Zero Days')
print('Mean: ' + str(time_weekdays_nozero['study_decimal'].mean()))
print('Median: ' + str(time_weekdays_nozero['study_decimal'].median()))
print()

print('Weekend No Zero Days')
print('Mean: ' + str(time_weekends_nozero['study_decimal'].mean()))
print('Median: ' + str(time_weekends_nozero['study_decimal'].median()))


Total
Mean: 1.4346502057617283
Median: 0.933333333

Total No Zero Days
Mean: 2.209252217998099
Median: 1.8333333330000001

Weekday No Zero Days
Mean: 2.123655913986175
Median: 1.75

Weekend No Zero Days
Mean: 2.6130434782282608
Median: 2.4166666665000003


In [231]:
fig = px.histogram(time_weekdays_nozero, x="study_decimal", title='Time Spent Studying During High School (Non-zero Days)')
fig.update_xaxes(title_text='Time Spent Studying (Hours)')
fig.update_yaxes(title_text='Days')
fig.show()

In [207]:
#Generate Per Week Statistics
days = ['NA', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_avg = [0]
for i in range(1,8):
    print(days[i] + ' Statistics')
    day_df = time_nozero.loc[time['weekday'] == i]
    day_avg.append(day_df['study_decimal'].mean())
    print('Mean: ' + str(day_df['study_decimal'].mean()))
    print('Median: ' + str(day_df['study_decimal'].median()))
    ttl = "Time Spent Studying (Non-Zero) on " + days[i]
    fig = px.histogram(day_df, x="study_decimal", title=ttl)
    fig.update_xaxes(title_text='Time Spent Studying (Hours)')
    fig.update_yaxes(title_text='Number of Days')
    fig.show()
    print()

Monday Statistics
Mean: 2.1631172839259256
Median: 1.9916666665



Tuesday Statistics
Mean: 1.8795751634411764
Median: 1.6333333330000002



Wednesday Statistics
Mean: 2.580555555588889
Median: 2.0



Thursday Statistics
Mean: 1.999444444433333
Median: 1.4583333330000001



Friday Statistics
Mean: 1.9121212121136366
Median: 1.8416666665



Saturday Statistics
Mean: 2.1570175438421053
Median: 2.0



Sunday Statistics
Mean: 2.7317351597808224
Median: 2.5





In [209]:
#Generating Yearly Statistics
years = ['2014', '2015', '2016', '2017']
year_avg = []
for i in range(0,4):
    print(years[i] + ' Statistics')
    year_df = time_nozero[time_nozero['date'].str.contains(years[i])]
    year_avg.append(year_df['study_decimal'].mean())
    print('Mean: ' + str(year_df['study_decimal'].mean()))
    print('Median: ' + str(year_df['study_decimal'].median()))
    ttl = "Time Spent Studying (Non-Zero) in " + years[i]
    fig = px.histogram(year_df, x="study_decimal", title=ttl)
    fig.update_xaxes(title_text='Time Spent Studying (Hours)')
    fig.update_yaxes(title_text='Number of Days')
    fig.show()
    print()

2014 Statistics
Mean: 2.922133333328
Median: 2.5



2015 Statistics
Mean: 2.078333333305882
Median: 1.8583333335



2016 Statistics
Mean: 1.7788954635384615
Median: 1.3666666669999998



2017 Statistics
Mean: 2.3040322580806447
Median: 2.05





In [213]:
day_times = pd.DataFrame(list(zip(days[1:], day_avg[1:])), columns =['day', 'time']) 
day_times
fig = px.bar(day_times, x='day', y='time', title='Time Spent on School Per Day of Week (Non-zero)')
fig.update_xaxes(title_text='Day of Week')
fig.update_yaxes(title_text='Time (Hours)')
fig.show()

In [217]:
year_times = pd.DataFrame(list(zip(years, year_avg)), columns =['year', 'time']) 
year_times
fig = px.bar(year_times, x='year', y='time', title='Average Non-zero Time Spent on School by Year')
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Time (Hours)')
fig.show()