In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

In [None]:
# Load the teachers table (first CSV column is the index), keep only the
# columns used in this notebook, and parse date_joined as a datetime.
teachers_df = (
    pd.read_csv('../data/teachers_df.csv', index_col=0)
    .loc[:, ['teacher_id', 'teacher_name', 'location', 'city', 'country',
             'followers', 'languages', 'date_joined', 'about', 'image_url']]
    .assign(date_joined=lambda frame: pd.to_datetime(frame['date_joined']))
)

In [None]:
# Load the teachers/languages table (first CSV column is the index) and preview it.
teachers_languages_df = pd.read_csv(
    '../data/teachers_languages_df.csv',
    index_col=0,
)
teachers_languages_df.head(n=5)

In [None]:
# Load the meditations table (first CSV column is the index), keep only the
# columns used in this notebook, and convert upload_date to datetime and
# duration to timedelta. Ends with a dtype / null-count summary.
meditations_df = (
    pd.read_csv('../data/meditations_df.csv', index_col=0)
    .loc[:, ['teacher_id', 'meditation_id', 'title', 'upload_date',
             'duration', 'plays', 'rating', 'reviews', 'track_type',
             'activity', 'suitable_for', 'description', 'meditation_url',
             'image_url']]
    .assign(
        upload_date=lambda frame: pd.to_datetime(frame['upload_date']),
        duration=lambda frame: pd.to_timedelta(frame['duration']),
    )
)
meditations_df.info()

In [None]:
# Load the meditations/topics table (first CSV column is the index) and preview it.
meditations_topics_df = pd.read_csv(
    '../data/meditations_topics_df.csv',
    index_col=0,
)
meditations_topics_df.head(n=5)

In [None]:
# Load the topics table (first CSV column is the index) and preview it.
topics_df = pd.read_csv(
    '../data/topics_df.csv',
    index_col=0,
)
topics_df.head(n=5)

# Growth of Meditations

In [None]:
# Derive two helper columns used by the later growth and duration analyses:
#   duration_min - track length in minutes
#   upload_year  - calendar year of upload_date
#
# Computed column-wise instead of with a row-by-row iterrows() loop.
# total_seconds() is used rather than Timedelta.seconds because .seconds is
# only the seconds *component* (0-86399) and silently drops whole days, so a
# duration of 24 hours or more would be reported far too short.
# .assign() builds a new frame, so re-running this cell is safe.
meditations_df = meditations_df.assign(
    duration_min=meditations_df['duration'].dt.total_seconds() / 60,
    upload_year=meditations_df['upload_date'].dt.year,
)

In [None]:
# Per upload year: how many rows have a plays value (plays_count) and the
# total plays of those rows (plays_sum).
med_growth = (
    meditations_df
    .groupby('upload_year')['plays']
    .agg(['count', 'sum'])
    .add_prefix('plays_')
    .reset_index()
)
med_growth

In [None]:
# Line chart: number of meditations uploaded in each year
# (plays_count is the per-year row count computed in med_growth).
fig, ax = plt.subplots(figsize=(10, 5))
lp = sns.lineplot(data=med_growth,
                  x='upload_year',
                  y='plays_count',
                  marker='o',
                  markersize=5,
                  ax=ax)
ax.set_title('Meditations Uploaded per Year',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('')
# No legend here: the chart has a single unlabeled series, so a legend call
# would find no handles and only emit a matplotlib warning.
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Number of Meditations Uploaded each Year',
              fontweight='bold',
              fontsize=12);

In [None]:
# Running total of meditations over time: order by upload year, then take the
# cumulative sum of the per-year counts. cumsum() replaces the manual
# accumulator loop that wrote one cell at a time through .loc.
med_growth = med_growth.sort_values(by=['upload_year'])
med_growth['total_meditations'] = med_growth['plays_count'].cumsum()
med_growth

In [None]:
# Line chart: cumulative number of meditations by upload year
# (total_meditations is the running sum of the per-year counts).
fig, ax = plt.subplots(figsize=(10, 5))
lp = sns.lineplot(data=med_growth,
                  x='upload_year',
                  y='total_meditations',
                  marker='o',
                  markersize=5,
                  ax=ax)
ax.set_title('Cumulative Meditations by Upload Year',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('')
# No legend here: the chart has a single unlabeled series, so a legend call
# would find no handles and only emit a matplotlib warning.
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Total Meditations',
              fontweight='bold',
              fontsize=12);

In [None]:
# Count of meditations per (topic, upload year). The inner merge keeps only
# meditations that have a row in the meditations/topics table.
med_growth_topic = (
    pd.merge(meditations_df, meditations_topics_df, on='meditation_id', how='inner')
    .groupby(['topic', 'upload_year'])['meditation_id']
    .count()
    .rename('meditation_id_count')
    .reset_index()
)
med_growth_topic.head()

In [None]:
# The five topics with the most meditations uploaded in 2022.
top5topics = list(
    med_growth_topic[med_growth_topic['upload_year'] == 2022]
    .sort_values('meditation_id_count', ascending=False)
    ['topic']
    .iloc[:5]
    .values
)
top5topics

In [None]:
# Restrict the per-topic yearly counts to the five selected topics.
is_top_topic = med_growth_topic['topic'].isin(top5topics)
med_growth_topic = med_growth_topic[is_top_topic]

In [None]:
# Line chart: meditations uploaded per year, one line per selected topic.
fig, ax = plt.subplots(figsize=(10, 5))
lp = sns.lineplot(x='upload_year',
                  y='meditation_id_count',
                  hue='topic',
                  data=med_growth_topic,
                  marker='o',
                  markersize=5)
ax.set_title('', fontweight='bold', fontsize=14)
ax.set_xlabel('')
ax.set_ylabel('# Uploaded Meditations', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.legend(loc="upper left");

In [None]:
#med_growth_topic = med_growth[['meditation_id',
#                               'upload_date',
#                               'upload_year',
#                               'topic']]
#med_growth_topic = .

***After joining with topics, can't take an overall sum of meditations!!!***

## Growth of Teachers

Growth in Teachers, Growth in Meditations

In [None]:
# Derive per-teacher tenure columns from date_joined:
#   years_since_joined - whole calendar years between joining and 2022
#   year_joined        - calendar year the teacher joined
#
# Computed column-wise with the .dt accessor instead of a row-by-row
# iterrows() loop; .assign() builds a new frame, so re-running is safe.
# 2022 is the reference year hard-coded by this analysis.
teachers_df = teachers_df.assign(
    years_since_joined=2022 - teachers_df['date_joined'].dt.year,
    year_joined=teachers_df['date_joined'].dt.year,
)
teachers_df.head()

# Testing Start

In [None]:
# Scratch copy so the spot checks below cannot modify teachers_df.
teachers_test = teachers_df.copy(deep=True)

In [None]:
# Spot check: teachers whose country is 'canada' and who joined in 2016.
teachers_test[teachers_test['country'].eq('canada') & teachers_test['year_joined'].eq(2016)]

# Testing End

In [None]:
# Number of teachers who joined in each year.
teachers_total = (
    teachers_df
    .groupby('year_joined')['teacher_id']
    .count()
    .rename('teacher_id_count')
    .reset_index()
)
teachers_total.head()

In [None]:
# Line chart: number of teachers who joined in each year.
fig, ax = plt.subplots(figsize=(10, 5))
lp = sns.lineplot(data=teachers_total,
                  x='year_joined',
                  y='teacher_id_count',
                  marker='o',
                  markersize=5,
                  ax=ax)
ax.set_title('New Teachers per Year',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('')
# No legend here: the chart has a single unlabeled series, so a legend call
# would find no handles and only emit a matplotlib warning.
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Number of Teachers Joining Insight Timer',
              fontweight='bold',
              fontsize=12);

In [None]:
# Number of teachers who joined per (country, year).
teachers_country = (
    teachers_df
    .groupby(['country', 'year_joined'])['teacher_id']
    .count()
    .rename('teacher_id_count')
    .reset_index()
)
teachers_country.head()

# COME BACK HERE

In [None]:
# Cumulative number of teachers per country over time.
# Rows are ordered by (country, year_joined) so that the running sum inside
# each country accumulates chronologically. groupby().cumsum() restarts the
# total at every new country, which replaces the hand-written loop that
# tracked the previous country with a sentinel string and wrote one cell at
# a time through .loc.
teachers_country = (
    teachers_country
    .sort_values(by=['country', 'year_joined'])
    .reset_index(drop=True)
)
teachers_country['total_teachers'] = (
    teachers_country.groupby('country')['teacher_id_count'].cumsum()
)
teachers_country

In [None]:
#PLEASE DELETE ME

#for index, row in teachers_country.iterrows():
#    total_teachers = teachers_country.loc[(teachers_country.year_joined <= row.year_joined)& (teachers_country.country == row.country)].agg({'teacher_id_count':['sum']})
#    teachers_country.loc[index,'total_teachers'] = total_teachers
#teachers_country

In [None]:
#total_teachers = 0
#for index, row in teachers_country.iterrows():
#    teachers_country.loc[index,'total_teachers'] = row.teacher_id_count + total_teachers
#    total_teachers=row.teacher_id_count + total_teachers
#teachers_country

In [None]:
# Spot check: the per-year rows for Uruguay.
teachers_country[teachers_country['country'].eq('Uruguay')]

In [None]:
# The five countries with the most teachers overall.
# total_teachers is a running (non-decreasing) total per country, so each
# country's maximum is its final cumulative total. Ranking on that value
# includes countries where no teacher joined in 2022; filtering on
# year_joined == 2022 would drop such countries entirely, because they have
# no 2022 row, regardless of how many teachers they have in total.
top5countries = (
    teachers_country
    .groupby('country')['total_teachers']
    .max()
    .sort_values(ascending=False)
    .head(5)
    .index
    .tolist()
)
top5countries

In [None]:
# Restrict the per-country yearly totals to the five selected countries.
is_top_country = teachers_country['country'].isin(top5countries)
teachers_country = teachers_country[is_top_country]

In [None]:
# Line chart: cumulative teachers over time, one line per selected country.
fig, ax = plt.subplots(figsize=(10, 5))
lp = sns.lineplot(x='year_joined',
                  y='total_teachers',
                  hue='country',
                  data=teachers_country,
                  marker='o',
                  markersize=5)
ax.set_title('', fontweight='bold', fontsize=14)
ax.set_xlabel('')
ax.set_ylabel('Total Teachers on Insight Timer', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.legend(loc="upper left");

### Suitable for

In [None]:
# Author's note: 92% of meditations are marked as suitable for 'Everyone'.
# Per "suitable for" category: row count, mean plays and mean rating,
# ordered from highest to lowest mean plays. The group key is exposed as
# 'suitable_for_' (trailing underscore), the name the bar chart cell uses.
suitable_meds = (
    meditations_df
    .groupby('suitable_for')
    .agg(plays_count=('plays', 'count'),
         plays_mean=('plays', 'mean'),
         rating_mean=('rating', 'mean'))
    .reset_index()
    .rename(columns={'suitable_for': 'suitable_for_'})
    .sort_values('plays_mean', ascending=False)
)
suitable_meds.head()

In [None]:
# Bar chart: mean plays for each "suitable for" category.
fig, ax = plt.subplots(figsize=(7, 6))
ahs = sns.barplot(x='suitable_for_',
                  y='plays_mean',
                  data=suitable_meds)
ax.set_title('Average Plays by "Suitable For"', fontweight='bold', fontsize=14)
ax.set_xlabel('Suitable For', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Average Plays', fontweight='bold', fontsize=12);

# Track Type

In [None]:
# Per track type: row count, mean plays and mean rating, ordered from highest
# to lowest mean plays. The group key is exposed as 'track_type_' (trailing
# underscore), the name the bar chart cell uses.
track_meds = (
    meditations_df
    .groupby('track_type')
    .agg(plays_count=('plays', 'count'),
         plays_mean=('plays', 'mean'),
         rating_mean=('rating', 'mean'))
    .reset_index()
    .rename(columns={'track_type': 'track_type_'})
    .sort_values('plays_mean', ascending=False)
)
track_meds.head()

In [None]:
# Bar chart: mean plays for each track type.
fig, ax = plt.subplots(figsize=(7, 6))
ahs = sns.barplot(x='track_type_',
                  y='plays_mean',
                  data=track_meds)
ax.set_title('Average Plays by Track Type', fontweight='bold', fontsize=14)
ax.set_xlabel('Track Type', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Average Plays', fontweight='bold', fontsize=12);

Does interest in specific topics vary by country?

### Duration

What duration do people like?

In [None]:
# duration_min and upload_year are already derived in the
# "Growth of Meditations" section; just preview the frame here.
meditations_df.head(n=5)

In [None]:
# Scatter plot: total plays against duration (minutes), one point per meditation.
# The unused hue_order / palettedict definitions and the legend call were
# removed: nothing in this chart is labeled, so the legend had no handles.
fig, ax = plt.subplots(figsize=(7, 7))
sp = sns.scatterplot(data=meditations_df,
                     x='duration_min',
                     y='plays',
                     ax=ax)
ax.set_title('Plays vs. Duration',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('Duration (minutes)',
              fontweight='bold',
              fontsize=12)
ax.set_ylabel('Total Plays',
              fontweight='bold',
              fontsize=12)

# Dotted guide lines at 400,000 plays and 75 minutes, the same cut-offs used
# to build meditations_trunc in the following cell.
# The trailing semicolon suppresses the text repr of the last expression.
sp.axhline(400000, linewidth=1, linestyle=':', color='black')
sp.axvline(75, linewidth=1, linestyle=':', color='black');

In [None]:
# Drop the extremes: keep meditations with fewer than 400,000 plays and a
# duration under 75 minutes. Prints the row count before filtering and
# displays the row count after.
print(len(meditations_df))
meditations_trunc = meditations_df[
    (meditations_df['plays'] < 400000) & (meditations_df['duration_min'] < 75)
]
len(meditations_trunc)

In [None]:
# Scatter plot: total plays against duration (minutes) for the truncated set
# (meditations_trunc: under 400,000 plays and under 75 minutes).
# The unused hue_order / palettedict definitions and the legend call were
# removed: nothing in this chart is labeled, so the legend had no handles.
fig, ax = plt.subplots(figsize=(7, 7))
sp = sns.scatterplot(data=meditations_trunc,
                     x='duration_min',
                     y='plays',
                     ax=ax)
ax.set_title('Plays vs. Duration (under 400,000 plays and 75 minutes)',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('Duration (minutes)',
              fontweight='bold',
              fontsize=12)
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Total Plays',
              fontweight='bold',
              fontsize=12);

## Performance

Are there tiers of meditation teachers? Can they be meaningfully grouped in other ways?

In [None]:
#Correlation among the following: teacher followers, # of meditations, total plays, rating

How important are ratings, actually?

Are there topics with really high plays and ratings but fewer available meditations?

In [None]:
# Per topic: number of meditations (supply) and median / mean / total plays
# (demand). The result keeps its two-level (column, statistic) header; the
# next cell flattens it.
topics_demand_supply = (
    pd.merge(meditations_df, meditations_topics_df, on='meditation_id', how='inner')
    .groupby('topic')
    .agg({'meditation_id': ['count'], 'plays': ['median', 'mean', 'sum']})
    .reset_index()
)

In [None]:
# Flatten the two-level (column, statistic) header into single names such as
# 'plays_sum'; the group key becomes 'topic_' because its second level is ''.
# Guarded on MultiIndex so that re-running the cell is a no-op: joining an
# already-flat string name would interleave underscores between its
# characters and corrupt the column names.
if isinstance(topics_demand_supply.columns, pd.MultiIndex):
    topics_demand_supply.columns = ['_'.join(col) for col in topics_demand_supply.columns.values]
topics_demand_supply.head()

In [None]:
# Scatter plot: total plays against number of meditations, one point per topic.
# The unused hue_order / palettedict definitions and the legend call were
# removed: nothing in this chart is labeled, so the legend had no handles.
fig, ax = plt.subplots(figsize=(7, 7))
sp = sns.scatterplot(data=topics_demand_supply,
                     x='meditation_id_count',
                     y='plays_sum',
                     ax=ax)
ax.set_title('Total Plays vs. Number of Meditations by Topic',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('Number of Meditations',
              fontweight='bold',
              fontsize=12)
ax.set_ylabel('Total Plays',
              fontweight='bold',
              fontsize=12)
# Fixed y ticks every 20 million up to 160 million, labelled in millions.
# The trailing semicolon suppresses the text repr of the last expression.
sp.set_yticks([0,20000000,40000000,60000000,80000000,100000000,120000000,140000000,160000000])
sp.set_yticklabels(['0','20M','40M','60M','80M','100M','120M','140M','160M']);

In [None]:
# Scatter plot: median plays per meditation against number of meditations,
# one point per topic.
# The unused hue_order / palettedict definitions and the legend call were
# removed: nothing in this chart is labeled, so the legend had no handles.
fig, ax = plt.subplots(figsize=(7, 7))
sp = sns.scatterplot(data=topics_demand_supply,
                     x='meditation_id_count',
                     y='plays_median',
                     ax=ax)
ax.set_title('Median Plays vs. Number of Meditations by Topic',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('Number of Meditations',
              fontweight='bold',
              fontsize=12)
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Median Plays per Meditation',
              fontweight='bold',
              fontsize=12);

Are some languages popular, based on the popularity of teachers?

***Suitable for***
- Vast majority of meditations are for "Everybody"
- So a different selection might help people find the meditation?
- How do the other selections compare in terms of performance? (plays, rating)

In [None]:
# Order topics from most to fewest total plays and renumber the rows.
topics_demand_supply = (
    topics_demand_supply
    .sort_values('plays_sum', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# First ten rows of the current ordering (sorted by total plays in the previous cell).
toptp_topics_demand_supply = topics_demand_supply.iloc[:10]
toptp_topics_demand_supply

In [None]:
# Horizontal bar chart: the ten topics with the highest total plays.
fig, ax = plt.subplots(figsize=(7, 6))
ahs = sns.barplot(x='plays_sum',
                  y='topic_',
                  data=toptp_topics_demand_supply)
ax.set_title('Top 10 Topics by Total Plays', fontweight='bold', fontsize=14)
ax.set_xlabel('Total Plays', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Topic', fontweight='bold', fontsize=12);

In [None]:
# Order topics from highest to lowest median plays and renumber the rows.
topics_demand_supply = (
    topics_demand_supply
    .sort_values('plays_median', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# First ten rows of the current ordering (sorted by median plays in the previous cell).
top_topics_demand_supply = topics_demand_supply.iloc[:10]
top_topics_demand_supply

In [None]:
# Horizontal bar chart: the ten topics with the highest median plays per meditation.
fig, ax = plt.subplots(figsize=(7, 6))
ahs = sns.barplot(x='plays_median',
                  y='topic_',
                  data=top_topics_demand_supply)
ax.set_title('Top 10 Topics by Median Plays', fontweight='bold', fontsize=14)
ax.set_xlabel('Median Plays per Meditation', fontweight='bold', fontsize=12)
# Trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Topic', fontweight='bold', fontsize=12);

In [None]:
# One row per meditation with its teacher's attributes attached (inner join
# on teacher_id). Both inputs have an image_url column, so the merge
# suffixes them as image_url_x (teacher) and image_url_y (meditation).
teachers_perf = pd.merge(teachers_df, meditations_df, on='teacher_id', how='inner')
teachers_perf.info()

In [None]:
# Collapse the merged frame to one row per teacher and keep only the numeric
# performance columns used for the correlation analysis.
# The group keys years_since_joined and followers are exposed with a trailing
# underscore, matching the names the later cells expect.
teachers_perf = (
    teachers_perf
    .groupby(['teacher_id',
              'teacher_name',
              'years_since_joined',
              'followers',
              'image_url_x'])
    .agg(meditation_id_count=('meditation_id', 'count'),
         plays_sum=('plays', 'sum'),
         plays_mean=('plays', 'mean'),
         duration_min_mean=('duration_min', 'mean'),
         rating_mean=('rating', 'mean'),
         reviews_sum=('reviews', 'sum'),
         reviews_mean=('reviews', 'mean'))
    .reset_index()
    .rename(columns={'years_since_joined': 'years_since_joined_',
                     'followers': 'followers_'})
    .loc[:, ['plays_sum',
             'plays_mean',
             'years_since_joined_',
             'followers_',
             'meditation_id_count',
             'duration_min_mean',
             'rating_mean',
             'reviews_sum',
             'reviews_mean']]
)

In [None]:
# Pairwise Pearson correlations between the per-teacher metrics.
teachers_perf.corr(method='pearson')

In [None]:
# Annotated heatmap of the per-teacher correlation matrix.
sns.heatmap(data=teachers_perf.corr(), annot=True);

In [None]:
# Order teachers from most to fewest total plays and show the top ten.
teachers_perf = teachers_perf.sort_values('plays_sum', ascending=False)
teachers_perf.iloc[:10]


In [None]:
# Scatter plot: total plays against number of meditations, one point per teacher.
# The x axis plots meditation_id_count, so it is labelled as a meditation
# count; the previous 'followers' label did not match the plotted column.
# The unused hue_order / palettedict definitions and the legend call were
# removed: nothing in this chart is labeled, so the legend had no handles.
fig, ax = plt.subplots(figsize=(7, 7))
sp = sns.scatterplot(data=teachers_perf,
                     x='meditation_id_count',
                     y='plays_sum',
                     ax=ax)
ax.set_title('Total Plays vs. Number of Meditations by Teacher',
             fontweight='bold',
             fontsize=14)
ax.set_xlabel('Number of Meditations',
              fontweight='bold',
              fontsize=12)
# The trailing semicolon suppresses the text repr of the last expression.
ax.set_ylabel('Total Plays for All Meditations',
              fontweight='bold',
              fontsize=12);